diff --git a/docs/en/reference/nn/modules/activation.md b/docs/en/reference/nn/modules/activation.md
new file mode 100644
index 0000000000..09dd92edc6
--- /dev/null
+++ b/docs/en/reference/nn/modules/activation.md
@@ -0,0 +1,16 @@
+---
+description: Explore activation functions in Ultralytics, including the Unified activation function and other custom implementations for neural networks.
+keywords: ultralytics, activation functions, neural networks, Unified activation, AGLU, SiLU, ReLU, PyTorch, deep learning, custom activations
+---
+
+# Reference for `ultralytics/nn/modules/activation.py`
+
+!!! Note
+
+ This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/activation.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/activation.py). If you spot a problem, please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/activation.py) 🛠️. Thank you 🙏!
+
+
+
+## ::: ultralytics.nn.modules.activation.AGLU
+
+
diff --git a/mkdocs.yml b/mkdocs.yml
index 59d2f7df05..df506fe847 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -537,6 +537,7 @@ nav:
- nn:
- autobackend: reference/nn/autobackend.md
- modules:
+ - activation: reference/nn/modules/activation.md
- block: reference/nn/modules/block.md
- conv: reference/nn/modules/conv.md
- head: reference/nn/modules/head.md
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index 717654d483..ca2fb2662c 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -431,6 +431,12 @@ class ClassificationDataset:
self.samples = self.samples[: round(len(self.samples) * args.fraction)]
self.prefix = colorstr(f"{prefix}: ") if prefix else ""
self.cache_ram = args.cache is True or str(args.cache).lower() == "ram" # cache images into RAM
+ if self.cache_ram:
+ LOGGER.warning(
+ "WARNING ⚠️ Classification `cache_ram` training has known memory leak in "
+ "https://github.com/ultralytics/ultralytics/issues/9824, setting `cache_ram=False`."
+ )
+ self.cache_ram = False
self.cache_disk = str(args.cache).lower() == "disk" # cache images on hard drive as uncompressed *.npy files
self.samples = self.verify_images() # filter out bad images
self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples] # file, index, npy, im
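The dataset change above refuses RAM caching for classification while leaving disk caching untouched. The following is a minimal sketch of how the `cache` argument now resolves; `resolve_cache_flags` is a hypothetical helper for illustration, not part of `ClassificationDataset`:

```python
import logging

LOGGER = logging.getLogger("ultralytics-sketch")


def resolve_cache_flags(cache):
    """Map a `cache` argument (True, 'ram', 'disk', or False) to (cache_ram, cache_disk)."""
    cache_ram = cache is True or str(cache).lower() == "ram"
    cache_disk = str(cache).lower() == "disk"
    if cache_ram:
        # RAM caching is force-disabled for classification datasets due to the
        # memory leak tracked in ultralytics/ultralytics#9824.
        LOGGER.warning("cache_ram requested but disabled; training continues without a RAM cache.")
        cache_ram = False
    return cache_ram, cache_disk


print(resolve_cache_flags("ram"))   # (False, False) -> RAM cache refused
print(resolve_cache_flags("disk"))  # (False, True)  -> uncompressed *.npy cache on disk
```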
diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index 4415ba94eb..7184082240 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -41,7 +41,6 @@ from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_m
from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
from ultralytics.utils.files import get_latest_run
from ultralytics.utils.torch_utils import (
- TORCH_1_13,
EarlyStopping,
ModelEMA,
autocast,
@@ -266,11 +265,7 @@ class BaseTrainer:
if RANK > -1 and world_size > 1: # DDP
dist.broadcast(self.amp, src=0) # broadcast the tensor from rank 0 to all other ranks (returns None)
self.amp = bool(self.amp) # as boolean
- self.scaler = (
- torch.amp.GradScaler("cuda", enabled=self.amp)
- if TORCH_1_13
- else torch.cuda.amp.GradScaler(enabled=self.amp)
- )
+ self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp)
if world_size > 1:
self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK], find_unused_parameters=True)
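The trainer change drops the `TORCH_1_13` branch and always constructs `torch.cuda.amp.GradScaler`, which is a no-op when `enabled=False`. For context, here is a standalone sketch of the standard AMP step the scaler participates in; the model, optimizer, and data are placeholders, not `BaseTrainer` internals:

```python
import torch
import torch.nn as nn

device = "cuda" if torch.cuda.is_available() else "cpu"
amp = device == "cuda"  # mixed precision only when CUDA is available
model = nn.Linear(10, 2).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scaler = torch.cuda.amp.GradScaler(enabled=amp)  # behaves as a pass-through when disabled

x = torch.randn(4, 10, device=device)
y = torch.randint(0, 2, (4,), device=device)

with torch.cuda.amp.autocast(enabled=amp):
    loss = nn.functional.cross_entropy(model(x), y)

scaler.scale(loss).backward()  # scale the loss to avoid fp16 gradient underflow
scaler.step(optimizer)         # unscale gradients and step only if no inf/nan found
scaler.update()                # adjust the scale factor for the next iteration
optimizer.zero_grad()
```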
diff --git a/ultralytics/nn/modules/activation.py b/ultralytics/nn/modules/activation.py
new file mode 100644
index 0000000000..25cca2a508
--- /dev/null
+++ b/ultralytics/nn/modules/activation.py
@@ -0,0 +1,22 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+"""Activation modules."""
+
+import torch
+import torch.nn as nn
+
+
+class AGLU(nn.Module):
+ """Unified activation function module from https://github.com/kostas1515/AGLU."""
+
+ def __init__(self, device=None, dtype=None) -> None:
+ """Initialize the Unified activation function."""
+ super().__init__()
+ self.act = nn.Softplus(beta=-1.0)
+ self.lambd = nn.Parameter(nn.init.uniform_(torch.empty(1, device=device, dtype=dtype))) # lambda parameter
+ self.kappa = nn.Parameter(nn.init.uniform_(torch.empty(1, device=device, dtype=dtype))) # kappa parameter
+
+ def forward(self, x: torch.Tensor) -> torch.Tensor:
+ """Compute the forward pass of the Unified activation function."""
+ lam = torch.clamp(self.lambd, min=0.0001)
+ y = torch.exp((1 / lam) * self.act((self.kappa * x) - torch.log(lam)))
+ return y  # for the gated AGLU variant, return y * x instead
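The new module is a drop-in `nn.Module` with two learnable scalars (`lambd`, `kappa`). Below is a minimal usage sketch; it assumes the module path added by this diff (`ultralytics/nn/modules/activation.py`) is importable:

```python
import torch
from ultralytics.nn.modules.activation import AGLU

act = AGLU()
x = torch.linspace(-3, 3, steps=5)
print(act(x))        # element-wise activation, same shape as the input
print(act(x).shape)  # torch.Size([5])

# lambd and kappa are nn.Parameters, so they are updated during training:
print([name for name, _ in act.named_parameters()])  # ['lambd', 'kappa']
```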