diff --git a/docs/en/reference/nn/modules/activation.md b/docs/en/reference/nn/modules/activation.md
new file mode 100644
index 0000000000..09dd92edc6
--- /dev/null
+++ b/docs/en/reference/nn/modules/activation.md
@@ -0,0 +1,16 @@
+---
+description: Explore activation functions in Ultralytics, including the Unified activation function and other custom implementations for neural networks.
+keywords: ultralytics, activation functions, neural networks, Unified activation, AGLU, SiLU, ReLU, PyTorch, deep learning, custom activations
+---
+
+# Reference for `ultralytics/nn/modules/activation.py`
+
+!!! Note
+
+    This file is available at [https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/activation.py](https://github.com/ultralytics/ultralytics/blob/main/ultralytics/nn/modules/activation.py). If you spot a problem please help fix it by [contributing](https://docs.ultralytics.com/help/contributing/) a [Pull Request](https://github.com/ultralytics/ultralytics/edit/main/ultralytics/nn/modules/activation.py) 🛠️. Thank you 🙏!
+
+<br><br>
+
+## ::: ultralytics.nn.modules.activation.AGLU
+
+<br><br>
diff --git a/mkdocs.yml b/mkdocs.yml
index 59d2f7df05..df506fe847 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -537,6 +537,7 @@ nav:
         - nn:
             - autobackend: reference/nn/autobackend.md
             - modules:
+                - activation: reference/nn/modules/activation.md
                 - block: reference/nn/modules/block.md
                 - conv: reference/nn/modules/conv.md
                 - head: reference/nn/modules/head.md
diff --git a/ultralytics/data/dataset.py b/ultralytics/data/dataset.py
index 717654d483..ca2fb2662c 100644
--- a/ultralytics/data/dataset.py
+++ b/ultralytics/data/dataset.py
@@ -431,6 +431,12 @@ class ClassificationDataset:
             self.samples = self.samples[: round(len(self.samples) * args.fraction)]
         self.prefix = colorstr(f"{prefix}: ") if prefix else ""
         self.cache_ram = args.cache is True or str(args.cache).lower() == "ram"  # cache images into RAM
+        if self.cache_ram:
+            LOGGER.warning(
+                "WARNING ⚠️ Classification `cache_ram` training has known memory leak in "
+                "https://github.com/ultralytics/ultralytics/issues/9824, setting `cache_ram=False`."
+            )
+            self.cache_ram = False
         self.cache_disk = str(args.cache).lower() == "disk"  # cache images on hard drive as uncompressed *.npy files
         self.samples = self.verify_images()  # filter out bad images
         self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
diff --git a/ultralytics/engine/trainer.py b/ultralytics/engine/trainer.py
index 4415ba94eb..7184082240 100644
--- a/ultralytics/engine/trainer.py
+++ b/ultralytics/engine/trainer.py
@@ -41,7 +41,6 @@ from ultralytics.utils.checks import check_amp, check_file, check_imgsz, check_m
 from ultralytics.utils.dist import ddp_cleanup, generate_ddp_command
 from ultralytics.utils.files import get_latest_run
 from ultralytics.utils.torch_utils import (
-    TORCH_1_13,
     EarlyStopping,
     ModelEMA,
     autocast,
@@ -266,11 +265,7 @@ class BaseTrainer:
         if RANK > -1 and world_size > 1:  # DDP
             dist.broadcast(self.amp, src=0)  # broadcast the tensor from rank 0 to all other ranks (returns None)
         self.amp = bool(self.amp)  # as boolean
-        self.scaler = (
-            torch.amp.GradScaler("cuda", enabled=self.amp)
-            if TORCH_1_13
-            else torch.cuda.amp.GradScaler(enabled=self.amp)
-        )
+        self.scaler = torch.cuda.amp.GradScaler(enabled=self.amp)
 
         if world_size > 1:
             self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[RANK], find_unused_parameters=True)
diff --git a/ultralytics/nn/modules/activation.py b/ultralytics/nn/modules/activation.py
new file mode 100644
index 0000000000..25cca2a508
--- /dev/null
+++ b/ultralytics/nn/modules/activation.py
@@ -0,0 +1,22 @@
+# Ultralytics YOLO 🚀, AGPL-3.0 license
+"""Activation modules."""
+
+import torch
+import torch.nn as nn
+
+
+class AGLU(nn.Module):
+    """Unified activation function module from https://github.com/kostas1515/AGLU."""
+
+    def __init__(self, device=None, dtype=None) -> None:
+        """Initialize the Unified activation function."""
+        super().__init__()
+        self.act = nn.Softplus(beta=-1.0)
+        self.lambd = nn.Parameter(nn.init.uniform_(torch.empty(1, device=device, dtype=dtype)))  # lambda parameter
+        self.kappa = nn.Parameter(nn.init.uniform_(torch.empty(1, device=device, dtype=dtype)))  # kappa parameter
+
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Compute the forward pass of the Unified activation function."""
+        lam = torch.clamp(self.lambd, min=0.0001)
+        y = torch.exp((1 / lam) * self.act((self.kappa * x) - torch.log(lam)))
+        return y  # for AGLU simply return y * input
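For context, a minimal usage sketch of the new `AGLU` ("Unified") activation added by this patch. This is not part of the diff itself; it assumes the `ultralytics` package is installed from a branch that contains `ultralytics/nn/modules/activation.py`, and the tensor values are illustrative only.

```python
import torch

from ultralytics.nn.modules.activation import AGLU  # module introduced by this patch

# AGLU applies an element-wise, learnable activation. From the forward pass above,
# with Softplus(beta=-1) this is equivalent to:
#   y = (1 + lambd * exp(-kappa * x)) ** (-1 / lambd)
# where lambd and kappa are 1-element trainable parameters initialized uniformly
# in [0, 1), and lambd is clamped to >= 1e-4 during the forward pass.
act = AGLU()  # optionally AGLU(device="cuda", dtype=torch.float16)

x = torch.linspace(-3.0, 3.0, steps=7)
y = act(x)
print(y.shape)  # torch.Size([7]) -- output has the same shape as the input
```

Because `lambd` and `kappa` are `nn.Parameter`s, they are updated by the optimizer like any other weight when the module is used inside a model.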