Update augment.py

3 months ago · 659fdb6422
parent 9129d8a397
commit 659fdb6422
1 changed files with 115 additions and 12 deletions
--- a/ultralytics/data/augment.py
+++ b/ultralytics/data/augment.py
@ -794,7 +794,106 @@ class LetterBox:
        return labels


-class CopyPaste:
+class CopyPaste(BaseMixTransform):
+    """
+    Implements Copy-Paste augmentation as described in https://arxiv.org/abs/2012.07177.
+
+    Instances of a second sample drawn from the dataset are pasted into the image being augmented.
+
+    Attributes:
+        dataset: The dataset from which the second (source) sample is drawn.
+        pre_transform: Optional transform applied to the source sample before pasting.
+        p (float): Fraction of eligible source instances that are pasted; 0 disables the transform.
+
+    Methods:
+        __call__: Applies Copy-Paste augmentation to given image and instances.
+
+    Examples:
+        >>> copypaste = CopyPaste(dataset, p=0.5)
+        >>> augmented_labels = copypaste(labels)
+        >>> augmented_image = augmented_labels['img']
+    """
+
+    def __init__(self, dataset, pre_transform=None, p=0.5) -> None:
+        """Initializes CopyPaste by forwarding dataset, pre_transform and p to BaseMixTransform."""
+        super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
+
+    def get_indexes(self):
+        """
+        Get a random index from the dataset.
+
+        This method returns a single random index from the dataset, which is used to select the source image
+        whose instances are pasted by the Copy-Paste augmentation.
+
+        Returns:
+            (int): A random integer index in the inclusive range [0, len(dataset) - 1].
+
+        Examples:
+            >>> copypaste = CopyPaste(dataset)
+            >>> index = copypaste.get_indexes()
+            >>> print(index)
+            42
+        """
+        return random.randint(0, len(self.dataset) - 1)
+
+    def _mix_transform(self, labels):
+        """Pastes low-overlap instances from labels["mix_labels"][0] into labels["img"] and returns the updated labels."""
+        labels2 = labels["mix_labels"][0]  # the source sample prepared by __call__
+        im = labels["img"]
+        cls = labels["cls"]
+        h, w = im.shape[:2]
+        instances = labels.pop("instances")
+        instances.convert_bbox(format="xyxy")
+        instances.denormalize(w, h)
+
+        im_new = np.zeros(im.shape, np.uint8)  # paste mask: non-zero where source pixels will be copied
+        instances2 = labels2.pop("instances")  # NOTE(review): not converted/denormalized here; presumably already pixel xyxy - confirm
+        ioa = bbox_ioa(instances2.bboxes, instances.bboxes)  # intersection over area, (N, M)
+        indexes = np.nonzero((ioa < 0.30).all(1))[0]  # source instances whose IoA with every existing box is < 0.30
+        n = len(indexes)
+        # Candidates are ranked by their maximum IoA (ascending), so the least-overlapping ones are pasted first.
+        sorted_idx = np.argsort(ioa.max(1)[indexes])
+        indexes = indexes[sorted_idx]
+        for j in indexes[: round(self.p * n)]:  # keep only a fraction p of the candidates
+            cls = np.concatenate((cls, labels2["cls"][[j]]), axis=0)
+            instances = Instances.concatenate((instances, instances2[[j]]), axis=0)
+            cv2.drawContours(im_new, instances2.segments[[j]].astype(np.int32), -1, (1, 1, 1), cv2.FILLED)
+
+        result = labels2["img"]  # source pixels; assumes the same shape as im - TODO confirm
+        i = im_new.astype(bool)
+        im[i] = result[i]  # writes into the array held by labels["img"] in place
+
+        labels["img"] = im
+        labels["cls"] = cls
+        labels["instances"] = instances
+        return labels
+
+    def __call__(self, labels):
+        """Fetches one other sample, optionally pre-transforms it, and copy-pastes its instances into labels."""
+        if len(labels["instances"].segments) == 0 or self.p == 0:  # no segments or disabled: return labels untouched
+            return labels
+        # Get the index of the other image that supplies the pasted instances
+        indexes = self.get_indexes()
+        if isinstance(indexes, int):
+            indexes = [indexes]
+
+        # Load image and label information for the source sample(s)
+        mix_labels = [self.dataset.get_image_and_label(i) for i in indexes]
+
+        if self.pre_transform is not None:
+            for i, data in enumerate(mix_labels):
+                mix_labels[i] = self.pre_transform(data)
+        labels["mix_labels"] = mix_labels
+
+        # Update cls and texts
+        labels = self._update_label_text(labels)
+        # Copy-Paste
+        labels = self._mix_transform(labels)
+        labels.pop("mix_labels", None)  # drop the temporary entry before returning
+        return labels
+
+
+class OldCopyPaste:
    """
    Implements the Copy-Paste augmentation as described in the paper https://arxiv.org/abs/2012.07177. This class is
    responsible for applying the Copy-Paste augmentation on images and their corresponding instances.
@ -1096,18 +1195,22 @@ class RandomLoadText:

 def v8_transforms(dataset, imgsz, hyp, stretch=False):
    """Convert images to a size suitable for YOLOv8 training."""
-    pre_transform = Compose(
-        [
-            Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic),
-            CopyPaste(p=hyp.copy_paste),
-            RandomPerspective(
+    mosaic = Mosaic(dataset, imgsz=imgsz, p=hyp.mosaic)
+    affine = RandomPerspective(
        degrees=hyp.degrees,
        translate=hyp.translate,
        scale=hyp.scale,
        shear=hyp.shear,
        perspective=hyp.perspective,
        pre_transform=None if stretch else LetterBox(new_shape=(imgsz, imgsz)),
-            ),
+    )
+    pre_transform = Compose(
+        [
+            mosaic,
+            # CopyPaste(dataset, pre_transform=mosaic, p=hyp.copy_paste),
+            # OldCopyPaste(p=hyp.copy_paste),
+            affine,
+            CopyPaste(dataset, pre_transform=Compose([mosaic, affine]), p=hyp.copy_paste),
        ]
    )
    flip_idx = dataset.data.get("flip_idx", [])  # for keypoints augmentation