update skip_frame to vid_stride

action-recog
fcakyon 4 months ago
parent d9694ed017
commit d5231e8eaf
1. docs/en/guides/action-recognition.md (2 changes)
2. ultralytics/solutions/action_recognition.py (12 changes)

@@ -71,7 +71,7 @@ cv2.destroyAllWindows()
- `fp16`: Whether to use half-precision floating point. Defaults to `False`.
- `crop_margin_percentage`: Percentage of margin to add around detected objects. Defaults to `10`.
- `num_video_sequence_samples`: Number of sequential video frames to use for action recognition. Defaults to `8`.
- - `skip_frame`: Number of frames to skip between detections. Defaults to `2`.
+ - `vid_stride`: Number of frames to skip between detections. Defaults to `2`.
- `video_cls_overlap_ratio`: Overlap ratio between video sequences. Defaults to `0.25`.
- `device`: The device to run the model on. Defaults to `""`.
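
For context, a minimal sketch of how the renamed argument would be passed when constructing the solution. This assumes the class is importable from the file path changed in this commit; any additional required arguments (such as the video classifier model) are omitted because they are not shown in this diff.

```python
# Assumption: the class lives at the file path changed in this commit.
from ultralytics.solutions.action_recognition import ActionRecognition

# Sketch only: keyword arguments taken from the parameter list above; other
# required arguments (e.g. the video classifier model) are not shown here.
action_recognizer = ActionRecognition(
    fp16=False,
    crop_margin_percentage=10,
    num_video_sequence_samples=8,
    vid_stride=2,  # renamed from skip_frame in this commit
    video_cls_overlap_ratio=0.25,
    device="",
)
```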

@@ -22,7 +22,7 @@ class ActionRecognition:
fp16=False,
crop_margin_percentage=10,
num_video_sequence_samples=8,
- skip_frame=2,
+ vid_stride=2,
video_cls_overlap_ratio=0.25,
device="",
):
@@ -35,7 +35,7 @@ class ActionRecognition:
fp16 (bool, optional): Whether to use half-precision floating point. Defaults to False.
crop_margin_percentage (int, optional): Percentage of margin to add around detected objects. Defaults to 10.
num_video_sequence_samples (int, optional): Number of video frames to use for classification. Defaults to 8.
- skip_frame (int, optional): Number of frames to skip between detections. Defaults to 2.
+ vid_stride (int, optional): Number of frames to skip between detections. Defaults to 2.
video_cls_overlap_ratio (float, optional): Overlap ratio between video sequences. Defaults to 0.25.
device (str or torch.device, optional): The device to run the model on. Defaults to "".
"""
@@ -69,7 +69,7 @@ class ActionRecognition:
# Properties with default values
self.crop_margin_percentage = crop_margin_percentage
self.num_video_sequence_samples = num_video_sequence_samples
- self.skip_frame = skip_frame
+ self.vid_stride = vid_stride
self.video_cls_overlap_ratio = video_cls_overlap_ratio
def process_tracks(self, tracks):
@@ -169,14 +169,14 @@ class ActionRecognition:
self.process_tracks(tracks)
- if self.frame_counter % self.skip_frame == 0:
+ if self.frame_counter % self.vid_stride == 0:
crops_to_infer = []
track_ids_to_infer = []
for box, track_id in zip(self.boxes, self.track_ids):
if (
len(self.track_history[track_id]) == self.num_video_sequence_samples
- and self.frame_counter % self.skip_frame == 0
+ and self.frame_counter % self.vid_stride == 0
):
crops = self.video_classifier.preprocess_crops_for_video_cls(self.track_history[track_id])
crops_to_infer.append(crops)
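
As context for the condition above: crops are only collected on every `vid_stride`-th frame, and only for tracks whose history already holds exactly `num_video_sequence_samples` crops. A standalone sketch of that gating with the default values from this diff (hypothetical loop and variable names, not repository code):

```python
# Illustration of the gating above, using the defaults from this diff.
vid_stride = 2
num_video_sequence_samples = 8
track_history_len = 8  # crops accumulated so far for one track id

for frame_counter in range(1, 11):
    ready = (
        track_history_len == num_video_sequence_samples
        and frame_counter % vid_stride == 0
    )
    print(frame_counter, "collect crops" if ready else "skip")
# Prints "collect crops" on frames 2, 4, 6, 8 and 10.
```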
@@ -185,7 +185,7 @@ class ActionRecognition:
if crops_to_infer and (
not pred_labels
or self.frame_counter
- % int(self.num_video_sequence_samples * self.skip_frame * (1 - self.video_cls_overlap_ratio))
+ % int(self.num_video_sequence_samples * self.vid_stride * (1 - self.video_cls_overlap_ratio))
== 0
):
crops_batch = torch.cat(crops_to_infer, dim=0)
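
With the defaults shown in this diff, the re-inference interval in the condition above works out to 12 frames. A worked sketch (plain Python, not repository code):

```python
# Worked example of the re-inference interval used in the condition above,
# using the default values from this diff.
num_video_sequence_samples = 8
vid_stride = 2
video_cls_overlap_ratio = 0.25

interval = int(num_video_sequence_samples * vid_stride * (1 - video_cls_overlap_ratio))
print(interval)  # 12 -> once labels exist, classification re-runs every 12th frame
```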
