|
|
|
@ -207,9 +207,7 @@ class TorchVisionVideoClassifier: |
|
|
|
|
supports_r3d = check_requirements("torchvision>=0.8.1", install=False) |
|
|
|
|
supports_transforms_v2 = check_requirements("torchvision>=0.16.0", install=False) |
|
|
|
|
supports_mvitv1b = supports_s3d = check_requirements("torchvision>=0.14.0", install=False) |
|
|
|
|
supports_mvitv2s = supports_swin3dt = supports_swin3db = check_requirements( |
|
|
|
|
"torchvision>=0.15.0", install=False |
|
|
|
|
) |
|
|
|
|
supports_mvitv2s = supports_swin3dt = supports_swin3db = check_requirements("torchvision>=0.15.0", install=False) |
|
|
|
|
|
|
|
|
|
model_name_to_model_and_weights = {} |
|
|
|
|
if supports_r3d: |
|
|
|
@ -396,7 +394,9 @@ class HuggingFaceVideoClassifier: |
|
|
|
|
[ |
|
|
|
|
v2.ToDtype(torch.float32, scale=True), |
|
|
|
|
v2.Resize(input_size, antialias=True), |
|
|
|
|
v2.Normalize(mean=self.processor.image_processor.image_mean, std=self.processor.image_processor.image_std), |
|
|
|
|
v2.Normalize( |
|
|
|
|
mean=self.processor.image_processor.image_mean, std=self.processor.image_processor.image_std |
|
|
|
|
), |
|
|
|
|
] |
|
|
|
|
) |
|
|
|
|
else: |
|
|
|
|