|
|
@ -41,7 +41,7 @@ class BaseModel(nn.Module): |
|
|
|
return self.loss(x, *args, **kwargs) |
|
|
|
return self.loss(x, *args, **kwargs) |
|
|
|
return self.predict(x, *args, **kwargs) |
|
|
|
return self.predict(x, *args, **kwargs) |
|
|
|
|
|
|
|
|
|
|
|
def predict(self, x, profile=False, visualize=False, augment=False): |
|
|
|
def predict(self, x, profile=False, visualize=False, augment=False, embed=None): |
|
|
|
""" |
|
|
|
""" |
|
|
|
Perform a forward pass through the network. |
|
|
|
Perform a forward pass through the network. |
|
|
|
|
|
|
|
|
|
|
@ -50,15 +50,16 @@ class BaseModel(nn.Module): |
|
|
|
profile (bool): Print the computation time of each layer if True, defaults to False. |
|
|
|
profile (bool): Print the computation time of each layer if True, defaults to False. |
|
|
|
visualize (bool): Save the feature maps of the model if True, defaults to False. |
|
|
|
visualize (bool): Save the feature maps of the model if True, defaults to False. |
|
|
|
augment (bool): Augment image during prediction, defaults to False. |
|
|
|
augment (bool): Augment image during prediction, defaults to False. |
|
|
|
|
|
|
|
embed (list, optional): A list of feature vectors/embeddings to return. |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
Returns: |
|
|
|
(torch.Tensor): The last output of the model. |
|
|
|
(torch.Tensor): The last output of the model. |
|
|
|
""" |
|
|
|
""" |
|
|
|
if augment: |
|
|
|
if augment: |
|
|
|
return self._predict_augment(x) |
|
|
|
return self._predict_augment(x) |
|
|
|
return self._predict_once(x, profile, visualize) |
|
|
|
return self._predict_once(x, profile, visualize, embed) |
|
|
|
|
|
|
|
|
|
|
|
def _predict_once(self, x, profile=False, visualize=False): |
|
|
|
def _predict_once(self, x, profile=False, visualize=False, embed=None): |
|
|
|
""" |
|
|
|
""" |
|
|
|
Perform a forward pass through the network. |
|
|
|
Perform a forward pass through the network. |
|
|
|
|
|
|
|
|
|
|
@ -66,11 +67,12 @@ class BaseModel(nn.Module): |
|
|
|
x (torch.Tensor): The input tensor to the model. |
|
|
|
x (torch.Tensor): The input tensor to the model. |
|
|
|
profile (bool): Print the computation time of each layer if True, defaults to False. |
|
|
|
profile (bool): Print the computation time of each layer if True, defaults to False. |
|
|
|
visualize (bool): Save the feature maps of the model if True, defaults to False. |
|
|
|
visualize (bool): Save the feature maps of the model if True, defaults to False. |
|
|
|
|
|
|
|
embed (list, optional): A list of feature vectors/embeddings to return. |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
Returns: |
|
|
|
(torch.Tensor): The last output of the model. |
|
|
|
(torch.Tensor): The last output of the model. |
|
|
|
""" |
|
|
|
""" |
|
|
|
y, dt = [], [] # outputs |
|
|
|
y, dt, embeddings = [], [], [] # outputs |
|
|
|
for m in self.model: |
|
|
|
for m in self.model: |
|
|
|
if m.f != -1: # if not from previous layer |
|
|
|
if m.f != -1: # if not from previous layer |
|
|
|
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers |
|
|
|
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers |
|
|
@ -80,6 +82,10 @@ class BaseModel(nn.Module): |
|
|
|
y.append(x if m.i in self.save else None) # save output |
|
|
|
y.append(x if m.i in self.save else None) # save output |
|
|
|
if visualize: |
|
|
|
if visualize: |
|
|
|
feature_visualization(x, m.type, m.i, save_dir=visualize) |
|
|
|
feature_visualization(x, m.type, m.i, save_dir=visualize) |
|
|
|
|
|
|
|
if embed and m.i in embed: |
|
|
|
|
|
|
|
embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten |
|
|
|
|
|
|
|
if m.i == max(embed): |
|
|
|
|
|
|
|
return torch.unbind(torch.cat(embeddings, 1), dim=0) |
|
|
|
return x |
|
|
|
return x |
|
|
|
|
|
|
|
|
|
|
|
def _predict_augment(self, x): |
|
|
|
def _predict_augment(self, x): |
|
|
@ -454,7 +460,7 @@ class RTDETRDetectionModel(DetectionModel): |
|
|
|
return sum(loss.values()), torch.as_tensor([loss[k].detach() for k in ['loss_giou', 'loss_class', 'loss_bbox']], |
|
|
|
return sum(loss.values()), torch.as_tensor([loss[k].detach() for k in ['loss_giou', 'loss_class', 'loss_bbox']], |
|
|
|
device=img.device) |
|
|
|
device=img.device) |
|
|
|
|
|
|
|
|
|
|
|
def predict(self, x, profile=False, visualize=False, batch=None, augment=False): |
|
|
|
def predict(self, x, profile=False, visualize=False, batch=None, augment=False, embed=None): |
|
|
|
""" |
|
|
|
""" |
|
|
|
Perform a forward pass through the model. |
|
|
|
Perform a forward pass through the model. |
|
|
|
|
|
|
|
|
|
|
@ -464,11 +470,12 @@ class RTDETRDetectionModel(DetectionModel): |
|
|
|
visualize (bool, optional): If True, save feature maps for visualization. Defaults to False. |
|
|
|
visualize (bool, optional): If True, save feature maps for visualization. Defaults to False. |
|
|
|
batch (dict, optional): Ground truth data for evaluation. Defaults to None. |
|
|
|
batch (dict, optional): Ground truth data for evaluation. Defaults to None. |
|
|
|
augment (bool, optional): If True, perform data augmentation during inference. Defaults to False. |
|
|
|
augment (bool, optional): If True, perform data augmentation during inference. Defaults to False. |
|
|
|
|
|
|
|
embed (list, optional): A list of feature vectors/embeddings to return. |
|
|
|
|
|
|
|
|
|
|
|
Returns: |
|
|
|
Returns: |
|
|
|
(torch.Tensor): Model's output tensor. |
|
|
|
(torch.Tensor): Model's output tensor. |
|
|
|
""" |
|
|
|
""" |
|
|
|
y, dt = [], [] # outputs |
|
|
|
y, dt, embeddings = [], [], [] # outputs |
|
|
|
for m in self.model[:-1]: # except the head part |
|
|
|
for m in self.model[:-1]: # except the head part |
|
|
|
if m.f != -1: # if not from previous layer |
|
|
|
if m.f != -1: # if not from previous layer |
|
|
|
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers |
|
|
|
x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers |
|
|
@ -478,6 +485,10 @@ class RTDETRDetectionModel(DetectionModel): |
|
|
|
y.append(x if m.i in self.save else None) # save output |
|
|
|
y.append(x if m.i in self.save else None) # save output |
|
|
|
if visualize: |
|
|
|
if visualize: |
|
|
|
feature_visualization(x, m.type, m.i, save_dir=visualize) |
|
|
|
feature_visualization(x, m.type, m.i, save_dir=visualize) |
|
|
|
|
|
|
|
if embed and m.i in embed: |
|
|
|
|
|
|
|
embeddings.append(nn.functional.adaptive_avg_pool2d(x, (1, 1)).squeeze(-1).squeeze(-1)) # flatten |
|
|
|
|
|
|
|
if m.i == max(embed): |
|
|
|
|
|
|
|
return torch.unbind(torch.cat(embeddings, 1), dim=0) |
|
|
|
head = self.model[-1] |
|
|
|
head = self.model[-1] |
|
|
|
x = head([y[j] for j in head.f], batch) # head inference |
|
|
|
x = head([y[j] for j in head.f], batch) # head inference |
|
|
|
return x |
|
|
|
return x |
|
|
|