Update code and demos

release_code
SlongLiu 2 years ago
parent cc02608066
commit 08bd3c2e9b
  1. 13
      demo/inference_on_a_image.py
  2. 5
      groundingdino/models/GroundingDINO/__init__.py
  3. 5
      groundingdino/models/GroundingDINO/backbone/backbone.py
  4. 5
      groundingdino/models/GroundingDINO/backbone/position_encoding.py
  5. 5
      groundingdino/models/GroundingDINO/backbone/swin_transformer.py
  6. 7
      groundingdino/models/GroundingDINO/bertwarper.py
  7. 7
      groundingdino/models/GroundingDINO/fuse_modules.py
  8. 5
      groundingdino/models/GroundingDINO/groundingdino.py
  9. 26
      groundingdino/models/GroundingDINO/ms_deform_attn.py
  10. 21
      groundingdino/models/GroundingDINO/transformer.py
  11. 6
      groundingdino/models/GroundingDINO/transformer_vanilla.py
  12. 5
      groundingdino/models/GroundingDINO/utils.py
  13. 5
      groundingdino/models/__init__.py
  14. 6
      groundingdino/models/registry.py
  15. 2
      groundingdino/version.py

@ -2,8 +2,6 @@ import argparse
import os import os
import sys import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import numpy as np import numpy as np
import torch import torch
from PIL import Image, ImageDraw, ImageFont from PIL import Image, ImageDraw, ImageFont
@ -14,6 +12,8 @@ from groundingdino.util import box_ops
from groundingdino.util.slconfig import SLConfig from groundingdino.util.slconfig import SLConfig
from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
def plot_boxes_to_image(image_pil, tgt): def plot_boxes_to_image(image_pil, tgt):
H, W = tgt["size"] H, W = tgt["size"]
@ -88,7 +88,6 @@ def get_grounding_output(model, image, caption, box_threshold, text_threshold, w
logits = outputs["pred_logits"].cpu().sigmoid()[0] # (nq, 256) logits = outputs["pred_logits"].cpu().sigmoid()[0] # (nq, 256)
boxes = outputs["pred_boxes"].cpu()[0] # (nq, 4) boxes = outputs["pred_boxes"].cpu()[0] # (nq, 4)
logits.shape[0] logits.shape[0]
# filter output # filter output
logits_filt = logits.clone() logits_filt = logits.clone()
@ -126,12 +125,8 @@ if __name__ == "__main__":
"--output_dir", "-o", type=str, default="outputs", required=True, help="output directory" "--output_dir", "-o", type=str, default="outputs", required=True, help="output directory"
) )
parser.add_argument( parser.add_argument("--box_threshold", type=float, default=0.3, help="box threshold")
"--box_threshold", type=float, default=0.3, help="box threshold" parser.add_argument("--text_threshold", type=float, default=0.25, help="text threshold")
)
parser.add_argument(
"--text_threshold", type=float, default=0.25, help="text threshold"
)
args = parser.parse_args() args = parser.parse_args()
# cfg # cfg

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Conditional DETR # Conditional DETR
# Copyright (c) 2021 Microsoft. All Rights Reserved. # Copyright (c) 2021 Microsoft. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Conditional DETR # Conditional DETR
# Copyright (c) 2021 Microsoft. All Rights Reserved. # Copyright (c) 2021 Microsoft. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# DINO # DINO
# Copyright (c) 2022 IDEA. All Rights Reserved. # Copyright (c) 2022 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# DINO # DINO
# Copyright (c) 2022 IDEA. All Rights Reserved. # Copyright (c) 2022 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]

@ -1,3 +1,10 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
import torch import torch
import torch.nn.functional as F import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint import torch.utils.checkpoint as checkpoint

@ -1,3 +1,10 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F

@ -1,6 +1,7 @@
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# DINO # Grounding DINO
# Copyright (c) 2022 IDEA. All Rights Reserved. # url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# Conditional DETR model and criterion classes. # Conditional DETR model and criterion classes.

@ -1,18 +1,9 @@
# coding=utf-8 # ------------------------------------------------------------------------
# Copyright 2022 The IDEA Authors. All rights reserved. # Grounding DINO
# # url: https://github.com/IDEA-Research/GroundingDINO
# Licensed under the Apache License, Version 2.0 (the "License"); # Copyright (c) 2023 IDEA. All Rights Reserved.
# you may not use this file except in compliance with the License. # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# You may obtain a copy of the License at # ------------------------------------------------------------------------
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------------------------------------------
# Deformable DETR # Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved. # Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
@ -26,12 +17,14 @@
import math import math
import warnings import warnings
from typing import Optional from typing import Optional
import torch import torch
import torch.nn as nn import torch.nn as nn
import torch.nn.functional as F import torch.nn.functional as F
from torch.autograd import Function from torch.autograd import Function
from torch.autograd.function import once_differentiable from torch.autograd.function import once_differentiable
from torch.nn.init import constant_, xavier_uniform_ from torch.nn.init import constant_, xavier_uniform_
from groundingdino import _C from groundingdino import _C
@ -290,7 +283,6 @@ class MultiScaleDeformableAttention(nn.Module):
assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
value = self.value_proj(value) value = self.value_proj(value)
if key_padding_mask is not None: if key_padding_mask is not None:
value = value.masked_fill(key_padding_mask[..., None], float(0)) value = value.masked_fill(key_padding_mask[..., None], float(0))
@ -339,7 +331,6 @@ class MultiScaleDeformableAttention(nn.Module):
sampling_locations = sampling_locations.float() sampling_locations = sampling_locations.float()
attention_weights = attention_weights.float() attention_weights = attention_weights.float()
output = MultiScaleDeformableAttnFunction.apply( output = MultiScaleDeformableAttnFunction.apply(
value, value,
spatial_shapes, spatial_shapes,
@ -416,4 +407,3 @@ def create_dummy_func(func, dependency, message=""):
raise ImportError(err) raise ImportError(err)
return _dummy return _dummy

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# DINO # DINO
# Copyright (c) 2022 IDEA. All Rights Reserved. # Copyright (c) 2022 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
@ -744,7 +749,13 @@ class DeformableTransformerEncoderLayer(nn.Module):
super().__init__() super().__init__()
# self attention # self attention
self.self_attn = MSDeformAttn(embed_dim=d_model, num_levels=n_levels, num_heads=n_heads, num_points=n_points, batch_first=True) self.self_attn = MSDeformAttn(
embed_dim=d_model,
num_levels=n_levels,
num_heads=n_heads,
num_points=n_points,
batch_first=True,
)
self.dropout1 = nn.Dropout(dropout) self.dropout1 = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(d_model) self.norm1 = nn.LayerNorm(d_model)
@ -804,7 +815,13 @@ class DeformableTransformerDecoderLayer(nn.Module):
super().__init__() super().__init__()
# cross attention # cross attention
self.cross_attn = MSDeformAttn(embed_dim=d_model, num_levels=n_levels, num_heads=n_heads, num_points=n_points, batch_first=True) self.cross_attn = MSDeformAttn(
embed_dim=d_model,
num_levels=n_levels,
num_heads=n_heads,
num_points=n_points,
batch_first=True,
)
self.dropout1 = nn.Dropout(dropout) if dropout > 0 else nn.Identity() self.dropout1 = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
self.norm1 = nn.LayerNorm(d_model) self.norm1 = nn.LayerNorm(d_model)

@ -1,3 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved # Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
""" """

@ -1,6 +1,7 @@
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# DINO # Grounding DINO
# Copyright (c) 2022 IDEA. All Rights Reserved. # url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------

@ -1,6 +1,7 @@
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# DINO # Grounding DINO
# Copyright (c) 2022 IDEA. All Rights Reserved. # url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details] # Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------ # ------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

@ -1,3 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# @Author: Yihao Chen # @Author: Yihao Chen
# @Date: 2021-08-16 16:03:17 # @Date: 2021-08-16 16:03:17

@ -1 +1 @@
__version__ = '0.1.0' __version__ = "0.1.0"

Loading…
Cancel
Save