update codes and demos

release_code
SlongLiu 2 years ago
parent cc02608066
commit 08bd3c2e9b
  1. 13
      demo/inference_on_a_image.py
  2. 5
      groundingdino/models/GroundingDINO/__init__.py
  3. 5
      groundingdino/models/GroundingDINO/backbone/backbone.py
  4. 5
      groundingdino/models/GroundingDINO/backbone/position_encoding.py
  5. 5
      groundingdino/models/GroundingDINO/backbone/swin_transformer.py
  6. 7
      groundingdino/models/GroundingDINO/bertwarper.py
  7. 7
      groundingdino/models/GroundingDINO/fuse_modules.py
  8. 5
      groundingdino/models/GroundingDINO/groundingdino.py
  9. 26
      groundingdino/models/GroundingDINO/ms_deform_attn.py
  10. 21
      groundingdino/models/GroundingDINO/transformer.py
  11. 6
      groundingdino/models/GroundingDINO/transformer_vanilla.py
  12. 5
      groundingdino/models/GroundingDINO/utils.py
  13. 5
      groundingdino/models/__init__.py
  14. 6
      groundingdino/models/registry.py
  15. 2
      groundingdino/version.py

@ -2,8 +2,6 @@ import argparse
import os
import sys
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
import numpy as np
import torch
from PIL import Image, ImageDraw, ImageFont
@ -14,6 +12,8 @@ from groundingdino.util import box_ops
from groundingdino.util.slconfig import SLConfig
from groundingdino.util.utils import clean_state_dict, get_phrases_from_posmap
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
def plot_boxes_to_image(image_pil, tgt):
H, W = tgt["size"]
@ -89,7 +89,6 @@ def get_grounding_output(model, image, caption, box_threshold, text_threshold, w
boxes = outputs["pred_boxes"].cpu()[0] # (nq, 4)
logits.shape[0]
# filter output
logits_filt = logits.clone()
boxes_filt = boxes.clone()
@ -126,12 +125,8 @@ if __name__ == "__main__":
"--output_dir", "-o", type=str, default="outputs", required=True, help="output directory"
)
parser.add_argument(
"--box_threshold", type=float, default=0.3, help="box threshold"
)
parser.add_argument(
"--text_threshold", type=float, default=0.25, help="text threshold"
)
parser.add_argument("--box_threshold", type=float, default=0.3, help="box threshold")
parser.add_argument("--text_threshold", type=float, default=0.25, help="text threshold")
args = parser.parse_args()
# cfg

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Conditional DETR
# Copyright (c) 2021 Microsoft. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Conditional DETR
# Copyright (c) 2021 Microsoft. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# DINO
# Copyright (c) 2022 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# DINO
# Copyright (c) 2022 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]

@ -1,3 +1,10 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
import torch
import torch.nn.functional as F
import torch.utils.checkpoint as checkpoint

@ -1,3 +1,10 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
import torch
import torch.nn as nn
import torch.nn.functional as F

@ -1,6 +1,7 @@
# ------------------------------------------------------------------------
# DINO
# Copyright (c) 2022 IDEA. All Rights Reserved.
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Conditional DETR model and criterion classes.

@ -1,18 +1,9 @@
# coding=utf-8
# Copyright 2022 The IDEA Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ------------------------------------------------------------------------------------------------
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Deformable DETR
# Copyright (c) 2020 SenseTime. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
@ -26,12 +17,14 @@
import math
import warnings
from typing import Optional
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Function
from torch.autograd.function import once_differentiable
from torch.nn.init import constant_, xavier_uniform_
from groundingdino import _C
@ -290,7 +283,6 @@ class MultiScaleDeformableAttention(nn.Module):
assert (spatial_shapes[:, 0] * spatial_shapes[:, 1]).sum() == num_value
value = self.value_proj(value)
if key_padding_mask is not None:
value = value.masked_fill(key_padding_mask[..., None], float(0))
@ -339,7 +331,6 @@ class MultiScaleDeformableAttention(nn.Module):
sampling_locations = sampling_locations.float()
attention_weights = attention_weights.float()
output = MultiScaleDeformableAttnFunction.apply(
value,
spatial_shapes,
@ -416,4 +407,3 @@ def create_dummy_func(func, dependency, message=""):
raise ImportError(err)
return _dummy

@ -1,4 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# DINO
# Copyright (c) 2022 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
@ -744,7 +749,13 @@ class DeformableTransformerEncoderLayer(nn.Module):
super().__init__()
# self attention
self.self_attn = MSDeformAttn(embed_dim=d_model, num_levels=n_levels, num_heads=n_heads, num_points=n_points, batch_first=True)
self.self_attn = MSDeformAttn(
embed_dim=d_model,
num_levels=n_levels,
num_heads=n_heads,
num_points=n_points,
batch_first=True,
)
self.dropout1 = nn.Dropout(dropout)
self.norm1 = nn.LayerNorm(d_model)
@ -804,7 +815,13 @@ class DeformableTransformerDecoderLayer(nn.Module):
super().__init__()
# cross attention
self.cross_attn = MSDeformAttn(embed_dim=d_model, num_levels=n_levels, num_heads=n_heads, num_points=n_points, batch_first=True)
self.cross_attn = MSDeformAttn(
embed_dim=d_model,
num_levels=n_levels,
num_heads=n_heads,
num_points=n_points,
batch_first=True,
)
self.dropout1 = nn.Dropout(dropout) if dropout > 0 else nn.Identity()
self.norm1 = nn.LayerNorm(d_model)

@ -1,3 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Copyright (c) Aishwarya Kamath & Nicolas Carion. Licensed under the Apache License 2.0. All Rights Reserved
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""

@ -1,6 +1,7 @@
# ------------------------------------------------------------------------
# DINO
# Copyright (c) 2022 IDEA. All Rights Reserved.
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------

@ -1,6 +1,7 @@
# ------------------------------------------------------------------------
# DINO
# Copyright (c) 2022 IDEA. All Rights Reserved.
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved

@ -1,3 +1,9 @@
# ------------------------------------------------------------------------
# Grounding DINO
# url: https://github.com/IDEA-Research/GroundingDINO
# Copyright (c) 2023 IDEA. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 [see LICENSE for details]
# ------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# @Author: Yihao Chen
# @Date: 2021-08-16 16:03:17

@ -1 +1 @@
__version__ = '0.1.0'
__version__ = "0.1.0"

Loading…
Cancel
Save