# protobuf/upb/bazel/amalgamate.py

#!/usr/bin/python
#
# Protocol Buffers - Google's data interchange format
# Copyright 2023 Google LLC.  All rights reserved.
# https://developers.google.com/protocol-buffers/
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
#     * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#     * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following disclaimer
# in the documentation and/or other materials provided with the
# distribution.
#     * Neither the name of Google LLC nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import sys
import re
import os

INCLUDE_RE = re.compile('^#include "([^"]*)"')

def parse_include(line):
  """Returns the path named by a quoted `#include` at the start of `line`.

  Only the `#include "path"` spelling that begins in the first column is
  recognized; every other line yields None.
  """
  found = INCLUDE_RE.match(line)
  if found is None:
    return None
  return found.group(1)

class Amalgamator:
  """Concatenates upb sources into a single .c/.h output pair.

  While a file is copied, each quoted `#include` of a "upb..." or "google..."
  path is replaced by the contents of the matching header, which is written
  to the amalgamated header. Each include path is inlined at most once.
  """

  # Prefix of the first line of a license header.
  _LICENSE_FIRST_LINE = "// Protocol Buffers - Google's data interchange format"
  # Prefixes of the last line of the full and of the abbreviated header.
  _LICENSE_LAST_LINES = (
      "// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE",
      "// https://developers.google.com/open-source/licenses/bsd",
  )

  def __init__(self, h_out, c_out):
    """Opens both output files for writing.

    Args:
      h_out: Path of the amalgamated header to create.
      c_out: Path of the amalgamated source to create.
    """
    self.include_paths = ["."]
    self.included = set()
    self.output_h = open(h_out, "w")
    try:
      self.output_c = open(c_out, "w")
    except Exception:
      # Don't leak the header handle when the source file can't be opened.
      self.output_h.close()
      raise
    # The source file includes the header by its base name only.
    self.h_out = h_out.split("/")[-1]

  def close(self):
    """Closes both output files. Calling it again is harmless."""
    try:
      self.output_h.close()
    finally:
      self.output_c.close()

  def amalgamate(self, h_files, c_files):
    """Writes the amalgamated source and header, then closes both outputs.

    Args:
      h_files: Paths of the headers/.inc files available for inlining. Must
        contain files ending in upb/port/def.inc and upb/port/undef.inc.
      c_files: Paths of the C sources, processed in the given order.

    Raises:
      RuntimeError: If a port file is missing from h_files, or if a
        "upb..."/"google..." include cannot be resolved against h_files.
    """
    try:
      self.h_files = set(h_files)
      self.output_c.write("/* Amalgamated source file */\n")
      self.output_c.write('#include "%s"\n' % (self.h_out))
      if self.h_out == "ruby-upb.h":
        self.output_h.write("// Ruby is still using proto3 enum semantics for proto2\n")
        self.output_h.write("#define UPB_DISABLE_CLOSED_ENUM_CHECKING\n")

      self.output_h.write("/* Amalgamated source file */\n")

      # Resolve both port files up front: includes of them are skipped while
      # processing, and they bracket the contents of both outputs instead.
      port_def = self._require_include_file("upb/port/def.inc")
      port_undef = self._require_include_file("upb/port/undef.inc")
      self._process_file(port_def, self.output_h)
      self._process_file(port_def, self.output_c)

      for c_file in c_files:
        self._process_file(c_file, self.output_c)

      self._process_file(port_undef, self.output_h)
      self._process_file(port_undef, self.output_c)
    finally:
      # Close even on failure so buffered output is flushed and neither
      # handle is leaked.
      self.close()

  @staticmethod
  def _strip_license_header(lines):
    """Returns `lines` without a leading license header, if it has one.

    A header is recognized by its first line and runs through the first line
    that starts with one of the known closing prefixes. Input that is empty,
    has no header, or has a header that is never closed is returned as is.
    """
    if not lines or not lines[0].startswith(Amalgamator._LICENSE_FIRST_LINE):
      return lines
    for index, line in enumerate(lines):
      if line.startswith(Amalgamator._LICENSE_LAST_LINES):
        return lines[index + 1:]
    return lines

  def _process_file(self, infile_name, outfile):
    """Copies one input file to `outfile`, inlining upb includes.

    Args:
      infile_name: Path of the file to read.
      outfile: Writable file object that receives the lines which are not
        consumed as includes.
    """
    with open(infile_name) as infile:
      lines = infile.readlines()

    for line in self._strip_license_header(lines):
      if not self._process_include(line):
        outfile.write(line)

  def _find_include_file(self, name):
    """Returns a not-yet-inlined header whose path ends with `name`, or None."""
    for h_file in self.h_files:
      if h_file.endswith(name):
        return h_file
    return None

  def _require_include_file(self, name):
    """Like _find_include_file(), but raises RuntimeError when not found."""
    h_file = self._find_include_file(name)
    if h_file is None:
      raise RuntimeError("Couldn't find include: " + name + ", h_files=" + repr(self.h_files))
    return h_file

  def _process_include(self, line):
    """Handles `line` if it is an include this tool is responsible for.

    Returns:
      True if the line was consumed (inlined into the header output or
      deliberately dropped) and must not be copied; False if the caller
      should copy the line unchanged.

    Raises:
      RuntimeError: If the include should be inlined but no remaining header
        matches it.
    """
    include = parse_include(line)
    if not include:
      return False
    if not include.startswith(("upb", "google")):
      # Other includes are left in the output as written.
      return False
    if include.endswith(("port/def.inc", "port/undef.inc")):
      # Skip, we handle this separately
      return True
    if include.endswith("hpp"):
      # Skip, we don't support the amalgamation from C++.
      return True
    if re.search(r"stage\d/", include):
      # Includes with a stageN/ path component are dropped, not inlined.
      return True
    if include in self.included:
      # Already inlined earlier; drop the duplicate.
      return True

    # Include this upb header inline.
    h_file = self._require_include_file(include)
    self.h_files.remove(h_file)
    self.included.add(include)
    self._process_file(h_file, self.output_h)
    return True

# ---- main ----

# Command line: <c_out> <h_out> <input file>...
c_out, h_out = sys.argv[1], sys.argv[2]
amalgamator = Amalgamator(h_out, c_out)

# Sort the remaining arguments by suffix: .h/.inc files are passed as headers,
# everything else as C sources. Relative order within each group is kept.
inputs = [raw.strip() for raw in sys.argv[3:]]
h_files = [path for path in inputs if path.endswith((".h", ".inc"))]
c_files = [path for path in inputs if not path.endswith((".h", ".inc"))]

amalgamator.amalgamate(h_files, c_files)
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago			`#!/usr/bin/python`
Added license headers to all files. 4 years ago			`#`
Reformat copyright headers PiperOrigin-RevId: 554509301 1 year ago			`# Protocol Buffers - Google's data interchange format`
			`# Copyright 2023 Google LLC. All rights reserved.`
			`# https://developers.google.com/protocol-buffers/`
Added license headers to all files. 4 years ago			`#`
			`# Redistribution and use in source and binary forms, with or without`
Reformat copyright headers PiperOrigin-RevId: 554509301 1 year ago			`# modification, are permitted provided that the following conditions are`
			`# met:`
			`#`
Added license headers to all files. 4 years ago			`# * Redistributions of source code must retain the above copyright`
Reformat copyright headers PiperOrigin-RevId: 554509301 1 year ago			`# notice, this list of conditions and the following disclaimer.`
			`# * Redistributions in binary form must reproduce the above`
			`# copyright notice, this list of conditions and the following disclaimer`
			`# in the documentation and/or other materials provided with the`
			`# distribution.`
			`# * Neither the name of Google LLC nor the names of its`
			`# contributors may be used to endorse or promote products derived from`
			`# this software without specific prior written permission.`
Added license headers to all files. 4 years ago			`#`
Reformat copyright headers PiperOrigin-RevId: 554509301 1 year ago			`# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS`
			`# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT`
			`# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR`
			`# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT`
			`# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,`
			`# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT`
			`# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,`
			`# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY`
			`# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT`
			`# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE`
			`# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.`
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago
			`import sys`
			`import re`
Fixed amalgamation and CMake build. 6 years ago			`import os`
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago
upb: tag message/types.h as an export PiperOrigin-RevId: 563149248 1 year ago			`INCLUDE_RE = re.compile('^#include "([^"]*)"')`
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago
			`def parse_include(line):`
			`match = INCLUDE_RE.match(line)`
			`return match.groups()[0] if match else None`

			`class Amalgamator:`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`def __init__(self, h_out, c_out):`
Fixed amalgamation and CMake build. 6 years ago			`self.include_paths = ["."]`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`self.included = set()`
			`self.output_h = open(h_out, "w")`
			`self.output_c = open(c_out, "w")`
			`self.h_out = h_out.split("/")[-1]`
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`def amalgamate(self, h_files, c_files):`
			`self.h_files = set(h_files)`
Fixes for Google import. 6 years ago			`self.output_c.write("/* Amalgamated source file */\n")`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`self.output_c.write('#include "%s"\n' % (self.h_out))`
			`if self.h_out == "ruby-upb.h":`
Two Ruby changes to unblock the release 3 years ago			`self.output_h.write("// Ruby is still using proto3 enum semantics for proto2\n")`
Implement edition 2023 support in all Ruby runtimes. Three of these runtimes are based on upb, and the fourth is based on the Java runtime. Both of these already have editions support, so this was mostly just a matter of: - Advertising support to allow editions codegen - Stripping features from the runtime options - Hooking up conformance tests - Adding some lightweight editions tests There are also a few minor orthogonal fixes included here: - Ruby's upb hack for treating all enums as open enums needed tweaking - The `enable_editions` flag is no longer needed in our internal proto rules PiperOrigin-RevId: 616256211 9 months ago			`self.output_h.write("#define UPB_DISABLE_CLOSED_ENUM_CHECKING\n")`
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago
Fixes for Google import. 6 years ago			`self.output_h.write("/* Amalgamated source file */\n")`
Fixed amalgamation. 6 years ago
move portability stuff into upb/port/ Also delete redundant system #includes that are already pulled in by port/def.inc PiperOrigin-RevId: 486398989 2 years ago			`port_def = self._find_include_file("upb/port/def.inc")`
			`port_undef = self._find_include_file("upb/port/undef.inc")`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`self._process_file(port_def, self.output_h)`
			`self._process_file(port_def, self.output_c)`

			`for file in c_files:`
			`self._process_file(file, self.output_c)`
Fixed amalgamation and CMake build. 6 years ago
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`self._process_file(port_undef, self.output_h)`
			`self._process_file(port_undef, self.output_c)`
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago
			`def _process_file(self, infile_name, outfile):`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`lines = open(infile_name).readlines()`
Updated amalgamator to avoid duplicating license blocks. 4 years ago
Update amalgamate.py to reflect new copyright headers PiperOrigin-RevId: 556998294 1 year ago			`has_copyright = lines[0].startswith(`
			`"// Protocol Buffers - Google's data interchange format"`
			`)`
Updated amalgamator to avoid duplicating license blocks. 4 years ago			`if has_copyright:`
Update amalgamate.py to reflect new copyright headers PiperOrigin-RevId: 556998294 1 year ago			`while not lines[0].startswith(`
			`"// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH"`
			`" DAMAGE"`
Shorten our license headers into an abbreviated form that references LICENSE instead of including it in full. PiperOrigin-RevId: 564874468 1 year ago			`) and not lines[0].startswith(`
			`"// https://developers.google.com/open-source/licenses/bsd"`
Update amalgamate.py to reflect new copyright headers PiperOrigin-RevId: 556998294 1 year ago			`):`
Updated amalgamator to avoid duplicating license blocks. 4 years ago			`lines.pop(0)`
			`lines.pop(0)`

			`for line in lines:`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`if not self._process_include(line):`
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago			`outfile.write(line)`

Updated staleness test and amalgamator to work cross-repo. 2 years ago			`def _find_include_file(self, name):`
			`for h_file in self.h_files:`
			`if h_file.endswith(name):`
			`return h_file`

			`def _process_include(self, line):`
Separated C++ wrappers into separate files in a backward-compatible way. (#265) This makes both the C (.h) and C++ (.hpp) files read nicer and keeps the core of upb C-only. Existing users of the C++ wrappers will have to add manual #includes of the .hpp files. 5 years ago			`include = parse_include(line)`
			`if not include:`
			`return False`
Amalgamation no longer bundles wyhash, but #includes it. Also fixed a few spelling mistakes. 4 years ago			`if not (include.startswith("upb") or include.startswith("google")):`
Separated C++ wrappers into separate files in a backward-compatible way. (#265) This makes both the C (.h) and C++ (.hpp) files read nicer and keeps the core of upb C-only. Existing users of the C++ wrappers will have to add manual #includes of the .hpp files. 5 years ago			`return False`
move portability stuff into upb/port/ Also delete redundant system #includes that are already pulled in by port/def.inc PiperOrigin-RevId: 486398989 2 years ago			`if include and (include.endswith("port/def.inc") or include.endswith("port/undef.inc")):`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`# Skip, we handle this separately`
			`return True`
Separated C++ wrappers into separate files in a backward-compatible way. (#265) This makes both the C (.h) and C++ (.hpp) files read nicer and keeps the core of upb C-only. Existing users of the C++ wrappers will have to add manual #includes of the .hpp files. 5 years ago			`if include.endswith("hpp"):`
			`# Skip, we don't support the amalgamation from C++.`
			`return True`
Fixed layering check violations once and for all in upb bootstrapping. Our bootstrapping setup compiles multiple versions of the generated code for `descriptor.proto` and `plugin.proto`, one for each stage of the bootstrap. For source files (`.c`), we can always select the correct version of the file in the BUILD rules, but for header files we need to make sure the correct stage's file is always selected via `#include`. Previously we used `cc_library(includes=[])` to make it appear as though our bootstrapped headers had the same names as the "real" headers. This allowed a lot of the code to be agnostic to whether a bootstrap header was being used, which simplified things because we did not have to change the code performing the `#include`. Unfortunately, due to build system limitations, this sometimes led to the incorrect header getting included. This should not have been possible, because we had a clean BUILD graph that should have removed all ambiguity about which header should be available. But in non-sandboxed builds, the compiler was able to find headers that were not actually in `deps=[]`, and worse it preferred those headers over the headers that actually were in `deps=[]`. This led to unintended results and errors about layering check violations. This CL fixes the problem by removing all use of `includes=[]`. We now spell a full pathname to all bootstrap headers, so this class of errors is no longer possible. Unfortunately this adds some complexity, as we have to hard-code these full paths in several places. A nice improvement in this CL is that `bootstrap_upb_proto_library()` can now only be used for bootstrapping; it only exposes the `descriptor_bootstrap.h` / `plugin_bootstrap.h` files. Anyone wanting to use the normal `net/proto2/proto/descriptor.upb.h` file should depend on `//net/proto2/proto:descriptor_upb_c_proto` target instead. PiperOrigin-RevId: 664953196 4 months ago			`if re.search(r"stage\d/", include):`
			`return True`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`elif include in self.included:`
			`return True`
Separated C++ wrappers into separate files in a backward-compatible way. (#265) This makes both the C (.h) and C++ (.hpp) files read nicer and keeps the core of upb C-only. Existing users of the C++ wrappers will have to add manual #includes of the .hpp files. 5 years ago			`else:`
			`# Include this upb header inline.`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`h_file = self._find_include_file(include)`
			`if h_file:`
			`self.h_files.remove(h_file)`
Separated C++ wrappers into separate files in a backward-compatible way. (#265) This makes both the C (.h) and C++ (.hpp) files read nicer and keeps the core of upb C-only. Existing users of the C++ wrappers will have to add manual #includes of the .hpp files. 5 years ago			`self.included.add(include)`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`self._process_file(h_file, self.output_h)`
			`return True`
			`raise RuntimeError("Couldn't find include: " + include + ", h_files=" + repr(self.h_files))`
Amalgamated distribution (upb.c/upb.h) tool. There are a number of tweaks to get this to work: - The #include dependence graph wasn't quite complete, and I had to add a few #includes to get the tool to work. - I had to change a number of symbol names to avoid conflicts between 'static' definitions in different .c files. This could be avoided if the tool were smart enough to rename static symbols to have unique prefixes instead, but (i) this requires semantic understanding of C, and (ii) the macro-defined static functions (e.g., handlers for primitive types in several places) would probably trip this up. Verified that the resulting upb.h/upb.c compiles and doesn't have any unresolved references. 10 years ago
			`# ---- main ----`

Updated staleness test and amalgamator to work cross-repo. 2 years ago			`c_out = sys.argv[1]`
			`h_out = sys.argv[2]`
			`amalgamator = Amalgamator(h_out, c_out)`
			`c_files = []`
			`h_files = []`
Fixed amalgamation and CMake build. 6 years ago
Created an amalgamation without handlers, and fixed some bugs. (#283) * Created amalgamation with upb_msg but no handlers. * Bugfix for upb_array_resize(). * Renamed "lite" amalgamation to "core", to avoid confusion. Traditionally "lite" has meant "without reflection", but here we mean it as "without handlers-based code." * Build fixes from CI tests. * Removed some more C++-style comments. * Fix for out-of-order statements. 5 years ago			`for arg in sys.argv[3:]:`
Fixed amalgamation and CMake build. 6 years ago			`arg = arg.strip()`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`if arg.endswith(".h") or arg.endswith(".inc"):`
			`h_files.append(arg)`
Fixed amalgamation and CMake build. 6 years ago			`else:`
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`c_files.append(arg)`
Fixed amalgamation. 6 years ago
Updated staleness test and amalgamator to work cross-repo. 2 years ago			`amalgamator.amalgamate(h_files, c_files)`