protobuf/upb/msg_internal.h

/*
 * Copyright (c) 2009-2021, Google LLC
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Google LLC nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL Google LLC BE LIABLE FOR ANY DIRECT,
 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
** Our memory representation for parsing tables and messages themselves.
** Functions in this file are used by generated code and possibly reflection.
**
** The definitions in this file are internal to upb.
**/

#ifndef UPB_MSG_INT_H_
#define UPB_MSG_INT_H_

#include <stdlib.h>
#include <string.h>

#include "upb/collections/map_internal.h"
#include "upb/extension_registry.h"
#include "upb/internal/table.h"
#include "upb/msg.h"
#include "upb/upb.h"

// Must be last.
#include "upb/port/def.inc"

#ifdef __cplusplus
extern "C" {
#endif

/** upb_*Int* conversion routines ********************************************/

UPB_INLINE int32_t _upb_Int32_FromI(int v) { return (int32_t)v; }

UPB_INLINE int64_t _upb_Int64_FromLL(long long v) { return (int64_t)v; }

UPB_INLINE uint32_t _upb_UInt32_FromU(unsigned v) { return (uint32_t)v; }

UPB_INLINE uint64_t _upb_UInt64_FromULL(unsigned long long v) {
  return (uint64_t)v;
}

extern const float kUpb_FltInfinity;
extern const double kUpb_Infinity;

/** upb_MiniTable *************************************************************/

/* upb_MiniTable represents the memory layout of a given upb_MessageDef.  The
 * members are public so generated code can initialize them, but users MUST NOT
 * read or write any of its members. */

typedef struct {
  uint32_t number;
  uint16_t offset;
  int16_t presence;       // If >0, hasbit_index.  If <0, ~oneof_index
  uint16_t submsg_index;  // kUpb_NoSub if descriptortype != MESSAGE/GROUP/ENUM
  uint8_t descriptortype;
  uint8_t mode; /* upb_FieldMode | upb_LabelFlags |
                   (upb_FieldRep << kUpb_FieldRep_Shift) */
} upb_MiniTable_Field;

#define kUpb_NoSub ((uint16_t)-1)

typedef enum {
  kUpb_FieldMode_Map = 0,
  kUpb_FieldMode_Array = 1,
  kUpb_FieldMode_Scalar = 2,
} upb_FieldMode;

// Mask to isolate the upb_FieldMode from field.mode.
#define kUpb_FieldMode_Mask 3

/* Extra flags on the mode field. */
typedef enum {
  kUpb_LabelFlags_IsPacked = 4,
  kUpb_LabelFlags_IsExtension = 8,
  // Indicates that this descriptor type is an "alternate type":
  //   - for Int32, this indicates that the actual type is Enum (but was
  //     rewritten to Int32 because it is an open enum that requires no check).
  //   - for Bytes, this indicates that the actual type is String (but does
  //     not require any UTF-8 check).
  kUpb_LabelFlags_IsAlternate = 16,
} upb_LabelFlags;

// Note: we sort by this number when calculating layout order.
typedef enum {
  kUpb_FieldRep_1Byte = 0,
  kUpb_FieldRep_4Byte = 1,
  kUpb_FieldRep_StringView = 2,
  kUpb_FieldRep_8Byte = 3,

  kUpb_FieldRep_Shift = 6,  // Bit offset of the rep in upb_MiniTable_Field.mode
  kUpb_FieldRep_Max = kUpb_FieldRep_8Byte,
} upb_FieldRep;

UPB_INLINE upb_FieldMode upb_FieldMode_Get(const upb_MiniTable_Field* field) {
  return (upb_FieldMode)(field->mode & 3);
}

UPB_INLINE bool upb_IsRepeatedOrMap(const upb_MiniTable_Field* field) {
  /* This works because upb_FieldMode has no value 3. */
  return !(field->mode & kUpb_FieldMode_Scalar);
}

UPB_INLINE bool upb_IsSubMessage(const upb_MiniTable_Field* field) {
  return field->descriptortype == kUpb_FieldType_Message ||
         field->descriptortype == kUpb_FieldType_Group;
}

struct upb_Decoder;
struct upb_MiniTable;

typedef const char* _upb_FieldParser(struct upb_Decoder* d, const char* ptr,
                                     upb_Message* msg, intptr_t table,
                                     uint64_t hasbits, uint64_t data);

typedef struct {
  uint64_t field_data;
  _upb_FieldParser* field_parser;
} _upb_FastTable_Entry;

typedef struct {
  uint32_t mask_limit;   // Limit enum value that can be tested with mask.
  uint32_t value_count;  // Number of values after the bitfield.
  uint32_t data[];       // Bitmask + enumerated values follow.
} upb_MiniTable_Enum;

typedef enum {
  _kUpb_FastEnumCheck_ValueIsInEnum = 0,
  _kUpb_FastEnumCheck_ValueIsNotInEnum = 1,
  _kUpb_FastEnumCheck_CannotCheckFast = 2,
} _kUpb_FastEnumCheck_Status;

UPB_INLINE _kUpb_FastEnumCheck_Status
_upb_MiniTable_CheckEnumValueFast(const upb_MiniTable_Enum* e, uint32_t val) {
  if (UPB_UNLIKELY(val >= 64)) return _kUpb_FastEnumCheck_CannotCheckFast;
  uint64_t mask = e->data[0] | ((uint64_t)e->data[1] << 32);
  return (mask & (1ULL << val)) ? _kUpb_FastEnumCheck_ValueIsInEnum
                                : _kUpb_FastEnumCheck_ValueIsNotInEnum;
}

UPB_INLINE bool _upb_MiniTable_CheckEnumValueSlow(const upb_MiniTable_Enum* e,
                                                  uint32_t val) {
  if (val < e->mask_limit) return e->data[val / 32] & (1ULL << (val % 32));
  // OPT: binary search long lists?
  const uint32_t* start = &e->data[e->mask_limit / 32];
  const uint32_t* limit = &e->data[(e->mask_limit / 32) + e->value_count];
  for (const uint32_t* p = start; p < limit; p++) {
    if (*p == val) return true;
  }
  return false;
}

// Validates enum value against range defined by enum mini table.
UPB_INLINE bool upb_MiniTable_Enum_CheckValue(const upb_MiniTable_Enum* e,
                                              uint32_t val) {
  _kUpb_FastEnumCheck_Status status = _upb_MiniTable_CheckEnumValueFast(e, val);
  if (UPB_UNLIKELY(status == _kUpb_FastEnumCheck_CannotCheckFast)) {
    return _upb_MiniTable_CheckEnumValueSlow(e, val);
  }
  return status == _kUpb_FastEnumCheck_ValueIsInEnum ? true : false;
}

typedef union {
  const struct upb_MiniTable* submsg;
  const upb_MiniTable_Enum* subenum;
} upb_MiniTable_Sub;

typedef enum {
  kUpb_ExtMode_NonExtendable = 0,  // Non-extendable message.
  kUpb_ExtMode_Extendable = 1,     // Normal extendable message.
  kUpb_ExtMode_IsMessageSet = 2,   // MessageSet message.
  kUpb_ExtMode_IsMessageSet_ITEM =
      3,  // MessageSet item (temporary only, see decode.c)

  // During table building we steal a bit to indicate that the message is a map
  // entry.  *Only* used during table building!
  kUpb_ExtMode_IsMapEntry = 4,
} upb_ExtMode;

struct upb_MiniTable {
  const upb_MiniTable_Sub* subs;
  const upb_MiniTable_Field* fields;
  /* Must be aligned to sizeof(void*).  Doesn't include internal members like
   * unknown fields, extension dict, pointer to msglayout, etc. */
  uint16_t size;
  uint16_t field_count;
  uint8_t ext;  // upb_ExtMode, declared as uint8_t so sizeof(ext) == 1
  uint8_t dense_below;
  uint8_t table_mask;
  uint8_t required_count;  // Required fields have the lowest hasbits.
  /* To statically initialize the tables of variable length, we need a flexible
   * array member, and we need to compile in gnu99 mode (constant initialization
   * of flexible array members is a GNU extension, not in C99 unfortunately. */
  _upb_FastTable_Entry fasttable[];
};

struct upb_MiniTable_Extension {
  upb_MiniTable_Field field;
  const upb_MiniTable* extendee;
  upb_MiniTable_Sub sub; /* NULL unless submessage or proto2 enum */
};

typedef struct {
  const upb_MiniTable** msgs;
  const upb_MiniTable_Enum** enums;
  const upb_MiniTable_Extension** exts;
  int msg_count;
  int enum_count;
  int ext_count;
} upb_MiniTable_File;

// Computes a bitmask in which the |l->required_count| lowest bits are set,
// except that we skip the lowest bit (because upb never uses hasbit 0).
//
// Sample output:
//    requiredmask(1) => 0b10 (0x2)
//    requiredmask(5) => 0b111110 (0x3e)
UPB_INLINE uint64_t upb_MiniTable_requiredmask(const upb_MiniTable* l) {
  int n = l->required_count;
  assert(0 < n && n <= 63);
  return ((1ULL << n) - 1) << 1;
}

/** upb_Message ***************************************************************/

/* Internal members of a upb_Message that track unknown fields and/or
 * extensions. We can change this without breaking binary compatibility.  We put
 * these before the user's data.  The user's upb_Message* points after the
 * upb_Message_Internal. */

typedef struct {
  /* Total size of this structure, including the data that follows.
   * Must be aligned to 8, which is alignof(upb_Message_Extension) */
  uint32_t size;

  /* Offsets relative to the beginning of this structure.
   *
   * Unknown data grows forward from the beginning to unknown_end.
   * Extension data grows backward from size to ext_begin.
   * When the two meet, we're out of data and have to realloc.
   *
   * If we imagine that the final member of this struct is:
   *   char data[size - overhead];  // overhead =
   * sizeof(upb_Message_InternalData)
   *
   * Then we have:
   *   unknown data: data[0 .. (unknown_end - overhead)]
   *   extensions data: data[(ext_begin - overhead) .. (size - overhead)] */
  uint32_t unknown_end;
  uint32_t ext_begin;
  /* Data follows, as if there were an array:
   *   char data[size - sizeof(upb_Message_InternalData)]; */
} upb_Message_InternalData;

typedef struct {
  upb_Message_InternalData* internal;
  /* Message data follows. */
} upb_Message_Internal;

/* Maps upb_CType -> memory size. */
extern char _upb_CTypeo_size[12];

UPB_INLINE size_t upb_msg_sizeof(const upb_MiniTable* l) {
  return l->size + sizeof(upb_Message_Internal);
}

/* Inline version upb_Message_New(), for internal use */
UPB_INLINE upb_Message* _upb_Message_New(const upb_MiniTable* mini_table,
                                         upb_Arena* arena) {
  size_t size = upb_msg_sizeof(mini_table);
  void* mem = upb_Arena_Malloc(arena, size + sizeof(upb_Message_Internal));
  if (UPB_UNLIKELY(!mem)) return NULL;
  upb_Message* msg = UPB_PTR_AT(mem, sizeof(upb_Message_Internal), upb_Message);
  memset(mem, 0, size);
  return msg;
}

UPB_INLINE upb_Message_Internal* upb_Message_Getinternal(upb_Message* msg) {
  ptrdiff_t size = sizeof(upb_Message_Internal);
  return (upb_Message_Internal*)((char*)msg - size);
}

/* Clears the given message. */
void _upb_Message_Clear(upb_Message* msg, const upb_MiniTable* l);

/* Discards the unknown fields for this message only. */
void _upb_Message_DiscardUnknown_shallow(upb_Message* msg);

/* Adds unknown data (serialized protobuf data) to the given message.  The data
 * is copied into the message instance. */
bool _upb_Message_AddUnknown(upb_Message* msg, const char* data, size_t len,
                             upb_Arena* arena);

/** upb_Message_Extension *****************************************************/

/* The internal representation of an extension is self-describing: it contains
 * enough information that we can serialize it to binary format without needing
 * to look it up in a upb_ExtensionRegistry.
 *
 * This representation allocates 16 bytes to data on 64-bit platforms.  This is
 * rather wasteful for scalars (in the extreme case of bool, it wastes 15
 * bytes). We accept this because we expect messages to be the most common
 * extension type. */
typedef struct {
  const upb_MiniTable_Extension* ext;
  union {
    upb_StringView str;
    void* ptr;
    char scalar_data[8];
  } data;
} upb_Message_Extension;

/* Adds the given extension data to the given message. |ext| is copied into the
 * message instance. This logically replaces any previously-added extension with
 * this number */
upb_Message_Extension* _upb_Message_GetOrCreateExtension(
    upb_Message* msg, const upb_MiniTable_Extension* ext, upb_Arena* arena);

/* Returns an array of extensions for this message. Note: the array is
 * ordered in reverse relative to the order of creation. */
const upb_Message_Extension* _upb_Message_Getexts(const upb_Message* msg,
                                                  size_t* count);

/* Returns an extension for the given field number, or NULL if no extension
 * exists for this field number. */
const upb_Message_Extension* _upb_Message_Getext(
    const upb_Message* msg, const upb_MiniTable_Extension* ext);

void _upb_Message_Clearext(upb_Message* msg,
                           const upb_MiniTable_Extension* ext);

/** Hasbit access *************************************************************/

UPB_INLINE bool _upb_hasbit(const upb_Message* msg, size_t idx) {
  return (*UPB_PTR_AT(msg, idx / 8, const char) & (1 << (idx % 8))) != 0;
}

UPB_INLINE void _upb_sethas(const upb_Message* msg, size_t idx) {
  (*UPB_PTR_AT(msg, idx / 8, char)) |= (char)(1 << (idx % 8));
}

UPB_INLINE void _upb_clearhas(const upb_Message* msg, size_t idx) {
  (*UPB_PTR_AT(msg, idx / 8, char)) &= (char)(~(1 << (idx % 8)));
}

UPB_INLINE size_t _upb_Message_Hasidx(const upb_MiniTable_Field* f) {
  UPB_ASSERT(f->presence > 0);
  return f->presence;
}

UPB_INLINE bool _upb_hasbit_field(const upb_Message* msg,
                                  const upb_MiniTable_Field* f) {
  return _upb_hasbit(msg, _upb_Message_Hasidx(f));
}

UPB_INLINE void _upb_sethas_field(const upb_Message* msg,
                                  const upb_MiniTable_Field* f) {
  _upb_sethas(msg, _upb_Message_Hasidx(f));
}

UPB_INLINE void _upb_clearhas_field(const upb_Message* msg,
                                    const upb_MiniTable_Field* f) {
  _upb_clearhas(msg, _upb_Message_Hasidx(f));
}

/** Oneof case access *********************************************************/

UPB_INLINE uint32_t* _upb_oneofcase(upb_Message* msg, size_t case_ofs) {
  return UPB_PTR_AT(msg, case_ofs, uint32_t);
}

UPB_INLINE uint32_t _upb_getoneofcase(const void* msg, size_t case_ofs) {
  return *UPB_PTR_AT(msg, case_ofs, uint32_t);
}

UPB_INLINE size_t _upb_oneofcase_ofs(const upb_MiniTable_Field* f) {
  UPB_ASSERT(f->presence < 0);
  return ~(ptrdiff_t)f->presence;
}

UPB_INLINE uint32_t* _upb_oneofcase_field(upb_Message* msg,
                                          const upb_MiniTable_Field* f) {
  return _upb_oneofcase(msg, _upb_oneofcase_ofs(f));
}

UPB_INLINE uint32_t _upb_getoneofcase_field(const upb_Message* msg,
                                            const upb_MiniTable_Field* f) {
  return _upb_getoneofcase(msg, _upb_oneofcase_ofs(f));
}

UPB_INLINE bool _upb_has_submsg_nohasbit(const upb_Message* msg, size_t ofs) {
  return *UPB_PTR_AT(msg, ofs, const upb_Message*) != NULL;
}

/* Map entries aren't actually stored, they are only used during parsing.  For
 * parsing, it helps a lot if all map entry messages have the same layout.
 * The compiler and def.c must ensure that all map entries have this layout. */
typedef struct {
  upb_Message_Internal internal;
  union {
    upb_StringView str; /* For str/bytes. */
    upb_value val;      /* For all other types. */
  } k;
  union {
    upb_StringView str; /* For str/bytes. */
    upb_value val;      /* For all other types. */
  } v;
} upb_MapEntry;

#ifdef __cplusplus
} /* extern "C" */
#endif

#include "upb/port/undef.inc"

#endif /* UPB_MSG_INT_H_ */