From b27438e1268d3ef645c6f8ce10c184bb7d57825d Mon Sep 17 00:00:00 2001
From: David Benjamin <davidben@google.com>
Date: Tue, 24 Aug 2021 14:24:38 -0400
Subject: [PATCH] Rewrite name constraints matching with CBS.

See also 8393de42498f8be75cf0353f5c9f906a43a748d2 from upstream and
CBS-2021-3712. But rather than do that, I've rewritten it with CBS, so
it's a bit clearer. The previous commit added tests.

Change-Id: Ie52e28f07b9bf805c8730eab7be5d40cb5d558b6
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/49008
Reviewed-by: Adam Langley <agl@google.com>
---
 crypto/bytestring/bytestring_test.cc |  22 ++++
 crypto/bytestring/cbs.c              |   8 ++
 crypto/x509/x509_test.cc             |   3 +
 crypto/x509v3/v3_ncons.c             | 166 ++++++++++++++++++---------
 include/openssl/bytestring.h         |   5 +
 5 files changed, 149 insertions(+), 55 deletions(-)

diff --git a/crypto/bytestring/bytestring_test.cc b/crypto/bytestring/bytestring_test.cc
index 484cd34c1..0877a2e72 100644
--- a/crypto/bytestring/bytestring_test.cc
+++ b/crypto/bytestring/bytestring_test.cc
@@ -115,6 +115,28 @@ TEST(CBSTest, GetPrefixedBad) {
   EXPECT_FALSE(CBS_get_u24_length_prefixed(&data, &prefixed));
 }
 
+TEST(CBSTest, GetUntilFirst) {
+  static const uint8_t kData[] = {0, 1, 2, 3, 0, 1, 2, 3};
+  CBS data;
+  CBS_init(&data, kData, sizeof(kData));
+
+  CBS prefix;
+  EXPECT_FALSE(CBS_get_until_first(&data, &prefix, 4));
+  EXPECT_EQ(CBS_data(&data), kData);
+  EXPECT_EQ(CBS_len(&data), sizeof(kData));
+
+  ASSERT_TRUE(CBS_get_until_first(&data, &prefix, 0));
+  EXPECT_EQ(CBS_len(&prefix), 0u);
+  EXPECT_EQ(CBS_data(&data), kData);
+  EXPECT_EQ(CBS_len(&data), sizeof(kData));
+
+  ASSERT_TRUE(CBS_get_until_first(&data, &prefix, 2));
+  EXPECT_EQ(CBS_data(&prefix), kData);
+  EXPECT_EQ(CBS_len(&prefix), 2u);
+  EXPECT_EQ(CBS_data(&data), kData + 2);
+  EXPECT_EQ(CBS_len(&data), sizeof(kData) - 2);
+}
+
 TEST(CBSTest, GetASN1) {
   static const uint8_t kData1[] = {0x30, 2, 1, 2};
   static const uint8_t kData2[] = {0x30, 3, 1, 2};
diff --git a/crypto/bytestring/cbs.c b/crypto/bytestring/cbs.c
index 5590ec88f..803c97ae5 100644
--- a/crypto/bytestring/cbs.c
+++ b/crypto/bytestring/cbs.c
@@ -216,6 +216,14 @@ int CBS_get_u24_length_prefixed(CBS *cbs, CBS *out) {
   return cbs_get_length_prefixed(cbs, out, 3);
 }
 
+int CBS_get_until_first(CBS *cbs, CBS *out, uint8_t c) {
+  const uint8_t *split = OPENSSL_memchr(CBS_data(cbs), c, CBS_len(cbs));
+  if (split == NULL) {
+    return 0;
+  }
+  return CBS_get_bytes(cbs, out, split - CBS_data(cbs));
+}
+
 // parse_base128_integer reads a big-endian base-128 integer from |cbs| and sets
 // |*out| to the result. This is the encoding used in DER for both high tag
 // number form and OID components.
diff --git a/crypto/x509/x509_test.cc b/crypto/x509/x509_test.cc
index d897ff52e..728bf7aca 100644
--- a/crypto/x509/x509_test.cc
+++ b/crypto/x509/x509_test.cc
@@ -1557,6 +1557,9 @@ TEST(X509Test, NameConstraints) {
        X509_V_ERR_PERMITTED_VIOLATION},
       {GEN_DNS, "foo.example.com", ".unrelated.much.longer.name.example",
        X509_V_ERR_PERMITTED_VIOLATION},
+      // NUL bytes, if not rejected, should not confuse the matching logic.
+      {GEN_DNS, std::string({'a', '\0', 'a'}), std::string({'a', '\0', 'b'}),
+       X509_V_ERR_PERMITTED_VIOLATION},
 
       // Names must be emails.
       {GEN_EMAIL, "not-an-email.example", "not-an-email.example",
diff --git a/crypto/x509v3/v3_ncons.c b/crypto/x509v3/v3_ncons.c
index 593a520e4..35c826e05 100644
--- a/crypto/x509v3/v3_ncons.c
+++ b/crypto/x509v3/v3_ncons.c
@@ -389,25 +389,73 @@ static int nc_dn(X509_NAME *nm, X509_NAME *base)
     return X509_V_OK;
 }
 
+static int starts_with(const CBS *cbs, uint8_t c)
+{
+    return CBS_len(cbs) > 0 && CBS_data(cbs)[0] == c;
+}
+
+static int equal_case(const CBS *a, const CBS *b)
+{
+    if (CBS_len(a) != CBS_len(b)) {
+        return 0;
+    }
+    /* Note we cannot use |OPENSSL_strncasecmp| because that would stop
+     * iterating at NUL. */
+    const uint8_t *a_data = CBS_data(a), *b_data = CBS_data(b);
+    for (size_t i = 0; i < CBS_len(a); i++) {
+        if (OPENSSL_tolower(a_data[i]) != OPENSSL_tolower(b_data[i])) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
+static int has_suffix_case(const CBS *a, const CBS *b)
+{
+    if (CBS_len(a) < CBS_len(b)) {
+        return 0;
+    }
+    CBS copy = *a;
+    CBS_skip(&copy, CBS_len(a) - CBS_len(b));
+    return equal_case(&copy, b);
+}
+
 static int nc_dns(ASN1_IA5STRING *dns, ASN1_IA5STRING *base)
 {
-    char *baseptr = (char *)base->data;
-    char *dnsptr = (char *)dns->data;
+    CBS dns_cbs, base_cbs;
+    CBS_init(&dns_cbs, dns->data, dns->length);
+    CBS_init(&base_cbs, base->data, base->length);
+
     /* Empty matches everything */
-    if (!*baseptr)
+    if (CBS_len(&base_cbs) == 0) {
         return X509_V_OK;
+    }
+
+    /* If |base_cbs| begins with a '.', do a simple suffix comparison. This is
+     * not part of RFC5280, but is part of OpenSSL's original behavior. */
+    if (starts_with(&base_cbs, '.')) {
+        if (has_suffix_case(&dns_cbs, &base_cbs)) {
+            return X509_V_OK;
+        }
+        return X509_V_ERR_PERMITTED_VIOLATION;
+    }
+
     /*
      * Otherwise can add zero or more components on the left so compare RHS
      * and if dns is longer and expect '.' as preceding character.
      */
-    if (dns->length > base->length) {
-        dnsptr += dns->length - base->length;
-        if (*baseptr != '.' && dnsptr[-1] != '.')
+    if (CBS_len(&dns_cbs) > CBS_len(&base_cbs)) {
+        uint8_t dot;
+        if (!CBS_skip(&dns_cbs, CBS_len(&dns_cbs) - CBS_len(&base_cbs) - 1) ||
+            !CBS_get_u8(&dns_cbs, &dot) ||
+            dot != '.') {
             return X509_V_ERR_PERMITTED_VIOLATION;
+        }
     }
 
-    if (OPENSSL_strcasecmp(baseptr, dnsptr))
+    if (!equal_case(&dns_cbs, &base_cbs)) {
         return X509_V_ERR_PERMITTED_VIOLATION;
+    }
 
     return X509_V_OK;
 
@@ -415,86 +463,94 @@ static int nc_dns(ASN1_IA5STRING *dns, ASN1_IA5STRING *base)
 
 static int nc_email(ASN1_IA5STRING *eml, ASN1_IA5STRING *base)
 {
-    const char *baseptr = (char *)base->data;
-    const char *emlptr = (char *)eml->data;
-
-    const char *baseat = strchr(baseptr, '@');
-    const char *emlat = strchr(emlptr, '@');
-    if (!emlat)
+    CBS eml_cbs, base_cbs;
+    CBS_init(&eml_cbs, eml->data, eml->length);
+    CBS_init(&base_cbs, base->data, base->length);
+
+    /* TODO(davidben): In OpenSSL 1.1.1, this switched from the first '@' to the
+     * last one. Match them here, or perhaps do an actual parse. Looks like
+     * multiple '@'s may be allowed in quoted strings. */
+    CBS eml_local, base_local;
+    if (!CBS_get_until_first(&eml_cbs, &eml_local, '@')) {
         return X509_V_ERR_UNSUPPORTED_NAME_SYNTAX;
+    }
+    int base_has_at = CBS_get_until_first(&base_cbs, &base_local, '@');
+
     /* Special case: inital '.' is RHS match */
-    if (!baseat && (*baseptr == '.')) {
-        if (eml->length > base->length) {
-            emlptr += eml->length - base->length;
-            if (!OPENSSL_strcasecmp(baseptr, emlptr))
-                return X509_V_OK;
+    if (!base_has_at && starts_with(&base_cbs, '.')) {
+        if (has_suffix_case(&eml_cbs, &base_cbs)) {
+            return X509_V_OK;
         }
         return X509_V_ERR_PERMITTED_VIOLATION;
     }
 
     /* If we have anything before '@' match local part */
-
-    if (baseat) {
-        if (baseat != baseptr) {
-            if ((baseat - baseptr) != (emlat - emlptr))
-                return X509_V_ERR_PERMITTED_VIOLATION;
+    if (base_has_at) {
+        /* TODO(davidben): This interprets a constraint of "@example.com" as
+         * "example.com", which is not part of RFC5280. */
+        if (CBS_len(&base_local) > 0) {
             /* Case sensitive match of local part */
-            if (strncmp(baseptr, emlptr, emlat - emlptr))
+            if (!CBS_mem_equal(&base_local, CBS_data(&eml_local),
+                               CBS_len(&eml_local))) {
                 return X509_V_ERR_PERMITTED_VIOLATION;
+            }
         }
         /* Position base after '@' */
-        baseptr = baseat + 1;
+        assert(starts_with(&base_cbs, '@'));
+        CBS_skip(&base_cbs, 1);
     }
-    emlptr = emlat + 1;
+
     /* Just have hostname left to match: case insensitive */
-    if (OPENSSL_strcasecmp(baseptr, emlptr))
+    assert(starts_with(&eml_cbs, '@'));
+    CBS_skip(&eml_cbs, 1);
+    if (!equal_case(&base_cbs, &eml_cbs)) {
         return X509_V_ERR_PERMITTED_VIOLATION;
+    }
 
     return X509_V_OK;
-
 }
 
 static int nc_uri(ASN1_IA5STRING *uri, ASN1_IA5STRING *base)
 {
-    const char *baseptr = (char *)base->data;
-    const char *hostptr = (char *)uri->data;
-    const char *p = strchr(hostptr, ':');
-    int hostlen;
+    CBS uri_cbs, base_cbs;
+    CBS_init(&uri_cbs, uri->data, uri->length);
+    CBS_init(&base_cbs, base->data, base->length);
+
     /* Check for foo:// and skip past it */
-    if (!p || (p[1] != '/') || (p[2] != '/'))
+    CBS scheme;
+    uint8_t byte;
+    if (!CBS_get_until_first(&uri_cbs, &scheme, ':') ||
+        !CBS_skip(&uri_cbs, 1) ||  // Skip the colon
+        !CBS_get_u8(&uri_cbs, &byte) || byte != '/' ||
+        !CBS_get_u8(&uri_cbs, &byte) || byte != '/') {
         return X509_V_ERR_UNSUPPORTED_NAME_SYNTAX;
-    hostptr = p + 3;
-
-    /* Determine length of hostname part of URI */
-
-    /* Look for a port indicator as end of hostname first */
-
-    p = strchr(hostptr, ':');
-    /* Otherwise look for trailing slash */
-    if (!p)
-        p = strchr(hostptr, '/');
+    }
 
-    if (!p)
-        hostlen = strlen(hostptr);
-    else
-        hostlen = p - hostptr;
+    /* Look for a port indicator as end of hostname first. Otherwise look for
+     * trailing slash, or the end of the string.
+     * TODO(davidben): This is not a correct URI parser and mishandles IPv6
+     * literals. */
+    CBS host;
+    if (!CBS_get_until_first(&uri_cbs, &host, ':') &&
+        !CBS_get_until_first(&uri_cbs, &host, '/')) {
+        host = uri_cbs;
+    }
 
-    if (hostlen == 0)
+    if (CBS_len(&host) == 0) {
         return X509_V_ERR_UNSUPPORTED_NAME_SYNTAX;
+    }
 
     /* Special case: inital '.' is RHS match */
-    if (*baseptr == '.') {
-        if (hostlen > base->length) {
-            p = hostptr + hostlen - base->length;
-            if (!OPENSSL_strncasecmp(p, baseptr, base->length))
-                return X509_V_OK;
+    if (starts_with(&base_cbs, '.')) {
+        if (has_suffix_case(&host, &base_cbs)) {
+            return X509_V_OK;
         }
         return X509_V_ERR_PERMITTED_VIOLATION;
     }
 
-    if ((base->length != (int)hostlen)
-        || OPENSSL_strncasecmp(hostptr, baseptr, hostlen))
+    if (!equal_case(&base_cbs, &host)) {
         return X509_V_ERR_PERMITTED_VIOLATION;
+    }
 
     return X509_V_OK;
 
diff --git a/include/openssl/bytestring.h b/include/openssl/bytestring.h
index 180326d26..5ef374201 100644
--- a/include/openssl/bytestring.h
+++ b/include/openssl/bytestring.h
@@ -154,6 +154,11 @@ OPENSSL_EXPORT int CBS_get_u16_length_prefixed(CBS *cbs, CBS *out);
 // returns one on success and zero on error.
 OPENSSL_EXPORT int CBS_get_u24_length_prefixed(CBS *cbs, CBS *out);
 
+// CBS_get_until_first finds the first instance of |c| in |cbs|. If found, it
+// sets |*out| to the text before the match, advances |cbs| over it, and returns
+// one. Otherwise, it returns zero and leaves |cbs| unmodified.
+OPENSSL_EXPORT int CBS_get_until_first(CBS *cbs, CBS *out, uint8_t c);
+
 
 // Parsing ASN.1
 //