From 507daf391d1f973cd5226d01361bb4eec9e14766 Mon Sep 17 00:00:00 2001 From: David Benjamin Date: Thu, 23 Jun 2022 00:55:22 -0400 Subject: [PATCH] Check Unicode string encodings in crypto/asn1. This checks validity for UTF8String, BMPString, and UniversalString. When we detach the core types from ASN1_ITEM, this will likely also be reshuffled around, probably into type-specific functions. But for now just get the checks in place. Update-Note: Invalid strings in X.509 certificates and other ASN.1 structures will now be rejected. This change is less risky than it seems because most strings in X.509 are in X509_NAME, which already rejected invalid instances of these string types (but not other string types) during canonicalization. See https://crbug.com/boringssl/412 for a discussion of that mess. Bug: 427 Change-Id: I0d7e24dfd841703d2a8581ec4e78ed5bc3862d75 Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/53225 Commit-Queue: David Benjamin Reviewed-by: Adam Langley --- crypto/asn1/asn1_test.cc | 69 ++++++++++++++++++++++++++++++++++++++++ crypto/asn1/tasn_dec.c | 43 ++++++++++++++++++------- 2 files changed, 101 insertions(+), 11 deletions(-) diff --git a/crypto/asn1/asn1_test.cc b/crypto/asn1/asn1_test.cc index 442c1eabb..195d5a89d 100644 --- a/crypto/asn1/asn1_test.cc +++ b/crypto/asn1/asn1_test.cc @@ -2175,6 +2175,75 @@ TEST(ASN1Test, ZeroTag) { 0x04, 0x01, 0x84, 0xb7, 0x09, 0x01, 0x00, 0x01, 0x61}); } +TEST(ASN1Test, StringEncoding) { + const struct { + ASN1_STRING *(*d2i)(ASN1_STRING **out, const uint8_t **inp, long len); + std::vector in; + bool valid; + } kTests[] = { + // All OCTET STRINGs are valid. + {d2i_ASN1_OCTET_STRING, {0x04, 0x00}, true}, + {d2i_ASN1_OCTET_STRING, {0x04, 0x01, 0x00}, true}, + + // UTF8String must be valid UTF-8. + {d2i_ASN1_UTF8STRING, {0x0c, 0x00}, true}, + {d2i_ASN1_UTF8STRING, {0x0c, 0x01, 'a'}, true}, + {d2i_ASN1_UTF8STRING, {0x0c, 0x03, 0xe2, 0x98, 0x83}, true}, + // Non-minimal, two-byte UTF-8. + {d2i_ASN1_UTF8STRING, {0x0c, 0x02, 0xc0, 0x81}, false}, + // Truncated, four-byte UTF-8. + {d2i_ASN1_UTF8STRING, {0x0c, 0x03, 0xf0, 0x80, 0x80}, false}, + // Low-surrogate value. + {d2i_ASN1_UTF8STRING, {0x0c, 0x03, 0xed, 0xa0, 0x80}, false}, + // High-surrogate value. + {d2i_ASN1_UTF8STRING, {0x0c, 0x03, 0xed, 0xb0, 0x81}, false}, + + // BMPString must be valid UCS-2. + {d2i_ASN1_BMPSTRING, {0x1e, 0x00}, true}, + {d2i_ASN1_BMPSTRING, {0x1e, 0x02, 0x00, 'a'}, true}, + // Truncated code unit. + {d2i_ASN1_BMPSTRING, {0x1e, 0x01, 'a'}, false}, + // Lone surrogate. + {d2i_ASN1_BMPSTRING, {0x1e, 0x02, 0xd8, 0}, false}, + // BMPString is UCS-2, not UTF-16, so surrogate pairs are also invalid. + {d2i_ASN1_BMPSTRING, {0x1e, 0x04, 0xd8, 0, 0xdc, 1}, false}, + + // UniversalString must be valid UTF-32. + {d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x00}, true}, + {d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x00, 0x00, 'a'}, true}, + // Maximum code point. + {d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x10, 0xff, 0xfd}, true}, + // Reserved. + {d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x10, 0xff, 0xfe}, false}, + {d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x10, 0xff, 0xff}, false}, + // Too high. + {d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x11, 0x00, 0x00}, false}, + // Surrogates are not characters. + {d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x00, 0xd8, 0}, false}, + // Truncated codepoint. + {d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x03, 0x00, 0x00, 0x00}, false}, + + // We interpret T61String as Latin-1, so all inputs are valid. + {d2i_ASN1_T61STRING, {0x14, 0x00}, true}, + {d2i_ASN1_T61STRING, {0x14, 0x01, 0x00}, true}, + }; + for (const auto& t : kTests) { + SCOPED_TRACE(Bytes(t.in)); + const uint8_t *inp; + + if (t.d2i != nullptr) { + inp = t.in.data(); + bssl::UniquePtr str(t.d2i(nullptr, &inp, t.in.size())); + EXPECT_EQ(t.valid, str != nullptr); + } + + // Also test with the ANY parser. + inp = t.in.data(); + bssl::UniquePtr any(d2i_ASN1_TYPE(nullptr, &inp, t.in.size())); + EXPECT_EQ(t.valid, any != nullptr); + } +} + // The ASN.1 macros do not work on Windows shared library builds, where usage of // |OPENSSL_EXPORT| is a bit stricter. #if !defined(OPENSSL_WINDOWS) || !defined(BORINGSSL_SHARED_LIBRARY) diff --git a/crypto/asn1/tasn_dec.c b/crypto/asn1/tasn_dec.c index c855fe529..de8779171 100644 --- a/crypto/asn1/tasn_dec.c +++ b/crypto/asn1/tasn_dec.c @@ -63,6 +63,7 @@ #include #include +#include "../bytestring/internal.h" #include "../internal.h" #include "internal.h" @@ -797,32 +798,51 @@ static int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len, case V_ASN1_OTHER: case V_ASN1_SET: case V_ASN1_SEQUENCE: - default: - if (utype == V_ASN1_BMPSTRING && (len & 1)) { - OPENSSL_PUT_ERROR(ASN1, ASN1_R_BMPSTRING_IS_WRONG_LENGTH); - goto err; + default: { + CBS cbs; + CBS_init(&cbs, cont, (size_t)len); + if (utype == V_ASN1_BMPSTRING) { + while (CBS_len(&cbs) != 0) { + uint32_t c; + if (!cbs_get_ucs2_be(&cbs, &c)) { + OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING); + goto err; + } + } } - if (utype == V_ASN1_UNIVERSALSTRING && (len & 3)) { - OPENSSL_PUT_ERROR(ASN1, ASN1_R_UNIVERSALSTRING_IS_WRONG_LENGTH); - goto err; + if (utype == V_ASN1_UNIVERSALSTRING) { + while (CBS_len(&cbs) != 0) { + uint32_t c; + if (!cbs_get_utf32_be(&cbs, &c)) { + OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING); + goto err; + } + } + } + if (utype == V_ASN1_UTF8STRING) { + while (CBS_len(&cbs) != 0) { + uint32_t c; + if (!cbs_get_utf8(&cbs, &c)) { + OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UTF8STRING); + goto err; + } + } } if (utype == V_ASN1_UTCTIME) { - CBS cbs; - CBS_init(&cbs, cont, (size_t)len); if (!CBS_parse_utc_time(&cbs, NULL, /*allow_timezone_offset=*/1)) { OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_TIME_FORMAT); goto err; } } if (utype == V_ASN1_GENERALIZEDTIME) { - CBS cbs; - CBS_init(&cbs, cont, (size_t)len); if (!CBS_parse_generalized_time(&cbs, NULL, /*allow_timezone_offset=*/0)) { OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_TIME_FORMAT); goto err; } } + // TODO(https://crbug.com/boringssl/427): Check other string types. + // All based on ASN1_STRING and handled the same if (!*pval) { stmp = ASN1_STRING_type_new(utype); @@ -842,6 +862,7 @@ static int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len, goto err; } break; + } } // If ASN1_ANY and NULL type fix up value if (typ && (utype == V_ASN1_NULL)) {