Check Unicode string encodings in crypto/asn1.

This checks validity for UTF8String, BMPString, and UniversalString.

When we detach the core types from ASN1_ITEM, this will likely also be
reshuffled around, probably into type-specific functions. But for now
just get the checks in place.

Update-Note: Invalid strings in X.509 certificates and other ASN.1
structures will now be rejected. This change is less risky than it seems
because most strings in X.509 are in X509_NAME, which already rejected
invalid instances of these string types (but not other string types)
during canonicalization. See https://crbug.com/boringssl/412 for a
discussion of that mess.

Bug: 427
Change-Id: I0d7e24dfd841703d2a8581ec4e78ed5bc3862d75
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/53225
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
chromium-5359
David Benjamin 3 years ago committed by Boringssl LUCI CQ
parent e7681d1a78
commit 507daf391d
  1. 69
      crypto/asn1/asn1_test.cc
  2. 43
      crypto/asn1/tasn_dec.c

@ -2175,6 +2175,75 @@ TEST(ASN1Test, ZeroTag) {
0x04, 0x01, 0x84, 0xb7, 0x09, 0x01, 0x00, 0x01, 0x61});
}
TEST(ASN1Test, StringEncoding) {
const struct {
ASN1_STRING *(*d2i)(ASN1_STRING **out, const uint8_t **inp, long len);
std::vector<uint8_t> in;
bool valid;
} kTests[] = {
// All OCTET STRINGs are valid.
{d2i_ASN1_OCTET_STRING, {0x04, 0x00}, true},
{d2i_ASN1_OCTET_STRING, {0x04, 0x01, 0x00}, true},
// UTF8String must be valid UTF-8.
{d2i_ASN1_UTF8STRING, {0x0c, 0x00}, true},
{d2i_ASN1_UTF8STRING, {0x0c, 0x01, 'a'}, true},
{d2i_ASN1_UTF8STRING, {0x0c, 0x03, 0xe2, 0x98, 0x83}, true},
// Non-minimal, two-byte UTF-8.
{d2i_ASN1_UTF8STRING, {0x0c, 0x02, 0xc0, 0x81}, false},
// Truncated, four-byte UTF-8.
{d2i_ASN1_UTF8STRING, {0x0c, 0x03, 0xf0, 0x80, 0x80}, false},
// Low-surrogate value.
{d2i_ASN1_UTF8STRING, {0x0c, 0x03, 0xed, 0xa0, 0x80}, false},
// High-surrogate value.
{d2i_ASN1_UTF8STRING, {0x0c, 0x03, 0xed, 0xb0, 0x81}, false},
// BMPString must be valid UCS-2.
{d2i_ASN1_BMPSTRING, {0x1e, 0x00}, true},
{d2i_ASN1_BMPSTRING, {0x1e, 0x02, 0x00, 'a'}, true},
// Truncated code unit.
{d2i_ASN1_BMPSTRING, {0x1e, 0x01, 'a'}, false},
// Lone surrogate.
{d2i_ASN1_BMPSTRING, {0x1e, 0x02, 0xd8, 0}, false},
// BMPString is UCS-2, not UTF-16, so surrogate pairs are also invalid.
{d2i_ASN1_BMPSTRING, {0x1e, 0x04, 0xd8, 0, 0xdc, 1}, false},
// UniversalString must be valid UTF-32.
{d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x00}, true},
{d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x00, 0x00, 'a'}, true},
// Maximum code point.
{d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x10, 0xff, 0xfd}, true},
// Reserved.
{d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x10, 0xff, 0xfe}, false},
{d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x10, 0xff, 0xff}, false},
// Too high.
{d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x11, 0x00, 0x00}, false},
// Surrogates are not characters.
{d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x04, 0x00, 0x00, 0xd8, 0}, false},
// Truncated codepoint.
{d2i_ASN1_UNIVERSALSTRING, {0x1c, 0x03, 0x00, 0x00, 0x00}, false},
// We interpret T61String as Latin-1, so all inputs are valid.
{d2i_ASN1_T61STRING, {0x14, 0x00}, true},
{d2i_ASN1_T61STRING, {0x14, 0x01, 0x00}, true},
};
for (const auto& t : kTests) {
SCOPED_TRACE(Bytes(t.in));
const uint8_t *inp;
if (t.d2i != nullptr) {
inp = t.in.data();
bssl::UniquePtr<ASN1_STRING> str(t.d2i(nullptr, &inp, t.in.size()));
EXPECT_EQ(t.valid, str != nullptr);
}
// Also test with the ANY parser.
inp = t.in.data();
bssl::UniquePtr<ASN1_TYPE> any(d2i_ASN1_TYPE(nullptr, &inp, t.in.size()));
EXPECT_EQ(t.valid, any != nullptr);
}
}
// The ASN.1 macros do not work on Windows shared library builds, where usage of
// |OPENSSL_EXPORT| is a bit stricter.
#if !defined(OPENSSL_WINDOWS) || !defined(BORINGSSL_SHARED_LIBRARY)

@ -63,6 +63,7 @@
#include <limits.h>
#include <string.h>
#include "../bytestring/internal.h"
#include "../internal.h"
#include "internal.h"
@ -797,32 +798,51 @@ static int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
case V_ASN1_OTHER:
case V_ASN1_SET:
case V_ASN1_SEQUENCE:
default:
if (utype == V_ASN1_BMPSTRING && (len & 1)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_BMPSTRING_IS_WRONG_LENGTH);
goto err;
default: {
CBS cbs;
CBS_init(&cbs, cont, (size_t)len);
if (utype == V_ASN1_BMPSTRING) {
while (CBS_len(&cbs) != 0) {
uint32_t c;
if (!cbs_get_ucs2_be(&cbs, &c)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING);
goto err;
}
}
}
if (utype == V_ASN1_UNIVERSALSTRING && (len & 3)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_UNIVERSALSTRING_IS_WRONG_LENGTH);
goto err;
if (utype == V_ASN1_UNIVERSALSTRING) {
while (CBS_len(&cbs) != 0) {
uint32_t c;
if (!cbs_get_utf32_be(&cbs, &c)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING);
goto err;
}
}
}
if (utype == V_ASN1_UTF8STRING) {
while (CBS_len(&cbs) != 0) {
uint32_t c;
if (!cbs_get_utf8(&cbs, &c)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UTF8STRING);
goto err;
}
}
}
if (utype == V_ASN1_UTCTIME) {
CBS cbs;
CBS_init(&cbs, cont, (size_t)len);
if (!CBS_parse_utc_time(&cbs, NULL, /*allow_timezone_offset=*/1)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_TIME_FORMAT);
goto err;
}
}
if (utype == V_ASN1_GENERALIZEDTIME) {
CBS cbs;
CBS_init(&cbs, cont, (size_t)len);
if (!CBS_parse_generalized_time(&cbs, NULL,
/*allow_timezone_offset=*/0)) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_TIME_FORMAT);
goto err;
}
}
// TODO(https://crbug.com/boringssl/427): Check other string types.
// All based on ASN1_STRING and handled the same
if (!*pval) {
stmp = ASN1_STRING_type_new(utype);
@ -842,6 +862,7 @@ static int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
goto err;
}
break;
}
}
// If ASN1_ANY and NULL type fix up value
if (typ && (utype == V_ASN1_NULL)) {

Loading…
Cancel
Save