Rewrite ASN1_STRING_print_ex escaping.

The original implementation uses a table generated by a Perl script,
and then relies on some subset of ASN1_STRFLGS_* constants overlapping
with CHARTYPE_* constants, while masking off the ones that don't align.

Allocating ASN1_STRFLGS_* constants is already complex with the
XN_FLAG_* interaction. Avoid the additional CHARTYPE_* interaction by
just writing out what it's recognizing in code. If you ignore
CHARTYPE_PRINTABLESTRING (which is unused), that table is just
recognizing 9 characters anyway.

Also, this gets charmap.h out of the way so I can clang-format every file
in here without having to constantly exclude it.

Change-Id: I73f31324e4b8a815887afba459e50ed091a9f999
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/52729
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
chromium-5359
David Benjamin 3 years ago committed by Boringssl LUCI CQ
parent 97c6032de7
commit ac57319dd8
  1. 148
      crypto/asn1/a_strex.c
  2. 15
      crypto/asn1/charmap.h
  3. 135
      crypto/asn1/charmap.pl

@ -64,24 +64,11 @@
#include <openssl/bio.h>
#include <openssl/mem.h>
#include "charmap.h"
#include "internal.h"
// These flags must be distinct from |ESC_FLAGS| and fit in a byte.
// Character is a valid PrintableString character
#define CHARTYPE_PRINTABLESTRING 0x10
// Character needs escaping if it is the first character
#define CHARTYPE_FIRST_ESC_2253 0x20
// Character needs escaping if it is the last character
#define CHARTYPE_LAST_ESC_2253 0x40
#define CHARTYPE_BS_ESC (ASN1_STRFLGS_ESC_2253 | CHARTYPE_FIRST_ESC_2253 | CHARTYPE_LAST_ESC_2253)
#define ESC_FLAGS (ASN1_STRFLGS_ESC_2253 | \
ASN1_STRFLGS_ESC_QUOTE | \
ASN1_STRFLGS_ESC_CTRL | \
#define ESC_FLAGS \
(ASN1_STRFLGS_ESC_2253 | ASN1_STRFLGS_ESC_QUOTE | ASN1_STRFLGS_ESC_CTRL | \
ASN1_STRFLGS_ESC_MSB)
static int maybe_write(BIO *out, const void *buf, int len)
@ -90,70 +77,54 @@ static int maybe_write(BIO *out, const void *buf, int len)
return out == NULL || BIO_write(out, buf, len) == len;
}
/*
* This function handles display of strings, one character at a time. It is
* passed an unsigned long for each character because it could come from 2 or
* even 4 byte forms.
*/
#define HEX_SIZE(type) (sizeof(type)*2)
static int do_esc_char(uint32_t c, unsigned char flags, char *do_quotes,
BIO *out)
static int is_control_character(unsigned char c)
{
unsigned char chflgs, chtmp;
char tmphex[HEX_SIZE(uint32_t) + 3];
return c < 32 || c == 127;
}
static int do_esc_char(uint32_t c, unsigned long flags, char *do_quotes,
BIO *out, int is_first, int is_last)
{
/* |c| is a |uint32_t| because, depending on |ASN1_STRFLGS_UTF8_CONVERT|,
* we may be escaping bytes or Unicode codepoints. */
char buf[16]; /* Large enough for "\\W01234567". */
unsigned char u8 = (unsigned char)c;
if (c > 0xffff) {
BIO_snprintf(tmphex, sizeof tmphex, "\\W%08" PRIX32, c);
if (!maybe_write(out, tmphex, 10))
return -1;
return 10;
}
if (c > 0xff) {
BIO_snprintf(tmphex, sizeof tmphex, "\\U%04" PRIX32, c);
if (!maybe_write(out, tmphex, 6))
return -1;
return 6;
}
chtmp = (unsigned char)c;
if (chtmp > 0x7f)
chflgs = flags & ASN1_STRFLGS_ESC_MSB;
else
chflgs = char_type[chtmp] & flags;
if (chflgs & CHARTYPE_BS_ESC) {
/* If we don't escape with quotes, signal we need quotes */
if (chflgs & ASN1_STRFLGS_ESC_QUOTE) {
if (do_quotes)
BIO_snprintf(buf, sizeof(buf), "\\W%08" PRIX32, c);
} else if (c > 0xff) {
BIO_snprintf(buf, sizeof(buf), "\\U%04" PRIX32, c);
} else if ((flags & ASN1_STRFLGS_ESC_MSB) && c > 0x7f) {
BIO_snprintf(buf, sizeof(buf), "\\%02X", c);
} else if ((flags & ASN1_STRFLGS_ESC_CTRL) && is_control_character(c)) {
BIO_snprintf(buf, sizeof(buf), "\\%02X", c);
} else if (flags & ASN1_STRFLGS_ESC_2253) {
/* See RFC 2253, sections 2.4 and 4. */
if (c == '\\' || c == '"') {
/* Quotes and backslashes are always escaped, quoted or not. */
BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
} else if (c == ',' || c == '+' || c == '<' || c == '>' || c == ';' ||
(is_first && (c == ' ' || c == '#')) ||
(is_last && (c == ' '))) {
if (flags & ASN1_STRFLGS_ESC_QUOTE) {
/* No need to escape, just tell the caller to quote. */
if (do_quotes != NULL) {
*do_quotes = 1;
if (!maybe_write(out, &chtmp, 1))
return -1;
return 1;
}
if (!maybe_write(out, "\\", 1))
return -1;
if (!maybe_write(out, &chtmp, 1))
return -1;
return 2;
return maybe_write(out, &u8, 1) ? 1 : -1;
}
if (chflgs & (ASN1_STRFLGS_ESC_CTRL | ASN1_STRFLGS_ESC_MSB)) {
BIO_snprintf(tmphex, 11, "\\%02X", chtmp);
if (!maybe_write(out, tmphex, 3))
return -1;
return 3;
BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
} else {
return maybe_write(out, &u8, 1) ? 1 : -1;
}
/*
* If we get this far and do any escaping at all must escape the escape
* character itself: backslash.
*/
if (chtmp == '\\' && flags & ESC_FLAGS) {
if (!maybe_write(out, "\\\\", 2))
return -1;
return 2;
} else if ((flags & ESC_FLAGS) && c == '\\') {
/* If any escape flags are set, also escape backslashes. */
BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
} else {
return maybe_write(out, &u8, 1) ? 1 : -1;
}
if (!maybe_write(out, &chtmp, 1))
return -1;
return 1;
int len = strlen(buf);
return maybe_write(out, buf, len) ? len : -1;
}
/*
@ -163,7 +134,7 @@ static int do_esc_char(uint32_t c, unsigned char flags, char *do_quotes,
*/
static int do_buf(const unsigned char *buf, int buflen, int encoding,
int utf8_convert, unsigned char flags, char *quotes, BIO *out)
int utf8_convert, unsigned long flags, char *quotes, BIO *out)
{
/* Reject invalid UCS-4 and UCS-2 lengths without parsing. */
switch (encoding) {
@ -185,10 +156,7 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
const unsigned char *q = buf + buflen;
int outlen = 0;
while (p != q) {
unsigned char orflags = 0;
if (p == buf && flags & ASN1_STRFLGS_ESC_2253) {
orflags = CHARTYPE_FIRST_ESC_2253;
}
const int is_first = p == buf;
/* TODO(davidben): Replace this with |cbs_get_ucs2_be|, etc., to check
* for invalid codepoints. Before doing that, enforce it in the parser,
* https://crbug.com/boringssl/427, so these error cases are not
@ -224,8 +192,7 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
assert(0);
return -1;
}
if (p == q && flags & ASN1_STRFLGS_ESC_2253)
orflags = CHARTYPE_LAST_ESC_2253;
const int is_last = p == q;
if (utf8_convert) {
unsigned char utfbuf[6];
int utflen;
@ -237,14 +204,15 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
* otherwise each character will be > 0x7f and so the
* character will never be escaped on first and last.
*/
int len = do_esc_char(utfbuf[i], flags | orflags, quotes, out);
int len = do_esc_char(utfbuf[i], flags, quotes, out, is_first,
is_last);
if (len < 0) {
return -1;
}
outlen += len;
}
} else {
int len = do_esc_char(c, flags | orflags, quotes, out);
int len = do_esc_char(c, flags, quotes, out, is_first, is_last);
if (len < 0) {
return -1;
}
@ -281,14 +249,14 @@ static int do_hex_dump(BIO *out, unsigned char *buf, int buflen)
* encoding. This uses the RFC 2253 #01234 format.
*/
static int do_dump(unsigned long lflags, BIO *out, const ASN1_STRING *str)
static int do_dump(unsigned long flags, BIO *out, const ASN1_STRING *str)
{
if (!maybe_write(out, "#", 1)) {
return -1;
}
/* If we don't dump DER encoding just dump content octets */
if (!(lflags & ASN1_STRFLGS_DUMP_DER)) {
if (!(flags & ASN1_STRFLGS_DUMP_DER)) {
int outlen = do_hex_dump(out, str->data, str->length);
if (outlen < 0) {
return -1;
@ -362,13 +330,11 @@ static int string_type_to_encoding(int type) {
* an error occurred.
*/
int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long flags)
{
/* Keep a copy of escape flags */
unsigned char flags = (unsigned char)(lflags & ESC_FLAGS);
int type = str->type;
int outlen = 0;
if (lflags & ASN1_STRFLGS_SHOW_TYPE) {
if (flags & ASN1_STRFLGS_SHOW_TYPE) {
const char *tagname = ASN1_tag2str(type);
outlen += strlen(tagname);
if (!maybe_write(out, tagname, outlen) || !maybe_write(out, ":", 1))
@ -378,21 +344,21 @@ int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
/* Decide what to do with |str|, either dump the contents or display it. */
int encoding;
if (lflags & ASN1_STRFLGS_DUMP_ALL) {
if (flags & ASN1_STRFLGS_DUMP_ALL) {
/* Dump everything. */
encoding = -1;
} else if (lflags & ASN1_STRFLGS_IGNORE_TYPE) {
} else if (flags & ASN1_STRFLGS_IGNORE_TYPE) {
/* Ignore the string type and interpret the contents as Latin-1. */
encoding = MBSTRING_ASC;
} else {
encoding = string_type_to_encoding(type);
if (encoding == -1 && (lflags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
if (encoding == -1 && (flags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
encoding = MBSTRING_ASC;
}
}
if (encoding == -1) {
int len = do_dump(lflags, out, str);
int len = do_dump(flags, out, str);
if (len < 0)
return -1;
outlen += len;
@ -400,7 +366,7 @@ int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
}
int utf8_convert = 0;
if (lflags & ASN1_STRFLGS_UTF8_CONVERT) {
if (flags & ASN1_STRFLGS_UTF8_CONVERT) {
/* If the string is UTF-8, skip decoding and just interpret it as 1 byte
* per character to avoid converting twice.
*

@ -1,15 +0,0 @@
/*
 * Auto generated with chartype.pl script. Mask of various character
 * properties.
 *
 * The table has 128 entries, indexed by 7-bit ASCII code; each row below
 * holds 16 consecutive codes. Every entry is the bitwise OR of the property
 * bits assigned by the generator script:
 *    1 (0x01)  RFC 2253: character is escaped with a backslash
 *    2 (0x02)  control character that is escaped
 *    8 (0x08)  character is not escaped if the string is quoted
 *   16 (0x10)  valid PrintableString character
 *   32 (0x20)  escaped with a backslash if it is the first character
 *   64 (0x40)  escaped with a backslash if it is the last character
 */
static const unsigned char char_type[] = {
/* 0x00-0x0f: control characters */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0x10-0x1f: control characters */
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
/* 0x20-0x2f: space and punctuation; space (120) carries the quote, */
/* PrintableString, first-character and last-character bits */
120, 0, 1, 40, 0, 0, 0, 16, 16, 16, 0, 25, 25, 16, 16, 16,
/* 0x30-0x3f: digits, then ':' ';' '<' '=' '>' '?' */
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 9, 9, 16, 9, 16,
/* 0x40-0x4f: '@', then 'A' through 'O' */
0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
/* 0x50-0x5f: 'P' through 'Z', then '[', backslash (1), ']' '^' '_' */
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 0, 0, 0,
/* 0x60-0x6f: '`', then 'a' through 'o' */
0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
/* 0x70-0x7f: 'p' through 'z', then '{' '|' '}' '~' and DEL (2) */
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 2
};

@ -1,135 +0,0 @@
#!/usr/local/bin/perl -w
# Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project
# 2000.
#
# ====================================================================
# Copyright (c) 2000 The OpenSSL Project. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. All advertising materials mentioning features or use of this
# software must display the following acknowledgment:
# "This product includes software developed by the OpenSSL Project
# for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
#
# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
# endorse or promote products derived from this software without
# prior written permission. For written permission, please contact
# licensing@OpenSSL.org.
#
# 5. Products derived from this software may not be called "OpenSSL"
# nor may "OpenSSL" appear in their names without prior written
# permission of the OpenSSL Project.
#
# 6. Redistributions of any form whatsoever must retain the following
# acknowledgment:
# "This product includes software developed by the OpenSSL Project
# for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
#
# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# ====================================================================
#
# This product includes cryptographic software written by Eric Young
# (eay@cryptsoft.com). This product includes software written by Tim
# Hudson (tjh@cryptsoft.com).
use strict;
# $i is the loop counter (an ASCII code, 0..127); @arr collects one property
# bitmask per ASCII code and is later printed as a C table.
my ($i, @arr);
# Set up an array with the type of ASCII characters
# Each set bit represents a character property.
# NOTE(review): bit value 4 is not assigned by this script; presumably it is
# left free for a flag owned by the C code that consumes the table -- confirm.
# RFC 2253 character properties
my $RFC2253_ESC = 1; # Character escaped with \
my $ESC_CTRL = 2; # Escaped control character
# These are used with RFC 1779 quoting using "
my $NOESC_QUOTE = 8; # Not escaped if quoted
my $PSTRING_CHAR = 0x10; # Valid PrintableString character
my $RFC2253_FIRST_ESC = 0x20; # Escaped with \ if first character
my $RFC2253_LAST_ESC = 0x40; # Escaped with \ if last character
# First pass over all 128 ASCII codes: reset each entry, then apply the two
# properties that can be decided purely from the numeric range of the code.
for($i = 0; $i < 128; $i++) {
# Set the RFC 2253 escape characters (control)
# Every entry starts from zero so the |= operations below and after the loop
# accumulate onto a clean mask.
$arr[$i] = 0;
# Codes 0..31 and 127 are flagged as control characters.
if(($i < 32) || ($i > 126)) {
$arr[$i] |= $ESC_CTRL;
}
# Some PrintableString characters
# Only letters and digits are handled here; the punctuation members of the
# set are added individually after the loop.
if( ( ( $i >= ord("a")) && ( $i <= ord("z")) )
|| ( ( $i >= ord("A")) && ( $i <= ord("Z")) )
|| ( ( $i >= ord("0")) && ( $i <= ord("9")) ) ) {
$arr[$i] |= $PSTRING_CHAR;
}
}
# Now setup the rest
# Remaining RFC 2253 escaped characters
# Space and "#" receive only the positional bits (first and/or last
# character). The other seven characters receive $RFC2253_ESC, which is not
# position dependent. Every character here except the double quote and the
# backslash also carries $NOESC_QUOTE.
$arr[ord(" ")] |= $NOESC_QUOTE | $RFC2253_FIRST_ESC | $RFC2253_LAST_ESC;
$arr[ord("#")] |= $NOESC_QUOTE | $RFC2253_FIRST_ESC;
$arr[ord(",")] |= $NOESC_QUOTE | $RFC2253_ESC;
$arr[ord("+")] |= $NOESC_QUOTE | $RFC2253_ESC;
$arr[ord("\"")] |= $RFC2253_ESC;
$arr[ord("\\")] |= $RFC2253_ESC;
$arr[ord("<")] |= $NOESC_QUOTE | $RFC2253_ESC;
$arr[ord(">")] |= $NOESC_QUOTE | $RFC2253_ESC;
$arr[ord(";")] |= $NOESC_QUOTE | $RFC2253_ESC;
# Remaining PrintableString characters
# These are the punctuation members of the set; letters and digits were
# already flagged by the range checks in the loop above. Space, "+" and ","
# therefore end up with both escape-related bits and $PSTRING_CHAR.
$arr[ord(" ")] |= $PSTRING_CHAR;
$arr[ord("'")] |= $PSTRING_CHAR;
$arr[ord("(")] |= $PSTRING_CHAR;
$arr[ord(")")] |= $PSTRING_CHAR;
$arr[ord("+")] |= $PSTRING_CHAR;
$arr[ord(",")] |= $PSTRING_CHAR;
$arr[ord("-")] |= $PSTRING_CHAR;
$arr[ord(".")] |= $PSTRING_CHAR;
$arr[ord("/")] |= $PSTRING_CHAR;
$arr[ord(":")] |= $PSTRING_CHAR;
$arr[ord("=")] |= $PSTRING_CHAR;
$arr[ord("?")] |= $PSTRING_CHAR;
# Now generate the C code
# The here-document prints the banner comment and the opening of the
# char_type array definition to standard output.
# NOTE(review): the banner names the generator "chartype.pl"; confirm that
# this matches the script's actual file name.
print <<EOF;
/* Auto generated with chartype.pl script.
* Mask of various character properties
*/
static const unsigned char char_type[] = {
EOF
# Print the 128 masks as decimal values, 16 per output line: a newline is
# emitted before every index that is a non-zero multiple of 16, each value is
# formatted with %2d, and a comma follows every value except the last one
# (index 127).
for($i = 0; $i < 128; $i++) {
print("\n") if($i && (($i % 16) == 0));
printf("%2d", $arr[$i]);
print(",") if ($i != 127);
}
# Close the array initializer and finish with a blank line.
print("\n};\n\n");
Loading…
Cancel
Save