Rewrite ASN1_STRING_print_ex escaping.

The original implementation uses a table generated by a Perl script, and then relies on some subset of ASN1_STRFLGS_* constants overlapping with CHARTYPE_* constants, while masking off the ones that don't align. Allocating ASN1_STRFLGS_* constants is already complex with the XN_FLAG_* interaction. Avoid the additional CHARTYPE_* interaction by just writing out what it's recognizing in code. If you ignore CHARTYPE_PRINTABLESTRING (which is unused), that table is just recognizing 9 characters anyway.

Also this gets charmap.h out of the way so I can clang-format every file in here without having to constantly exclude it.

Change-Id: I73f31324e4b8a815887afba459e50ed091a9f999
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/52729
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
3 years ago · ac57319dd8
parent 97c6032de7
commit ac57319dd8
3 changed files with 61 additions and 245 deletions
--- a/crypto/asn1/a_strex.c
+++ b/crypto/asn1/a_strex.c
@ -64,24 +64,11 @@
 #include <openssl/bio.h>
 #include <openssl/mem.h>

-#include "charmap.h"
 #include "internal.h"


-// These flags must be distinct from |ESC_FLAGS| and fit in a byte.
-
-// Character is a valid PrintableString character
-#define CHARTYPE_PRINTABLESTRING 0x10
-// Character needs escaping if it is the first character
-#define CHARTYPE_FIRST_ESC_2253 0x20
-// Character needs escaping if it is the last character
-#define CHARTYPE_LAST_ESC_2253 0x40
-
-#define CHARTYPE_BS_ESC         (ASN1_STRFLGS_ESC_2253 | CHARTYPE_FIRST_ESC_2253 | CHARTYPE_LAST_ESC_2253)
-
-#define ESC_FLAGS (ASN1_STRFLGS_ESC_2253 | \
-                  ASN1_STRFLGS_ESC_QUOTE | \
-                  ASN1_STRFLGS_ESC_CTRL | \
+#define ESC_FLAGS                                                           \
+  (ASN1_STRFLGS_ESC_2253 | ASN1_STRFLGS_ESC_QUOTE | ASN1_STRFLGS_ESC_CTRL | \
   ASN1_STRFLGS_ESC_MSB)

 static int maybe_write(BIO *out, const void *buf, int len)
@ -90,70 +77,54 @@ static int maybe_write(BIO *out, const void *buf, int len)
    return out == NULL || BIO_write(out, buf, len) == len;
 }

-/*
- * This function handles display of strings, one character at a time. It is
- * passed an unsigned long for each character because it could come from 2 or
- * even 4 byte forms.
- */
-
-#define HEX_SIZE(type) (sizeof(type)*2)
-
-static int do_esc_char(uint32_t c, unsigned char flags, char *do_quotes,
-                       BIO *out)
+static int is_control_character(unsigned char c)
 {
-    unsigned char chflgs, chtmp;
-    char tmphex[HEX_SIZE(uint32_t) + 3];
+    return c < 32 || c == 127;
+}

+static int do_esc_char(uint32_t c, unsigned long flags, char *do_quotes,
+                       BIO *out, int is_first, int is_last)
+{
+    /* |c| is a |uint32_t| because, depending on |ASN1_STRFLGS_UTF8_CONVERT|,
+     * we may be escaping bytes or Unicode codepoints. */
+    char buf[16];  /* Large enough for "\\W01234567". */
+    unsigned char u8 = (unsigned char)c;
    if (c > 0xffff) {
-        BIO_snprintf(tmphex, sizeof tmphex, "\\W%08" PRIX32, c);
-        if (!maybe_write(out, tmphex, 10))
-            return -1;
-        return 10;
-    }
-    if (c > 0xff) {
-        BIO_snprintf(tmphex, sizeof tmphex, "\\U%04" PRIX32, c);
-        if (!maybe_write(out, tmphex, 6))
-            return -1;
-        return 6;
-    }
-    chtmp = (unsigned char)c;
-    if (chtmp > 0x7f)
-        chflgs = flags & ASN1_STRFLGS_ESC_MSB;
-    else
-        chflgs = char_type[chtmp] & flags;
-    if (chflgs & CHARTYPE_BS_ESC) {
-        /* If we don't escape with quotes, signal we need quotes */
-        if (chflgs & ASN1_STRFLGS_ESC_QUOTE) {
-            if (do_quotes)
+        BIO_snprintf(buf, sizeof(buf), "\\W%08" PRIX32, c);
+    } else if (c > 0xff) {
+        BIO_snprintf(buf, sizeof(buf), "\\U%04" PRIX32, c);
+    } else if ((flags & ASN1_STRFLGS_ESC_MSB) && c > 0x7f) {
+        BIO_snprintf(buf, sizeof(buf), "\\%02X", c);
+    } else if ((flags & ASN1_STRFLGS_ESC_CTRL) && is_control_character(c)) {
+        BIO_snprintf(buf, sizeof(buf), "\\%02X", c);
+    } else if (flags & ASN1_STRFLGS_ESC_2253) {
+        /* See RFC 2253, sections 2.4 and 4. */
+        if (c == '\\' || c == '"') {
+            /* Quotes and backslashes are always escaped, quoted or not. */
+            BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
+        } else if (c == ',' || c == '+' || c == '<' || c == '>' || c == ';' ||
+                   (is_first && (c == ' ' || c == '#')) ||
+                   (is_last && (c == ' '))) {
+            if (flags & ASN1_STRFLGS_ESC_QUOTE) {
+                /* No need to escape, just tell the caller to quote. */
+                if (do_quotes != NULL) {
                    *do_quotes = 1;
-            if (!maybe_write(out, &chtmp, 1))
-                return -1;
-            return 1;
                }
-        if (!maybe_write(out, "\\", 1))
-            return -1;
-        if (!maybe_write(out, &chtmp, 1))
-            return -1;
-        return 2;
+                return maybe_write(out, &u8, 1) ? 1 : -1;
            }
-    if (chflgs & (ASN1_STRFLGS_ESC_CTRL | ASN1_STRFLGS_ESC_MSB)) {
-        BIO_snprintf(tmphex, 11, "\\%02X", chtmp);
-        if (!maybe_write(out, tmphex, 3))
-            return -1;
-        return 3;
+            BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
+        } else {
+            return maybe_write(out, &u8, 1) ? 1 : -1;
        }
-    /*
-     * If we get this far and do any escaping at all must escape the escape
-     * character itself: backslash.
-     */
-    if (chtmp == '\\' && flags & ESC_FLAGS) {
-        if (!maybe_write(out, "\\\\", 2))
-            return -1;
-        return 2;
+    } else if ((flags & ESC_FLAGS) && c == '\\') {
+        /* If any escape flags are set, also escape backslashes. */
+        BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
+    } else {
+        return maybe_write(out, &u8, 1) ? 1 : -1;
    }
-    if (!maybe_write(out, &chtmp, 1))
-        return -1;
-    return 1;
+
+    int len = strlen(buf);
+    return maybe_write(out, buf, len) ? len : -1;
 }

 /*
@ -163,7 +134,7 @@ static int do_esc_char(uint32_t c, unsigned char flags, char *do_quotes,
 */

 static int do_buf(const unsigned char *buf, int buflen, int encoding,
-                  int utf8_convert, unsigned char flags, char *quotes, BIO *out)
+                  int utf8_convert, unsigned long flags, char *quotes, BIO *out)
 {
    /* Reject invalid UCS-4 and UCS-2 lengths without parsing. */
    switch (encoding) {
@ -185,10 +156,7 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
    const unsigned char *q = buf + buflen;
    int outlen = 0;
    while (p != q) {
-        unsigned char orflags = 0;
-        if (p == buf && flags & ASN1_STRFLGS_ESC_2253) {
-            orflags = CHARTYPE_FIRST_ESC_2253;
-        }
+        const int is_first = p == buf;
        /* TODO(davidben): Replace this with |cbs_get_ucs2_be|, etc., to check
         * for invalid codepoints. Before doing that, enforce it in the parser,
         * https://crbug.com/boringssl/427, so these error cases are not
@ -224,8 +192,7 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
            assert(0);
            return -1;
        }
-        if (p == q && flags & ASN1_STRFLGS_ESC_2253)
-            orflags = CHARTYPE_LAST_ESC_2253;
+        const int is_last = p == q;
        if (utf8_convert) {
            unsigned char utfbuf[6];
            int utflen;
@ -237,14 +204,15 @@ static int do_buf(const unsigned char *buf, int buflen, int encoding,
                 * otherwise each character will be > 0x7f and so the
                 * character will never be escaped on first and last.
                 */
-                int len = do_esc_char(utfbuf[i], flags | orflags, quotes, out);
+                int len = do_esc_char(utfbuf[i], flags, quotes, out, is_first,
+                                      is_last);
                if (len < 0) {
                    return -1;
                }
                outlen += len;
            }
        } else {
-            int len = do_esc_char(c, flags | orflags, quotes, out);
+            int len = do_esc_char(c, flags, quotes, out, is_first, is_last);
            if (len < 0) {
                return -1;
            }
@ -281,14 +249,14 @@ static int do_hex_dump(BIO *out, unsigned char *buf, int buflen)
 * encoding. This uses the RFC 2253 #01234 format.
 */

-static int do_dump(unsigned long lflags, BIO *out, const ASN1_STRING *str)
+static int do_dump(unsigned long flags, BIO *out, const ASN1_STRING *str)
 {
    if (!maybe_write(out, "#", 1)) {
        return -1;
    }

    /* If we don't dump DER encoding just dump content octets */
-    if (!(lflags & ASN1_STRFLGS_DUMP_DER)) {
+    if (!(flags & ASN1_STRFLGS_DUMP_DER)) {
        int outlen = do_hex_dump(out, str->data, str->length);
        if (outlen < 0) {
            return -1;
@ -362,13 +330,11 @@ static int string_type_to_encoding(int type) {
 * an error occurred.
 */

-int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
+int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long flags)
 {
-    /* Keep a copy of escape flags */
-    unsigned char flags = (unsigned char)(lflags & ESC_FLAGS);
    int type = str->type;
    int outlen = 0;
-    if (lflags & ASN1_STRFLGS_SHOW_TYPE) {
+    if (flags & ASN1_STRFLGS_SHOW_TYPE) {
        const char *tagname = ASN1_tag2str(type);
        outlen += strlen(tagname);
        if (!maybe_write(out, tagname, outlen) || !maybe_write(out, ":", 1))
@ -378,21 +344,21 @@ int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)

    /* Decide what to do with |str|, either dump the contents or display it. */
    int encoding;
-    if (lflags & ASN1_STRFLGS_DUMP_ALL) {
+    if (flags & ASN1_STRFLGS_DUMP_ALL) {
        /* Dump everything. */
        encoding = -1;
-    } else if (lflags & ASN1_STRFLGS_IGNORE_TYPE) {
+    } else if (flags & ASN1_STRFLGS_IGNORE_TYPE) {
        /* Ignore the string type and interpret the contents as Latin-1. */
        encoding = MBSTRING_ASC;
    } else {
        encoding = string_type_to_encoding(type);
-        if (encoding == -1 && (lflags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
+        if (encoding == -1 && (flags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
            encoding = MBSTRING_ASC;
        }
    }

    if (encoding == -1) {
-        int len = do_dump(lflags, out, str);
+        int len = do_dump(flags, out, str);
        if (len < 0)
            return -1;
        outlen += len;
@ -400,7 +366,7 @@ int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
    }

    int utf8_convert = 0;
-    if (lflags & ASN1_STRFLGS_UTF8_CONVERT) {
+    if (flags & ASN1_STRFLGS_UTF8_CONVERT) {
        /* If the string is UTF-8, skip decoding and just interpret it as 1 byte
         * per character to avoid converting twice.
         *
--- a/crypto/asn1/charmap.h
+++ b/crypto/asn1/charmap.h
@ -1,15 +0,0 @@
-/*
- * Auto generated with chartype.pl script. Mask of various character
- * properties
- */
-
-static const unsigned char char_type[] = {
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    120, 0, 1, 40, 0, 0, 0, 16, 16, 16, 0, 25, 25, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 9, 9, 16, 9, 16,
-    0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 0, 0, 0,
-    0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
-    16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 2
-};
--- a/crypto/asn1/charmap.pl
+++ b/crypto/asn1/charmap.pl
@ -1,135 +0,0 @@
-#!/usr/local/bin/perl -w
-
-# Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project
-# 2000.
-#
-# ====================================================================
-# Copyright (c) 2000 The OpenSSL Project.  All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in
-#    the documentation and/or other materials provided with the
-#    distribution.
-#
-# 3. All advertising materials mentioning features or use of this
-#    software must display the following acknowledgment:
-#    "This product includes software developed by the OpenSSL Project
-#    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
-#
-# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
-#    endorse or promote products derived from this software without
-#    prior written permission. For written permission, please contact
-#    licensing@OpenSSL.org.
-#
-# 5. Products derived from this software may not be called "OpenSSL"
-#    nor may "OpenSSL" appear in their names without prior written
-#    permission of the OpenSSL Project.
-#
-# 6. Redistributions of any form whatsoever must retain the following
-#    acknowledgment:
-#    "This product includes software developed by the OpenSSL Project
-#    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
-#
-# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
-# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-# OF THE POSSIBILITY OF SUCH DAMAGE.
-# ====================================================================
-#
-# This product includes cryptographic software written by Eric Young
-# (eay@cryptsoft.com).  This product includes software written by Tim
-# Hudson (tjh@cryptsoft.com).
-
-use strict;
-
-my ($i, @arr);
-
-# Set up an array with the type of ASCII characters
-# Each set bit represents a character property.
-
-# RFC 2253 character properties
-my $RFC2253_ESC = 1;	# Character escaped with \
-my $ESC_CTRL	= 2;	# Escaped control character
-# These are used with RFC 1779 quoting using "
-my $NOESC_QUOTE	= 8;	# Not escaped if quoted
-my $PSTRING_CHAR = 0x10;	# Valid PrintableString character
-my $RFC2253_FIRST_ESC = 0x20; # Escaped with \ if first character
-my $RFC2253_LAST_ESC = 0x40;  # Escaped with \ if last character
-
-for($i = 0; $i < 128; $i++) {
-	# Set the RFC 2253 escape characters (control)
-	$arr[$i] = 0;
-	if(($i < 32) || ($i > 126)) {
-		$arr[$i] |= $ESC_CTRL;
-	}
-
-	# Some PrintableString characters
-	if(		   ( ( $i >= ord("a")) && ( $i <= ord("z")) )
-			|| (  ( $i >= ord("A")) && ( $i <= ord("Z")) )
-			|| (  ( $i >= ord("0")) && ( $i <= ord("9")) )  ) {
-		$arr[$i] |= $PSTRING_CHAR;
-	}
-}
-
-# Now setup the rest
-
-# Remaining RFC 2253 escaped characters
-
-$arr[ord(" ")] |= $NOESC_QUOTE | $RFC2253_FIRST_ESC | $RFC2253_LAST_ESC;
-$arr[ord("#")] |= $NOESC_QUOTE | $RFC2253_FIRST_ESC;
-
-$arr[ord(",")] |= $NOESC_QUOTE | $RFC2253_ESC;
-$arr[ord("+")] |= $NOESC_QUOTE | $RFC2253_ESC;
-$arr[ord("\"")] |= $RFC2253_ESC;
-$arr[ord("\\")] |= $RFC2253_ESC;
-$arr[ord("<")] |= $NOESC_QUOTE | $RFC2253_ESC;
-$arr[ord(">")] |= $NOESC_QUOTE | $RFC2253_ESC;
-$arr[ord(";")] |= $NOESC_QUOTE | $RFC2253_ESC;
-
-# Remaining PrintableString characters
-
-$arr[ord(" ")] |= $PSTRING_CHAR;
-$arr[ord("'")] |= $PSTRING_CHAR;
-$arr[ord("(")] |= $PSTRING_CHAR;
-$arr[ord(")")] |= $PSTRING_CHAR;
-$arr[ord("+")] |= $PSTRING_CHAR;
-$arr[ord(",")] |= $PSTRING_CHAR;
-$arr[ord("-")] |= $PSTRING_CHAR;
-$arr[ord(".")] |= $PSTRING_CHAR;
-$arr[ord("/")] |= $PSTRING_CHAR;
-$arr[ord(":")] |= $PSTRING_CHAR;
-$arr[ord("=")] |= $PSTRING_CHAR;
-$arr[ord("?")] |= $PSTRING_CHAR;
-
-# Now generate the C code
-
-print <<EOF;
-/* Auto generated with chartype.pl script.
- * Mask of various character properties
- */
-
-static const unsigned char char_type[] = {
-EOF
-
-for($i = 0; $i < 128; $i++) {
-	print("\n") if($i && (($i % 16) == 0));
-	printf("%2d", $arr[$i]);
-	print(",") if ($i != 127);
-}
-print("\n};\n\n");
-