Protocol Buffers - Google's data interchange format (grpc依赖)
https://developers.google.com/protocol-buffers/
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
116 lines
3.7 KiB
116 lines
3.7 KiB
/**
 * @fileoverview A UTF-8 encoder and decoder.
 */
|
goog.module('protobuf.binary.textencoding'); |
|
|
|
const {checkElementIndex} = goog.require('protobuf.internal.checks'); |
|
|
|
/**
 * Combines an array of character codes into a string.
 *
 * Every value is handed to `String.fromCharCode`, so each entry is treated
 * as one UTF-16 code unit. Characters outside the Basic Multilingual Plane
 * must therefore already be split into a surrogate pair by the caller.
 *
 * @param {!Array<number>} codePoints
 * @return {string}
 */
function codePointsToString(codePoints) {
  // Performance: http://jsperf.com/string-fromcharcode-test/13
  // At most BATCH_SIZE values are passed through `apply` per call, which
  // keeps the size of each argument list bounded.
  const BATCH_SIZE = 10000;
  const length = codePoints.length;
  let s = '';
  let i = 0;
  while (i < length) {
    const end = Math.min(i + BATCH_SIZE, length);
    s += String.fromCharCode.apply(null, codePoints.slice(i, end));
    i = end;
  }
  return s;
}
|
|
|
/**
 * Decodes raw UTF-8 bytes into a string.
 * Supports codepoints from U+0000 up to U+10FFFF
 * (http://en.wikipedia.org/wiki/UTF-8).
 *
 * Decoding is lenient: a continuation byte found where a lead byte is
 * expected is skipped, lead bytes >= 0xF8 are skipped, and the payload bits
 * of continuation bytes are used without verifying their 0b10xxxxxx marker.
 * Before reading the trailing bytes of a multi-byte sequence, the index of
 * the last byte is passed to `checkElementIndex`.
 *
 * @param {!DataView} bytes
 * @return {string}
 */
function decode(bytes) {
  let cursor = 0;
  // Collected as UTF-16 code units; 4-byte sequences contribute two entries.
  const codePoints = [];

  while (cursor < bytes.byteLength) {
    const c = bytes.getUint8(cursor++);
    if (c < 0x80) {  // Regular 7-bit ASCII.
      codePoints.push(c);
    } else if (c < 0xC0) {
      // UTF-8 continuation mark. We are out of sync. This
      // might happen if we attempted to read a character
      // with more than four bytes.
      continue;
    } else if (c < 0xE0) {  // UTF-8 with two bytes.
      checkElementIndex(cursor, bytes.byteLength);
      const c2 = bytes.getUint8(cursor++);
      codePoints.push(((c & 0x1F) << 6) | (c2 & 0x3F));
    } else if (c < 0xF0) {  // UTF-8 with three bytes.
      checkElementIndex(cursor + 1, bytes.byteLength);
      const c2 = bytes.getUint8(cursor++);
      const c3 = bytes.getUint8(cursor++);
      codePoints.push(((c & 0xF) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F));
    } else if (c < 0xF8) {  // UTF-8 with 4 bytes.
      checkElementIndex(cursor + 2, bytes.byteLength);
      const c2 = bytes.getUint8(cursor++);
      const c3 = bytes.getUint8(cursor++);
      const c4 = bytes.getUint8(cursor++);
      // Characters written on 4 bytes have 21 bits for a codepoint.
      // We can't fit that on 16bit characters, so we use surrogates.
      let codepoint = ((c & 0x07) << 18) | ((c2 & 0x3F) << 12) |
          ((c3 & 0x3F) << 6) | (c4 & 0x3F);
      // Surrogates formula from wikipedia.
      // 1. Subtract 0x10000 from codepoint
      codepoint -= 0x10000;
      // 2. Split this into the high 10-bit value and the low 10-bit value
      // 3. Add 0xD800 to the high value to form the high surrogate
      // 4. Add 0xDC00 to the low value to form the low surrogate:
      const low = (codepoint & 0x3FF) + 0xDC00;
      const high = ((codepoint >> 10) & 0x3FF) + 0xD800;
      codePoints.push(high, low);
    }
    // Lead bytes >= 0xF8 match no branch and are dropped.
  }
  return codePointsToString(codePoints);
}
|
|
|
/**
 * Encodes a UTF-16 JavaScript string as UTF-8.
 *
 * A high surrogate immediately followed by a low surrogate is combined into
 * one supplementary code point and written as four bytes. A surrogate code
 * unit that is not part of such a pair is written as U+FFFD (EF BF BD), and
 * the code unit after it is encoded on its own rather than being consumed.
 * For any surrogate, the index of the following code unit is first passed
 * to `checkElementIndex`, so a surrogate at the very end of the string is
 * reported by that check.
 *
 * @param {string} value The string to write.
 * @return {!Uint8Array} An array containing the encoded bytes.
 */
function encode(value) {
  const buffer = [];

  for (let i = 0; i < value.length; i++) {
    const c1 = value.charCodeAt(i);

    if (c1 < 0x80) {
      buffer.push(c1);
    } else if (c1 < 0x800) {
      buffer.push((c1 >> 6) | 0xC0);
      buffer.push((c1 & 0x3F) | 0x80);
    } else if (c1 < 0xD800 || c1 >= 0xE000) {
      buffer.push((c1 >> 12) | 0xE0);
      buffer.push(((c1 >> 6) & 0x3F) | 0x80);
      buffer.push((c1 & 0x3F) | 0x80);
    } else {
      // c1 is a surrogate code unit (0xD800..0xDFFF).
      const next = i + 1;
      checkElementIndex(next, value.length);
      const c2 = value.charCodeAt(next);
      const isHighSurrogate = c1 < 0xDC00;
      const isLowSurrogate = c2 >= 0xDC00 && c2 < 0xE000;
      if (isHighSurrogate && isLowSurrogate) {
        // Valid surrogate pair: consume both code units.
        i = next;
        const paired = 0x10000 + (((c1 & 0x3FF) << 10) | (c2 & 0x3FF));
        buffer.push((paired >> 18) | 0xF0);
        buffer.push(((paired >> 12) & 0x3F) | 0x80);
        buffer.push(((paired >> 6) & 0x3F) | 0x80);
        buffer.push((paired & 0x3F) | 0x80);
      } else {
        // Unpaired surrogate: emit U+FFFD and leave the next code unit for
        // the following iteration so it is not swallowed.
        buffer.push(0xEF, 0xBF, 0xBD);
      }
    }
  }
  return new Uint8Array(buffer);
}
|
|
|
// Public API of this module: the UTF-8 decoder and encoder defined above.
exports = {
  decode,
  encode,
};
|
|
|