Uint8Array to string in Javascript

J'ai des données encodées en UTF-8 stockées dans une plage d'éléments d'un Uint8Array en JavaScript. Existe-t-il un moyen efficace de les décoder en une chaîne JavaScript ordinaire (je crois que JavaScript utilise de l'Unicode 16 bits) ? Je ne veux pas ajouter un caractère à la fois, car la concaténation de chaînes deviendrait coûteuse en CPU.

TextEncoder et TextDecoder, issus de la norme Encoding (pour laquelle la bibliothèque stringencoding fournit un polyfill), convertissent entre chaînes et ArrayBuffers :

 // Round-trip a string through UTF-8 bytes with the Encoding API.
 // TextEncoder always emits UTF-8 and its constructor takes no arguments, so
 // no encoding label is passed; the TextDecoder label is kept explicit.
 var uint8array = new TextEncoder().encode("¢"); // bytes 0xC2 0xA2
 var string = new TextDecoder("utf-8").decode(uint8array); // back to "¢"

Cela devrait fonctionner:

 // http://www.onicos.com/staff/iz/amuse/javascript/expert/utf.txt
 /* utf.js - UTF-8 <=> UTF-16 convertion
  *
  * Copyright (C) 1999 Masanao Izumo <[email protected]>
  * Version: 1.0
  * LastModified: Dec 25 1999
  * This library is free. You can redistribute it and/or modify it.
  */

 /**
  * Decodes an array of UTF-8 bytes into a JavaScript (UTF-16) string.
  *
  * One-, two-, three- and four-byte sequences are supported; four-byte
  * sequences are emitted as a surrogate pair. Bytes that cannot start a
  * sequence (stray continuation bytes 0x80-0xBF and 0xF8-0xFF) are skipped.
  * Continuation bytes are not validated.
  *
  * @param {Uint8Array|number[]} array Indexable sequence of byte values.
  * @returns {string} The decoded string.
  */
 function Utf8ArrayToStr(array) {
   var out = "";
   var len = array.length;
   var i = 0;
   var c, char2, char3, char4, codePoint;

   while (i < len) {
     c = array[i++];
     // The high nibble of the lead byte determines the sequence length.
     switch (c >> 4) {
       case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
         // 0xxxxxxx
         out += String.fromCharCode(c);
         break;
       case 12: case 13:
         // 110x xxxx   10xx xxxx
         char2 = array[i++];
         out += String.fromCharCode(((c & 0x1F) << 6) | (char2 & 0x3F));
         break;
       case 14:
         // 1110 xxxx  10xx xxxx  10xx xxxx
         char2 = array[i++];
         char3 = array[i++];
         out += String.fromCharCode(((c & 0x0F) << 12) |
                                    ((char2 & 0x3F) << 6) |
                                    ((char3 & 0x3F) << 0));
         break;
       case 15:
         // 1111 0xxx  10xx xxxx  10xx xxxx  10xx xxxx
         if (c > 0xF7) {
           // 0xF8-0xFF never start a sequence; skip them without consuming
           // the bytes that follow.
           break;
         }
         char2 = array[i++];
         char3 = array[i++];
         char4 = array[i++];
         codePoint = ((c & 0x07) << 18) |
                     ((char2 & 0x3F) << 12) |
                     ((char3 & 0x3F) << 6) |
                     (char4 & 0x3F);
         if (codePoint >= 0x10000 && codePoint <= 0x10FFFF) {
           // Code points above U+FFFF need a UTF-16 surrogate pair.
           codePoint -= 0x10000;
           out += String.fromCharCode(0xD800 | (codePoint >> 10),
                                      0xDC00 | (codePoint & 0x3FF));
         } else {
           // Overlong or out-of-range value: emit the replacement character.
           out += "\uFFFD";
         }
         break;
     }
   }

   return out;
 }

C'est un peu plus propre que les autres solutions, car cela n'utilise aucun hack et ne dépend pas des fonctions du navigateur ; cela fonctionne donc également dans d'autres environnements JS.

Découvrez la démo JSFiddle .

Consultez également les questions connexes: ici et ici

Voici ce que j'utilise:

 // Build the string in fixed-size chunks: handing a whole large array to
 // Function.prototype.apply can exceed the engine's argument-count limit and
 // throw a RangeError.
 // NOTE: every byte becomes one UTF-16 code unit, so this is only a correct
 // decoding for single-byte (ASCII / Latin-1) data, not multi-byte UTF-8.
 var str = (function (bytes) {
   var CHUNK_SIZE = 0x8000;
   var text = "";
   for (var offset = 0; offset < bytes.length; offset += CHUNK_SIZE) {
     // subarray() creates a view on the same buffer, so no bytes are copied.
     text += String.fromCharCode.apply(null, bytes.subarray(offset, offset + CHUNK_SIZE));
   }
   return text;
 })(uint8Arr);

Trouvé dans l'un des exemples d'applications Chrome ; ceci est toutefois destiné à des blocs de données plus volumineux, pour lesquels une conversion asynchrone est acceptable.

 /**
  * Asynchronously reads the contents of an array buffer as text.
  *
  * The bytes are wrapped in a Blob and read back through a FileReader, so the
  * resulting string is handed to the callback instead of being returned.
  *
  * @private
  * @param {ArrayBuffer} buf The buffer to convert
  * @param {Function} callback Called with the resulting string once the read completes
  */
 function _arrayBufferToString(buf, callback) {
   var reader = new FileReader();

   reader.onload = function (event) {
     callback(event.target.result);
   };

   reader.readAsText(new Blob([new Uint8Array(buf)]));
 }

Faites ce que @Sudhir a dit, puis, pour obtenir une chaîne à partir de la liste de nombres séparés par des virgules, utilisez :

 // Append one character per byte of unitArr to myString; each byte value is
 // used directly as a UTF-16 code unit.
 var i = 0;
 while (i < unitArr.byteLength) {
   myString += String.fromCharCode(unitArr[i]);
   i++;
 }

Cela vous donnera la chaîne voulue, si la question est toujours d'actualité.

La solution donnée par Albert fonctionne bien tant que la fonction fournie est rarement invoquée et n'est utilisée que pour des tableaux de taille modeste ; sinon, elle est extrêmement inefficace. Voici une solution améliorée en JavaScript pur (« vanilla ») qui fonctionne à la fois pour Node et les navigateurs et présente les avantages suivants :

• Fonctionne efficacement pour toutes les tailles des tableaux d'octets

• Ne génère aucune chaîne intermédiaire

• Prend en charge les caractères de 4 octets sur les moteurs JS modernes (sinon ils sont remplacés par "?")

 /**
  * Decodes an array of UTF-8 bytes into a string.
  *
  * Decoded characters are collected in a scratch array and joined once at the
  * end. One-character strings are cached per code point so repeated characters
  * are not rebuilt.
  *
  * Four-byte sequences are decoded with String.fromCodePoint when it exists;
  * otherwise each one is replaced by "?". A four-byte value above U+10FFFF
  * (produced by malformed lead bytes) becomes U+FFFD instead of making
  * String.fromCodePoint throw a RangeError. Continuation bytes are not
  * validated.
  *
  * @param {Uint8Array|number[]} array Indexable sequence of byte values.
  * @returns {string} The decoded string.
  */
 var utf8ArrayToStr = (function () {
   var charCache = new Array(128); // Preallocate the cache for the common single byte chars
   var charFromCodePt = String.fromCodePoint || String.fromCharCode;
   var result = [];

   return function (array) {
     var codePt, byte1, str;
     var buffLen = array.length;

     result.length = 0;

     for (var i = 0; i < buffLen;) {
       byte1 = array[i++];

       if (byte1 <= 0x7F) {
         codePt = byte1;
       } else if (byte1 <= 0xDF) {
         codePt = ((byte1 & 0x1F) << 6) | (array[i++] & 0x3F);
       } else if (byte1 <= 0xEF) {
         codePt = ((byte1 & 0x0F) << 12) | ((array[i++] & 0x3F) << 6) | (array[i++] & 0x3F);
       } else if (String.fromCodePoint) {
         codePt = ((byte1 & 0x07) << 18) | ((array[i++] & 0x3F) << 12) | ((array[i++] & 0x3F) << 6) | (array[i++] & 0x3F);
         if (codePt > 0x10FFFF) {
           // Not a Unicode code point; String.fromCodePoint would throw.
           codePt = 0xFFFD;
         }
       } else {
         codePt = 63; // Cannot convert four byte code points, so use "?" instead
         i += 3;
       }

       result.push(charCache[codePt] || (charCache[codePt] = charFromCodePt(codePt)));
     }

     str = result.join('');
     result.length = 0; // Do not keep the pieces of the last result alive between calls
     return str;
   };
 })();

Dans Node, « les instances de Buffer sont également des instances de Uint8Array », donc buf.toString() fonctionne dans ce cas.

 /**
  * Self-contained UTF-8 encoder/decoder operating on Uint8Array.
  *
  * Use the static `UTF8.encode(str)` and `UTF8.decode(data)` entry points; each
  * creates a throw-away instance and delegates to the private implementation.
  *
  * Malformed input never throws. The decoder emits U+FFFD for an invalid lead
  * byte, an interrupted or truncated sequence, an overlong encoding, a
  * surrogate code point or a value above U+10FFFF. The encoder replaces
  * unpaired surrogates in the input string with U+FFFD.
  */
 class UTF8 {
   /** Encodes a string as UTF-8 bytes. */
   static encode(str: string): Uint8Array {
     return new UTF8().encode(str);
   }

   /** Decodes UTF-8 bytes into a string. */
   static decode(data: Uint8Array): string {
     return new UTF8().decode(data);
   }

   /** Logs a code point that cannot be encoded. */
   private encoderError(code_point: number): void {
     console.error("UTF8 encoderError", code_point);
   }

   /**
    * Reports a decoding problem and returns the code point to emit for it:
    * `opt_code_point` when given and non-zero, otherwise U+FFFD.
    * Only logs when `fatal` is true.
    */
   private decoderError(fatal: boolean, opt_code_point?: number): number {
     if (fatal) console.error("UTF8 decoderError", opt_code_point);
     return opt_code_point || 0xFFFD;
   }

   /** True when min <= a <= max. */
   private inRange(a: number, min: number, max: number): boolean {
     return min <= a && a <= max;
   }

   /** Integer division rounding towards negative infinity. */
   private div(n: number, d: number): number {
     return Math.floor(n / d);
   }

   /**
    * Converts a UTF-16 string into an array of Unicode code points, combining
    * surrogate pairs and replacing unpaired surrogates with U+FFFD.
    * Based on http://www.w3.org/TR/WebIDL/#idl-DOMString
    */
   private stringToCodePoints(string: string): number[] {
     const cps: number[] = [];
     const n = string.length;
     let i = 0;

     while (i < n) {
       const c = string.charCodeAt(i);
       if (!this.inRange(c, 0xD800, 0xDFFF)) {
         cps.push(c);
       } else if (this.inRange(c, 0xDC00, 0xDFFF)) {
         // Low surrogate without a preceding high surrogate.
         cps.push(0xFFFD);
       } else if (i === n - 1) {
         // High surrogate at the very end of the string.
         cps.push(0xFFFD);
       } else {
         const d = string.charCodeAt(i + 1);
         if (this.inRange(d, 0xDC00, 0xDFFF)) {
           const a = c & 0x3FF;
           const b = d & 0x3FF;
           i += 1; // The low surrogate is consumed together with the high one.
           cps.push(0x10000 + (a << 10) + b);
         } else {
           cps.push(0xFFFD);
         }
       }
       i += 1;
     }
     return cps;
   }

   /** Encodes `str` as UTF-8. */
   private encode(str: string): Uint8Array {
     const codePoints = this.stringToCodePoints(str);
     const outputBytes: number[] = [];
     let pos = 0;

     while (codePoints.length > pos) {
       const code_point: number = codePoints[pos++];

       if (this.inRange(code_point, 0xD800, 0xDFFF)) {
         this.encoderError(code_point);
       } else if (this.inRange(code_point, 0x0000, 0x007F)) {
         outputBytes.push(code_point);
       } else {
         // count = number of continuation bytes, offset = lead-byte marker bits.
         let count = 0;
         let offset = 0;
         if (this.inRange(code_point, 0x0080, 0x07FF)) {
           count = 1;
           offset = 0xC0;
         } else if (this.inRange(code_point, 0x0800, 0xFFFF)) {
           count = 2;
           offset = 0xE0;
         } else if (this.inRange(code_point, 0x10000, 0x10FFFF)) {
           count = 3;
           offset = 0xF0;
         }

         outputBytes.push(this.div(code_point, Math.pow(64, count)) + offset);
         while (count > 0) {
           const temp = this.div(code_point, Math.pow(64, count - 1));
           outputBytes.push(0x80 + (temp % 64));
           count -= 1;
         }
       }
     }
     return new Uint8Array(outputBytes);
   }

   /** Decodes UTF-8 `data` into a string. U+0000 is not emitted. */
   private decode(data: Uint8Array): string {
     const fatal = false;
     let result = "";
     let pos = 0;

     // State of the multi-byte sequence currently being assembled.
     let utf8_code_point = 0;
     let utf8_bytes_needed = 0;
     let utf8_bytes_seen = 0;
     let utf8_lower_boundary = 0;

     while (data.length > pos) {
       const _byte = data[pos++];
       // Stays null while a sequence is still incomplete.
       let code_point: number | null = null;

       if (utf8_bytes_needed === 0) {
         if (this.inRange(_byte, 0x00, 0x7F)) {
           code_point = _byte;
         } else if (this.inRange(_byte, 0xC2, 0xDF)) {
           utf8_bytes_needed = 1;
           utf8_lower_boundary = 0x80;
           utf8_code_point = _byte - 0xC0;
         } else if (this.inRange(_byte, 0xE0, 0xEF)) {
           utf8_bytes_needed = 2;
           utf8_lower_boundary = 0x800;
           utf8_code_point = _byte - 0xE0;
         } else if (this.inRange(_byte, 0xF0, 0xF4)) {
           utf8_bytes_needed = 3;
           utf8_lower_boundary = 0x10000;
           utf8_code_point = _byte - 0xF0;
         } else {
           // Byte that cannot start a sequence.
           code_point = this.decoderError(fatal);
         }
         if (utf8_bytes_needed !== 0) {
           // Shift the lead-byte payload into its final position.
           utf8_code_point = utf8_code_point * Math.pow(64, utf8_bytes_needed);
         }
       } else if (!this.inRange(_byte, 0x80, 0xBF)) {
         // Sequence interrupted: emit U+FFFD and reprocess this byte as the
         // start of a new sequence.
         utf8_code_point = 0;
         utf8_bytes_needed = 0;
         utf8_bytes_seen = 0;
         utf8_lower_boundary = 0;
         pos--;
         code_point = this.decoderError(fatal);
       } else {
         utf8_bytes_seen += 1;
         utf8_code_point = utf8_code_point +
           (_byte - 0x80) * Math.pow(64, utf8_bytes_needed - utf8_bytes_seen);

         if (utf8_bytes_seen === utf8_bytes_needed) {
           const cp = utf8_code_point;
           const lower_boundary = utf8_lower_boundary;
           utf8_code_point = 0;
           utf8_bytes_needed = 0;
           utf8_bytes_seen = 0;
           utf8_lower_boundary = 0;

           // Reject overlong encodings, surrogates and out-of-range values.
           if (this.inRange(cp, lower_boundary, 0x10FFFF) && !this.inRange(cp, 0xD800, 0xDFFF)) {
             code_point = cp;
           } else {
             code_point = this.decoderError(fatal);
           }
         }
       }

       if (code_point !== null) {
         result += this.codePointToString(code_point);
       }
     }

     if (utf8_bytes_needed !== 0) {
       // Input ended in the middle of a multi-byte sequence.
       result += this.codePointToString(this.decoderError(fatal));
     }
     return result;
   }

   /**
    * Converts one code point to its UTF-16 string form, using a surrogate pair
    * above U+FFFF. Returns "" for U+0000.
    */
   private codePointToString(code_point: number): string {
     if (code_point <= 0xFFFF) {
       return code_point > 0 ? String.fromCharCode(code_point) : "";
     }
     const v = code_point - 0x10000;
     return String.fromCharCode(0xD800 + ((v >> 10) & 0x3ff)) +
       String.fromCharCode(0xDC00 + (v & 0x3ff));
   }
 }

J'utilise cet extrait TypeScript :

 /**
  * Formats the bytes of a Uint8Array as a bracketed, comma-separated list of
  * decimal values, e.g. "[1, 2, 3]". An empty array gives "[]".
  *
  * @param uInt8Array The bytes to format.
  * @returns The textual list of byte values.
  */
 function UInt8ArrayToString(uInt8Array: Uint8Array): string {
   const parts: string[] = [];
   for (let index = 0; index < uInt8Array.byteLength; index++) {
     parts.push(String(uInt8Array[index]));
   }
   return "[" + parts.join(", ") + "]";
 }

Supprimez les annotations de type si vous avez besoin de la version JavaScript. J'espère que cela vous aidera !