bifocal/node_modules/text-decoder/lib/utf8-decoder.js

97 lines
2.3 KiB
JavaScript

const b4a = require('b4a')
/**
* https://encoding.spec.whatwg.org/#utf-8-decoder
*/
module.exports = class UTF8Decoder {
constructor () {
this.codePoint = 0
this.bytesSeen = 0
this.bytesNeeded = 0
this.lowerBoundary = 0x80
this.upperBoundary = 0xbf
}
decode (data) {
// If we have a fast path, just sniff if the last part is a boundary
if (this.bytesNeeded === 0) {
let isBoundary = true
for (let i = Math.max(0, data.byteLength - 4), n = data.byteLength; i < n && isBoundary; i++) {
isBoundary = data[i] <= 0x7f
}
if (isBoundary) return b4a.toString(data, 'utf8')
}
let result = ''
for (let i = 0, n = data.byteLength; i < n; i++) {
const byte = data[i]
if (this.bytesNeeded === 0) {
if (byte <= 0x7f) {
result += String.fromCharCode(byte)
} else if (byte >= 0xc2 && byte <= 0xdf) {
this.bytesNeeded = 1
this.codePoint = byte & 0x1f
} else if (byte >= 0xe0 && byte <= 0xef) {
if (byte === 0xe0) this.lowerBoundary = 0xa0
else if (byte === 0xed) this.upperBoundary = 0x9f
this.bytesNeeded = 2
this.codePoint = byte & 0xf
} else if (byte >= 0xf0 && byte <= 0xf4) {
if (byte === 0xf0) this.lowerBoundary = 0x90
if (byte === 0xf4) this.upperBoundary = 0x8f
this.bytesNeeded = 3
this.codePoint = byte & 0x7
} else {
result += '\ufffd'
}
continue
}
if (byte < this.lowerBoundary || byte > this.upperBoundary) {
this.codePoint = 0
this.bytesNeeded = 0
this.bytesSeen = 0
this.lowerBoundary = 0x80
this.upperBoundary = 0xbf
result += '\ufffd'
continue
}
this.lowerBoundary = 0x80
this.upperBoundary = 0xbf
this.codePoint = (this.codePoint << 6) | (byte & 0x3f)
this.bytesSeen++
if (this.bytesSeen !== this.bytesNeeded) continue
result += String.fromCodePoint(this.codePoint)
this.codePoint = 0
this.bytesNeeded = 0
this.bytesSeen = 0
}
return result
}
flush () {
const result = this.bytesNeeded > 0 ? '\ufffd' : ''
this.codePoint = 0
this.bytesNeeded = 0
this.bytesSeen = 0
this.lowerBoundary = 0x80
this.upperBoundary = 0xbf
return result
}
}