The LM Control website. Simple yet efficient.
Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

utf16.js 4.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. "use strict";
  2. var Buffer = require("safer-buffer").Buffer;
  3. // Note: UTF16-LE (or UCS2) codec is Node.js native. See encodings/internal.js
  4. // == UTF16-BE codec. ==========================================================
  5. exports.utf16be = Utf16BECodec;
  6. function Utf16BECodec() {
  7. }
  8. Utf16BECodec.prototype.encoder = Utf16BEEncoder;
  9. Utf16BECodec.prototype.decoder = Utf16BEDecoder;
  10. Utf16BECodec.prototype.bomAware = true;
  11. // -- Encoding
  12. function Utf16BEEncoder() {
  13. }
  14. Utf16BEEncoder.prototype.write = function(str) {
  15. var buf = Buffer.from(str, 'ucs2');
  16. for (var i = 0; i < buf.length; i += 2) {
  17. var tmp = buf[i]; buf[i] = buf[i+1]; buf[i+1] = tmp;
  18. }
  19. return buf;
  20. }
  21. Utf16BEEncoder.prototype.end = function() {
  22. }
  23. // -- Decoding
  24. function Utf16BEDecoder() {
  25. this.overflowByte = -1;
  26. }
  27. Utf16BEDecoder.prototype.write = function(buf) {
  28. if (buf.length == 0)
  29. return '';
  30. var buf2 = Buffer.alloc(buf.length + 1),
  31. i = 0, j = 0;
  32. if (this.overflowByte !== -1) {
  33. buf2[0] = buf[0];
  34. buf2[1] = this.overflowByte;
  35. i = 1; j = 2;
  36. }
  37. for (; i < buf.length-1; i += 2, j+= 2) {
  38. buf2[j] = buf[i+1];
  39. buf2[j+1] = buf[i];
  40. }
  41. this.overflowByte = (i == buf.length-1) ? buf[buf.length-1] : -1;
  42. return buf2.slice(0, j).toString('ucs2');
  43. }
  44. Utf16BEDecoder.prototype.end = function() {
  45. }
  46. // == UTF-16 codec =============================================================
  47. // Decoder chooses automatically from UTF-16LE and UTF-16BE using BOM and space-based heuristic.
  48. // Defaults to UTF-16LE, as it's prevalent and default in Node.
  49. // http://en.wikipedia.org/wiki/UTF-16 and http://encoding.spec.whatwg.org/#utf-16le
  50. // Decoder default can be changed: iconv.decode(buf, 'utf16', {defaultEncoding: 'utf-16be'});
  51. // Encoder uses UTF-16LE and prepends BOM (which can be overridden with addBOM: false).
  52. exports.utf16 = Utf16Codec;
  53. function Utf16Codec(codecOptions, iconv) {
  54. this.iconv = iconv;
  55. }
  56. Utf16Codec.prototype.encoder = Utf16Encoder;
  57. Utf16Codec.prototype.decoder = Utf16Decoder;
  58. // -- Encoding (pass-through)
  59. function Utf16Encoder(options, codec) {
  60. options = options || {};
  61. if (options.addBOM === undefined)
  62. options.addBOM = true;
  63. this.encoder = codec.iconv.getEncoder('utf-16le', options);
  64. }
  65. Utf16Encoder.prototype.write = function(str) {
  66. return this.encoder.write(str);
  67. }
  68. Utf16Encoder.prototype.end = function() {
  69. return this.encoder.end();
  70. }
  71. // -- Decoding
  72. function Utf16Decoder(options, codec) {
  73. this.decoder = null;
  74. this.initialBytes = [];
  75. this.initialBytesLen = 0;
  76. this.options = options || {};
  77. this.iconv = codec.iconv;
  78. }
  79. Utf16Decoder.prototype.write = function(buf) {
  80. if (!this.decoder) {
  81. // Codec is not chosen yet. Accumulate initial bytes.
  82. this.initialBytes.push(buf);
  83. this.initialBytesLen += buf.length;
  84. if (this.initialBytesLen < 16) // We need more bytes to use space heuristic (see below)
  85. return '';
  86. // We have enough bytes -> detect endianness.
  87. var buf = Buffer.concat(this.initialBytes),
  88. encoding = detectEncoding(buf, this.options.defaultEncoding);
  89. this.decoder = this.iconv.getDecoder(encoding, this.options);
  90. this.initialBytes.length = this.initialBytesLen = 0;
  91. }
  92. return this.decoder.write(buf);
  93. }
  94. Utf16Decoder.prototype.end = function() {
  95. if (!this.decoder) {
  96. var buf = Buffer.concat(this.initialBytes),
  97. encoding = detectEncoding(buf, this.options.defaultEncoding);
  98. this.decoder = this.iconv.getDecoder(encoding, this.options);
  99. var res = this.decoder.write(buf),
  100. trail = this.decoder.end();
  101. return trail ? (res + trail) : res;
  102. }
  103. return this.decoder.end();
  104. }
  105. function detectEncoding(buf, defaultEncoding) {
  106. var enc = defaultEncoding || 'utf-16le';
  107. if (buf.length >= 2) {
  108. // Check BOM.
  109. if (buf[0] == 0xFE && buf[1] == 0xFF) // UTF-16BE BOM
  110. enc = 'utf-16be';
  111. else if (buf[0] == 0xFF && buf[1] == 0xFE) // UTF-16LE BOM
  112. enc = 'utf-16le';
  113. else {
  114. // No BOM found. Try to deduce encoding from initial content.
  115. // Most of the time, the content has ASCII chars (U+00**), but the opposite (U+**00) is uncommon.
  116. // So, we count ASCII as if it was LE or BE, and decide from that.
  117. var asciiCharsLE = 0, asciiCharsBE = 0, // Counts of chars in both positions
  118. _len = Math.min(buf.length - (buf.length % 2), 64); // Len is always even.
  119. for (var i = 0; i < _len; i += 2) {
  120. if (buf[i] === 0 && buf[i+1] !== 0) asciiCharsBE++;
  121. if (buf[i] !== 0 && buf[i+1] === 0) asciiCharsLE++;
  122. }
  123. if (asciiCharsBE > asciiCharsLE)
  124. enc = 'utf-16be';
  125. else if (asciiCharsBE < asciiCharsLE)
  126. enc = 'utf-16le';
  127. }
  128. }
  129. return enc;
  130. }