using System.Collections.Generic;
using System.Text;

namespace Quobject.EngineIoClientDotNet.Modules
{
    /// <summary>
    /// UTF-8 encoder/decoder ported from the JavaScript module utf8.js.
    /// https://github.com/mathiasbynens/utf8.js
    /// </summary>
    /// <remarks>
    /// Both directions work on "byte strings": ordinary <see cref="string"/> values in
    /// which every <see cref="char"/> carries a single byte value (0x00-0xFF).
    /// All working state is local to each call; the class holds no static mutable
    /// fields, so overlapping calls do not interfere with each other.
    /// </remarks>
    public class UTF8
    {
        /// <summary>
        /// Encodes a UTF-16 string as a UTF-8 byte string.
        /// </summary>
        /// <param name="str">The text to encode.</param>
        /// <returns>
        /// A string in which each char holds one byte of the UTF-8 encoding of
        /// <paramref name="str"/>.
        /// </returns>
        public static string Encode(string str)
        {
            List<int> codePoints = Ucs2Decode(str);
            var byteString = new StringBuilder();
            foreach (int codePoint in codePoints)
            {
                AppendCodePoint(byteString, codePoint);
            }
            return byteString.ToString();
        }

        /// <summary>
        /// Decodes a UTF-8 byte string back into a UTF-16 string.
        /// </summary>
        /// <param name="byteString">
        /// A string whose chars are read as bytes; only the low 8 bits of each
        /// element are used.
        /// </param>
        /// <returns>The decoded text.</returns>
        /// <exception cref="UTF8Exception">
        /// The input is truncated, contains a malformed or overlong sequence, or
        /// yields a code point above 0x10FFFF.
        /// </exception>
        public static string Decode(string byteString)
        {
            // The cursor lives on the stack and is handed to the helpers by reference,
            // so concurrent Decode calls never share state.
            List<int> bytes = Ucs2Decode(byteString);
            var codePoints = new List<int>(bytes.Count);
            int index = 0;
            int codePoint;
            while ((codePoint = DecodeSymbol(bytes, ref index)) != -1)
            {
                codePoints.Add(codePoint);
            }
            return Ucs2Encode(codePoints);
        }

        /// <summary>
        /// Reads one complete UTF-8 sequence starting at <paramref name="index"/>.
        /// </summary>
        /// <param name="bytes">The byte values being decoded.</param>
        /// <param name="index">Read cursor; advanced past every byte consumed.</param>
        /// <returns>The decoded code point, or -1 when the input is exhausted.</returns>
        private static int DecodeSymbol(List<int> bytes, ref int index)
        {
            int byteCount = bytes.Count;
            if (index > byteCount)
            {
                throw new UTF8Exception("Invalid byte index");
            }
            if (index == byteCount)
            {
                return -1;
            }

            int byte1 = bytes[index] & 0xFF;
            index++;

            // 1-byte sequence: 0xxxxxxx
            if ((byte1 & 0x80) == 0)
            {
                return byte1;
            }

            int byte2;
            int byte3;
            int codePoint;

            // 2-byte sequence: 110xxxxx 10xxxxxx
            if ((byte1 & 0xE0) == 0xC0)
            {
                byte2 = ReadContinuationByte(bytes, ref index);
                codePoint = ((byte1 & 0x1F) << 6) | byte2;
                if (codePoint >= 0x80)
                {
                    return codePoint;
                }
                // Overlong form: the value would have fit in a single byte.
                throw new UTF8Exception("Invalid continuation byte");
            }

            // 3-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
            if ((byte1 & 0xF0) == 0xE0)
            {
                byte2 = ReadContinuationByte(bytes, ref index);
                byte3 = ReadContinuationByte(bytes, ref index);
                codePoint = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
                if (codePoint >= 0x0800)
                {
                    return codePoint;
                }
                // Overlong form: the value would have fit in two bytes.
                throw new UTF8Exception("Invalid continuation byte");
            }

            // 4-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            if ((byte1 & 0xF8) == 0xF0)
            {
                byte2 = ReadContinuationByte(bytes, ref index);
                byte3 = ReadContinuationByte(bytes, ref index);
                int byte4 = ReadContinuationByte(bytes, ref index);
                // Bit 3 of byte1 is known to be clear here, so the 0x0F mask keeps
                // exactly the three payload bits.
                codePoint = ((byte1 & 0x0F) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
                if (codePoint >= 0x010000 && codePoint <= 0x10FFFF)
                {
                    return codePoint;
                }
            }

            // Invalid lead byte, or a 4-byte sequence outside 0x10000-0x10FFFF.
            throw new UTF8Exception("Invalid continuation byte");
        }

        /// <summary>
        /// Reads the byte at <paramref name="index"/>, checks that it has the
        /// continuation form 10xxxxxx, and returns its six payload bits.
        /// </summary>
        /// <param name="bytes">The byte values being decoded.</param>
        /// <param name="index">Read cursor; advanced by one once the byte is read.</param>
        /// <returns>The low six bits of the continuation byte.</returns>
        private static int ReadContinuationByte(List<int> bytes, ref int index)
        {
            if (index >= bytes.Count)
            {
                throw new UTF8Exception("Invalid byte index");
            }

            int continuationByte = bytes[index] & 0xFF;
            index++;

            if ((continuationByte & 0xC0) == 0x80)
            {
                return continuationByte & 0x3F;
            }
            throw new UTF8Exception("Invalid continuation byte");
        }

        /// <summary>
        /// Appends the UTF-8 bytes of <paramref name="codePoint"/> to
        /// <paramref name="sb"/>, one char per byte.
        /// </summary>
        /// <param name="sb">Destination builder.</param>
        /// <param name="codePoint">The code point to encode.</param>
        private static void AppendCodePoint(StringBuilder sb, int codePoint)
        {
            if ((codePoint & 0xFFFFFF80) == 0)
            {
                // 1-byte sequence
                sb.Append((char) codePoint);
                return;
            }

            if ((codePoint & 0xFFFFF800) == 0)
            {
                // 2-byte sequence
                sb.Append((char) (((codePoint >> 6) & 0x1F) | 0xC0));
            }
            else if ((codePoint & 0xFFFF0000) == 0)
            {
                // 3-byte sequence
                sb.Append((char) (((codePoint >> 12) & 0x0F) | 0xE0));
                sb.Append(CreateByte(codePoint, 6));
            }
            else if ((codePoint & 0xFFE00000) == 0)
            {
                // 4-byte sequence
                sb.Append((char) (((codePoint >> 18) & 0x07) | 0xF0));
                sb.Append(CreateByte(codePoint, 12));
                sb.Append(CreateByte(codePoint, 6));
            }

            // Every multi-byte form ends with a continuation byte carrying the low six bits.
            sb.Append((char) ((codePoint & 0x3F) | 0x80));
        }

        /// <summary>
        /// Builds a continuation byte (10xxxxxx) from the six bits of
        /// <paramref name="codePoint"/> that start at bit <paramref name="shift"/>.
        /// </summary>
        private static char CreateByte(int codePoint, int shift)
        {
            return (char) (((codePoint >> shift) & 0x3F) | 0x80);
        }

        /// <summary>
        /// Splits a UTF-16 string into code points, combining each valid surrogate
        /// pair into one supplementary code point. Unpaired surrogates are emitted
        /// unchanged as their own values.
        /// </summary>
        /// <param name="str">The string to split.</param>
        /// <returns>The code point values in input order.</returns>
        private static List<int> Ucs2Decode(string str)
        {
            int length = str.Length;
            var output = new List<int>(length);
            int counter = 0;

            while (counter < length)
            {
                int value = str[counter++];
                bool isHighSurrogate = value >= 0xD800 && value <= 0xDBFF;

                if (isHighSurrogate && counter < length)
                {
                    // high surrogate, and there is a next character
                    int extra = str[counter++];
                    if ((extra & 0xFC00) == 0xDC00)
                    {
                        // low surrogate
                        output.Add(((value & 0x3FF) << 10) + (extra & 0x3FF) + 0x10000);
                    }
                    else
                    {
                        // unmatched surrogate; only append this code unit, in case the
                        // next code unit is the high surrogate of a surrogate pair
                        output.Add(value);
                        counter--;
                    }
                }
                else
                {
                    output.Add(value);
                }
            }

            return output;
        }

        /// <summary>
        /// Builds a UTF-16 string from code points, writing each value above 0xFFFF
        /// as a surrogate pair.
        /// </summary>
        /// <param name="array">The code points to write.</param>
        /// <returns>The resulting string.</returns>
        private static string Ucs2Encode(List<int> array)
        {
            var sb = new StringBuilder(array.Count);

            foreach (int codePoint in array)
            {
                int value = codePoint;
                if (value > 0xFFFF)
                {
                    value -= 0x10000;
                    // value is non-negative here, so an arithmetic shift equals the
                    // unsigned shift used by the JavaScript original.
                    sb.Append((char) (0xD800 | ((value >> 10) & 0x3FF)));
                    value = 0xDC00 | (value & 0x3FF);
                }
                sb.Append((char) value);
            }

            return sb.ToString();
        }
    }
}