--- json/reader.inl 2011-12-04 15:16:48.000000000 -0700 +++ json/reader.inl 2013-02-23 16:17:10.000000000 -0700 @@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI TODO: * better documentation -* unicode character decoding */ @@ -308,7 +307,7 @@ inline std::string Reader::MatchString(I // escape? if (c == '\\' && - inputStream.EOS() == false) // shouldn't have reached the end yet + inputStream.EOS() == false) // shouldn't have reached the end yet { c = inputStream.Get(); switch (c) { @@ -320,7 +319,37 @@ inline std::string Reader::MatchString(I case 'n': string.push_back('\n'); break; case 'r': string.push_back('\r'); break; case 't': string.push_back('\t'); break; - case 'u': string.push_back('\u'); break; // TODO: what do we do with this? + case 'u': { // convert unicode to UTF-8 + int x = 0, i; + + // next four characters should be hex + for (i = 0; i < 4; ++i) { + c = inputStream.Get(); + if (c >= '0' && c <= '9') { + x = (x << 4) | (c - '0'); + } else if (c >= 'a' && c <= 'f') { + x = (x << 4) | (c - 'a' + 10); + } else if (c >= 'A' && c <= 'F') { + x = (x << 4) | (c - 'A' + 10); + } else { + std::string sMessage = std::string("Unrecognized hexadecimal character found in string: ") + c; + throw ScanException(sMessage, inputStream.GetLocation()); + } + } + + // encode as UTF-8 + if (x < 0x80) { + string.push_back(x); + } else if (x < 0x800) { + string.push_back(0xc0 | (x >> 6)); + string.push_back(0x80 | (x & 0x3f)); + } else { + string.push_back(0xe0 | (x >> 12)); + string.push_back(0x80 | ((x >> 6) & 0x3f)); + string.push_back(0x80 | (x & 0x3f)); + } + break; + } default: { std::string sMessage = std::string("Unrecognized escape sequence found in string: \\") + c; throw ScanException(sMessage, inputStream.GetLocation()); --- json/writer.inl 2011-12-04 15:16:48.000000000 -0700 +++ json/writer.inl 2013-02-25 09:29:35.000000000 -0700 @@ -35,7 +35,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBI TODO: * better documentation -* unicode character encoding */ @@ -122,7 +121,7 @@ inline void Writer::Write_i(const Object inline void Writer::Write_i(const Number& numberElement) { - m_ostr << std::setprecision(20) << numberElement.Value(); + m_ostr << std::dec << std::setprecision(20) << numberElement.Value(); } inline void Writer::Write_i(const Boolean& booleanElement) @@ -139,6 +138,48 @@ inline void Writer::Write_i(const String itEnd(s.end()); for (; it != itEnd; ++it) { + // check for UTF-8 unicode encoding + unsigned char u = static_cast(*it); + if (u & 0xc0) { + if ((u & 0xe0) == 0xc0) { + // two-character sequence + int x = (*it & 0x1f) << 6; + if ((it + 1) == itEnd) { + m_ostr << *it; continue; + } + u = static_cast(*(it + 1)); + if ((u & 0xc0) == 0x80) { + x |= u & 0x3f; + m_ostr << "\\u" << std::hex << std::setfill('0') + << std::setw(4) << x; + ++it; + continue; + } + + } else if ((u & 0xf0) == 0xe0) { + // three-character sequence + int x = (u & 0x0f) << 12; + if ((it + 1) == itEnd) { + m_ostr << *it; continue; + } + u = static_cast(*(it + 1)); + if ((u & 0xc0) == 0x80) { + x |= (u & 0x3f) << 6; + if ((it + 2) == itEnd) { + m_ostr << *it; continue; + } + u = static_cast(*(it + 2)); + if ((u & 0xc0) == 0x80) { + x |= u & 0x3f; + m_ostr << "\\u" << std::hex << std::setfill('0') + << std::setw(4) << x; + it = it + 2; + continue; + } + } + } + } + switch (*it) { case '"': m_ostr << "\\\""; break; @@ -148,7 +189,6 @@ inline void Writer::Write_i(const String case '\n': m_ostr << "\\n"; break; case '\r': m_ostr << "\\r"; break; case '\t': m_ostr << "\\t"; break; - case '\u': m_ostr << "\\u"; break; // uh... default: m_ostr << *it; break; } }