2
0

encodedstream.h 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. #ifndef RAPIDJSON_ENCODEDSTREAM_H_
  2. #define RAPIDJSON_ENCODEDSTREAM_H_
  3. #include "rapidjson.h"
  4. namespace rapidjson {
  5. //! Input byte stream wrapper with a statically bound encoding.
  6. /*!
  7. \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
  8. \tparam InputByteStream Type of input byte stream. For example, FileReadStream.
  9. */
  10. template <typename Encoding, typename InputByteStream>
  11. class EncodedInputStream {
  12. RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
  13. public:
  14. typedef typename Encoding::Ch Ch;
  15. EncodedInputStream(InputByteStream& is) : is_(is) {
  16. current_ = Encoding::TakeBOM(is_);
  17. }
  18. Ch Peek() const { return current_; }
  19. Ch Take() { Ch c = current_; current_ = Encoding::Take(is_); return c; }
  20. size_t Tell() const { return is_.Tell(); }
  21. // Not implemented
  22. void Put(Ch c) { RAPIDJSON_ASSERT(false); }
  23. void Flush() { RAPIDJSON_ASSERT(false); }
  24. Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
  25. size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
  26. private:
  27. // Prohibit assignment for VC C4512 warning
  28. EncodedInputStream& operator=(const EncodedInputStream&);
  29. InputByteStream& is_;
  30. Ch current_;
  31. };
  32. //! Output byte stream wrapper with statically bound encoding.
  33. /*!
  34. \tparam Encoding The interpretation of encoding of the stream. Either UTF8, UTF16LE, UTF16BE, UTF32LE, UTF32BE.
  35. \tparam InputByteStream Type of input byte stream. For example, FileWriteStream.
  36. */
  37. template <typename Encoding, typename OutputByteStream>
  38. class EncodedOutputStream {
  39. RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
  40. public:
  41. typedef typename Encoding::Ch Ch;
  42. EncodedOutputStream(OutputByteStream& os, bool putBOM = true) : os_(os) {
  43. if (putBOM)
  44. Encoding::PutBOM(os_);
  45. }
  46. void Put(Ch c) { Encoding::Put(os_, c); }
  47. void Flush() { os_.Flush(); }
  48. // Not implemented
  49. Ch Peek() const { RAPIDJSON_ASSERT(false); }
  50. Ch Take() { RAPIDJSON_ASSERT(false); }
  51. size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
  52. Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
  53. size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
  54. private:
  55. // Prohibit assignment for VC C4512 warning
  56. EncodedOutputStream& operator=(const EncodedOutputStream&);
  57. OutputByteStream& os_;
  58. };
  // Expands to the comma-separated list of member x of UTF8<Ch>, UTF16LE<Ch>, UTF16BE<Ch>, UTF32LE<Ch> and UTF32BE<Ch>, in that order.
  // It is used below to fill the function tables that are indexed by a UTFType value, and is #undef'ed at the end of this header.
  59. #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
  60. //! Input stream wrapper with dynamically bound encoding and automatic encoding detection.
  61. /*!
  62. \tparam CharType Type of character for reading.
  63. \tparam InputByteStream type of input byte stream to be wrapped.
  64. */
  65. template <typename CharType, typename InputByteStream>
  66. class AutoUTFInputStream {
  67. RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
  68. public:
  69. typedef CharType Ch;
  70. //! Constructor.
  71. /*!
  72. \param is input stream to be wrapped.
  73. \param type UTF encoding type if it is not detected from the stream.
  74. */
  75. AutoUTFInputStream(InputByteStream& is, UTFType type = kUTF8) : is_(&is), type_(type), hasBOM_(false) {
  76. DetectType();
  77. static const TakeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Take) };
  78. takeFunc_ = f[type_];
  79. current_ = takeFunc_(*is_);
  80. }
  81. UTFType GetType() const { return type_; }
  82. bool HasBOM() const { return hasBOM_; }
  83. Ch Peek() const { return current_; }
  84. Ch Take() { Ch c = current_; current_ = takeFunc_(*is_); return c; }
  85. size_t Tell() const { return is_->Tell(); }
  86. // Not implemented
  87. void Put(Ch) { RAPIDJSON_ASSERT(false); }
  88. void Flush() { RAPIDJSON_ASSERT(false); }
  89. Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
  90. size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
  91. private:
  92. // Detect encoding type with BOM or RFC 4627
  93. void DetectType() {
  94. // BOM (Byte Order Mark):
  95. // 00 00 FE FF UTF-32BE
  96. // FF FE 00 00 UTF-32LE
  97. // FE FF UTF-16BE
  98. // FF FE UTF-16LE
  99. // EF BB BF UTF-8
  100. const unsigned char* c = (const unsigned char *)is_->Peek4();
  101. if (!c)
  102. return;
  103. unsigned bom = c[0] | (c[1] << 8) | (c[2] << 16) | (c[3] << 24);
  104. hasBOM_ = false;
  105. if (bom == 0xFFFE0000) { type_ = kUTF32BE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
  106. else if (bom == 0x0000FEFF) { type_ = kUTF32LE; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); is_->Take(); }
  107. else if ((bom & 0xFFFF) == 0xFFFE) { type_ = kUTF16BE; hasBOM_ = true; is_->Take(); is_->Take(); }
  108. else if ((bom & 0xFFFF) == 0xFEFF) { type_ = kUTF16LE; hasBOM_ = true; is_->Take(); is_->Take(); }
  109. else if ((bom & 0xFFFFFF) == 0xBFBBEF) { type_ = kUTF8; hasBOM_ = true; is_->Take(); is_->Take(); is_->Take(); }
  110. // RFC 4627: Section 3
  111. // "Since the first two characters of a JSON text will always be ASCII
  112. // characters [RFC0020], it is possible to determine whether an octet
  113. // stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
  114. // at the pattern of nulls in the first four octets."
  115. // 00 00 00 xx UTF-32BE
  116. // 00 xx 00 xx UTF-16BE
  117. // xx 00 00 00 UTF-32LE
  118. // xx 00 xx 00 UTF-16LE
  119. // xx xx xx xx UTF-8
  120. if (!hasBOM_) {
  121. unsigned pattern = (c[0] ? 1 : 0) | (c[1] ? 2 : 0) | (c[2] ? 4 : 0) | (c[3] ? 8 : 0);
  122. switch (pattern) {
  123. case 0x08: type_ = kUTF32BE; break;
  124. case 0x0A: type_ = kUTF16BE; break;
  125. case 0x01: type_ = kUTF32LE; break;
  126. case 0x05: type_ = kUTF16LE; break;
  127. case 0x0F: type_ = kUTF8; break;
  128. }
  129. }
  130. // RUntime check whether the size of character type is sufficient. It only perform checks with assertion.
  131. switch (type_) {
  132. case kUTF8:
  133. // Do nothing
  134. break;
  135. case kUTF16LE:
  136. case kUTF16BE:
  137. RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
  138. break;
  139. case kUTF32LE:
  140. case kUTF32BE:
  141. RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
  142. break;
  143. }
  144. }
  145. typedef Ch (*TakeFunc)(InputByteStream& is);
  146. InputByteStream* is_;
  147. UTFType type_;
  148. Ch current_;
  149. TakeFunc takeFunc_;
  150. bool hasBOM_;
  151. };
  152. //! Output stream wrapper with dynamically bound encoding and automatic encoding detection.
  153. /*!
  154. \tparam CharType Type of character for writing.
  155. \tparam InputByteStream type of output byte stream to be wrapped.
  156. */
  157. template <typename CharType, typename OutputByteStream>
  158. class AutoUTFOutputStream {
  159. RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
  160. public:
  161. typedef CharType Ch;
  162. //! Constructor.
  163. /*!
  164. \param os output stream to be wrapped.
  165. \param type UTF encoding type.
  166. \param putBOM Whether to write BOM at the beginning of the stream.
  167. */
  168. AutoUTFOutputStream(OutputByteStream& os, UTFType type, bool putBOM) : os_(&os), type_(type) {
  169. // RUntime check whether the size of character type is sufficient. It only perform checks with assertion.
  170. switch (type_) {
  171. case kUTF16LE:
  172. case kUTF16BE:
  173. RAPIDJSON_ASSERT(sizeof(Ch) >= 2);
  174. break;
  175. case kUTF32LE:
  176. case kUTF32BE:
  177. RAPIDJSON_ASSERT(sizeof(Ch) >= 4);
  178. break;
  179. case kUTF8:
  180. // Do nothing
  181. break;
  182. }
  183. static const PutFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Put) };
  184. putFunc_ = f[type_];
  185. if (putBOM)
  186. PutBOM();
  187. }
  188. UTFType GetType() const { return type_; }
  189. void Put(Ch c) { putFunc_(*os_, c); }
  190. void Flush() { os_->Flush(); }
  191. // Not implemented
  192. Ch Peek() const { RAPIDJSON_ASSERT(false); }
  193. Ch Take() { RAPIDJSON_ASSERT(false); }
  194. size_t Tell() const { RAPIDJSON_ASSERT(false); return 0; }
  195. Ch* PutBegin() { RAPIDJSON_ASSERT(false); return 0; }
  196. size_t PutEnd(Ch*) { RAPIDJSON_ASSERT(false); return 0; }
  197. private:
  198. void PutBOM() {
  199. typedef void (*PutBOMFunc)(OutputByteStream&);
  200. static const PutBOMFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(PutBOM) };
  201. f[type_](*os_);
  202. }
  203. typedef void (*PutFunc)(OutputByteStream&, Ch);
  204. OutputByteStream* os_;
  205. UTFType type_;
  206. PutFunc putFunc_;
  207. };
  208. #undef RAPIDJSON_ENCODINGS_FUNC
  209. } // namespace rapidjson
  210. #endif // RAPIDJSON_ENCODEDSTREAM_H_