UTF8Encoding.h 1.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263
  1. //
  2. // UTF8Encoding.h
  3. //
  4. // $Id: //poco/1.4/Foundation/include/Poco/UTF8Encoding.h#1 $
  5. //
  6. // Library: Foundation
  7. // Package: Text
  8. // Module: UTF8Encoding
  9. //
  10. // Definition of the UTF8Encoding class.
  11. //
  12. // Copyright (c) 2004-2007, Applied Informatics Software Engineering GmbH.
  13. // and Contributors.
  14. //
  15. // SPDX-License-Identifier: BSL-1.0
  16. //
  17. #ifndef Foundation_UTF8Encoding_INCLUDED
  18. #define Foundation_UTF8Encoding_INCLUDED
  19. #include "Poco/Foundation.h"
  20. #include "Poco/TextEncoding.h"
  21. namespace Poco {
  22. class Foundation_API UTF8Encoding: public TextEncoding
  23. /// UTF-8 text encoding, as defined in RFC 2279.
  24. {
  25. public:
  26. UTF8Encoding();
  27. ~UTF8Encoding();
  28. const char* canonicalName() const;
  29. bool isA(const std::string& encodingName) const;
  30. const CharacterMap& characterMap() const;
  31. int convert(const unsigned char* bytes) const;
  32. int convert(int ch, unsigned char* bytes, int length) const;
  33. int queryConvert(const unsigned char* bytes, int length) const;
  34. int sequenceLength(const unsigned char* bytes, int length) const;
  35. static bool isLegal(const unsigned char *bytes, int length);
  36. /// Utility routine to tell whether a sequence of bytes is legal UTF-8.
  37. /// This must be called with the length pre-determined by the first byte.
  38. /// The sequence is illegal right away if there aren't enough bytes
  39. /// available. If presented with a length > 4, this function returns false.
  40. /// The Unicode definition of UTF-8 goes up to 4-byte sequences.
  41. ///
  42. /// Adapted from ftp://ftp.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
  43. /// Copyright 2001-2004 Unicode, Inc.
  44. private:
  45. static const char* _names[];
  46. static const CharacterMap _charMap;
  47. };
  48. } // namespace Poco
  49. #endif // Foundation_UTF8Encoding_INCLUDED