Utf8.php 3.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. <?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
  2. /**
  3. * CodeIgniter
  4. *
  5. * An open source application development framework for PHP 5.1.6 or newer
  6. *
  7. * @package CodeIgniter
  8. * @author ExpressionEngine Dev Team
  9. * @copyright Copyright (c) 2008 - 2011, EllisLab, Inc.
  10. * @license http://codeigniter.com/user_guide/license.html
  11. * @link http://codeigniter.com
  12. * @since Version 2.0
  13. * @filesource
  14. */
  15. // ------------------------------------------------------------------------
  16. /**
  17. * Utf8 Class
  18. *
  19. * Provides support for UTF-8 environments
  20. *
  21. * @package CodeIgniter
  22. * @subpackage Libraries
  23. * @category UTF-8
  24. * @author ExpressionEngine Dev Team
  25. * @link http://codeigniter.com/user_guide/libraries/utf8.html
  26. */
  27. class CI_Utf8 {
  28. /**
  29. * Constructor
  30. *
  31. * Determines if UTF-8 support is to be enabled
  32. *
  33. */
  34. function __construct()
  35. {
  36. log_message('debug', "Utf8 Class Initialized");
  37. global $CFG;
  38. if (
  39. preg_match('/./u', 'é') === 1 // PCRE must support UTF-8
  40. AND function_exists('iconv') // iconv must be installed
  41. AND ini_get('mbstring.func_overload') != 1 // Multibyte string function overloading cannot be enabled
  42. AND $CFG->item('charset') == 'UTF-8' // Application charset must be UTF-8
  43. )
  44. {
  45. log_message('debug', "UTF-8 Support Enabled");
  46. define('UTF8_ENABLED', TRUE);
  47. // set internal encoding for multibyte string functions if necessary
  48. // and set a flag so we don't have to repeatedly use extension_loaded()
  49. // or function_exists()
  50. if (extension_loaded('mbstring'))
  51. {
  52. define('MB_ENABLED', TRUE);
  53. mb_internal_encoding('UTF-8');
  54. }
  55. else
  56. {
  57. define('MB_ENABLED', FALSE);
  58. }
  59. }
  60. else
  61. {
  62. log_message('debug', "UTF-8 Support Disabled");
  63. define('UTF8_ENABLED', FALSE);
  64. }
  65. }
  66. // --------------------------------------------------------------------
  67. /**
  68. * Clean UTF-8 strings
  69. *
  70. * Ensures strings are UTF-8
  71. *
  72. * @access public
  73. * @param string
  74. * @return string
  75. */
  76. function clean_string($str)
  77. {
  78. if ($this->_is_ascii($str) === FALSE)
  79. {
  80. $str = @iconv('UTF-8', 'UTF-8//IGNORE', $str);
  81. }
  82. return $str;
  83. }
  84. // --------------------------------------------------------------------
  85. /**
  86. * Remove ASCII control characters
  87. *
  88. * Removes all ASCII control characters except horizontal tabs,
  89. * line feeds, and carriage returns, as all others can cause
  90. * problems in XML
  91. *
  92. * @access public
  93. * @param string
  94. * @return string
  95. */
  96. function safe_ascii_for_xml($str)
  97. {
  98. return remove_invisible_characters($str, FALSE);
  99. }
  100. // --------------------------------------------------------------------
  101. /**
  102. * Convert to UTF-8
  103. *
  104. * Attempts to convert a string to UTF-8
  105. *
  106. * @access public
  107. * @param string
  108. * @param string - input encoding
  109. * @return string
  110. */
  111. function convert_to_utf8($str, $encoding)
  112. {
  113. if (function_exists('iconv'))
  114. {
  115. $str = @iconv($encoding, 'UTF-8', $str);
  116. }
  117. elseif (function_exists('mb_convert_encoding'))
  118. {
  119. $str = @mb_convert_encoding($str, 'UTF-8', $encoding);
  120. }
  121. else
  122. {
  123. return FALSE;
  124. }
  125. return $str;
  126. }
  127. // --------------------------------------------------------------------
  128. /**
  129. * Is ASCII?
  130. *
  131. * Tests if a string is standard 7-bit ASCII or not
  132. *
  133. * @access public
  134. * @param string
  135. * @return bool
  136. */
  137. function _is_ascii($str)
  138. {
  139. return (preg_match('/[^\x00-\x7F]/S', $str) == 0);
  140. }
  141. // --------------------------------------------------------------------
  142. }
  143. // End Utf8 Class
  144. /* End of file Utf8.php */
  145. /* Location: ./system/core/Utf8.php */