<?php

/**
 * @version $Id: core.php,v 1.5 2006/02/28 22:12:25 harryf Exp $
 * @package utf8
 * @subpackage strings
 */
---|
| 8 | |
---|
// Ensure the UTF8_CORE constant exists (set to true) without redefining it
// when something else has already defined it.
defined('UTF8_CORE') || define('UTF8_CORE', true);
---|
| 12 | |
---|
/**
 * Count the characters in a UTF-8 string.
 *
 * Thin wrapper around mb_strlen() with the encoding pinned to UTF-8, so the
 * result does not depend on the current mb_internal_encoding() setting.
 *
 * Note: byte sequences that are not well-formed UTF-8 are counted however
 * mb_strlen() chooses to count them; validate or strip the input first if an
 * exact character count matters.
 *
 * @param string UTF-8 string
 * @return int number of UTF-8 characters in string
 * @package utf8
 * @subpackage strings
 */
function utf8_strlen($str)
{
    return mb_strlen($str, 'UTF-8');
}
---|
| 27 | |
---|
/**
 * Find the character position of the first occurrence of a string.
 *
 * Wrapper around mb_strpos() with the encoding pinned to UTF-8, so the result
 * does not depend on the current mb_internal_encoding() setting.
 *
 * The haystack is first passed through utf8_bad_strip() (defined elsewhere;
 * presumably it removes invalid UTF-8 byte sequences), so the returned
 * position refers to the stripped haystack.
 *
 * @param string haystack
 * @param string needle (you should validate this with utf8_is_valid)
 * @param integer offset in characters (from left); FALSE means "no offset"
 * @return mixed integer position or FALSE on failure
 * @package utf8
 * @subpackage strings
 */
function utf8_strpos($str, $search, $offset = false)
{
    // Strip invalid characters before searching
    $str = utf8_bad_strip($str);

    // FALSE is this function's "not given" marker; mb_strpos() defaults to 0,
    // so passing 0 explicitly is the same search.
    $start = ($offset === false) ? 0 : $offset;

    return mb_strpos($str, $search, $start, 'UTF-8');
}
---|
| 49 | |
---|
/**
 * Find the character position of the last occurrence of a string.
 *
 * Wrapper around mb_strrpos() with the encoding pinned to UTF-8, so the result
 * does not depend on the current mb_internal_encoding() setting.
 *
 * The haystack is first passed through utf8_bad_strip() (defined elsewhere;
 * presumably it removes invalid UTF-8 byte sequences), so the returned
 * position refers to the stripped haystack.
 *
 * When an offset is given, only the part of the haystack starting at that
 * character offset is searched, and the returned position is still counted
 * from the start of the (stripped) haystack. A negative offset is counted
 * back from the end of the haystack and clamped to its start.
 *
 * @param string haystack
 * @param string needle (you should validate this with utf8_is_valid)
 * @param integer (optional) offset (from left); must be an int when non-zero
 * @return mixed integer position or FALSE on failure
 * @package utf8
 * @subpackage strings
 */
function utf8_strrpos($str, $search, $offset = false)
{
    // Strip invalid characters before searching
    $str = utf8_bad_strip($str);

    if (!$offset)
    {
        // Emulate behaviour of strrpos rather than raising a warning on an
        // empty haystack. Compare against '' explicitly: empty() would also
        // reject the perfectly valid haystack "0".
        if ((string) $str === '')
        {
            return false;
        }

        return mb_strrpos($str, $search, 0, 'UTF-8');
    }

    if (!is_int($offset))
    {
        trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_WARNING);
        return false;
    }

    $total = mb_strlen($str, 'UTF-8');

    // Turn a negative offset into the equivalent position from the left, so
    // that adding it back to the match position gives an absolute index.
    if ($offset < 0)
    {
        $offset = max(0, $total + $offset);
    }

    // $total is only an upper bound for the length; mb_substr() clamps it.
    $tail = mb_substr($str, $offset, $total, 'UTF-8');

    if ((string) $tail === '')
    {
        return false;
    }

    $pos = mb_strrpos($tail, $search, 0, 'UTF-8');

    return ($pos === false) ? false : $pos + $offset;
}
---|
| 90 | |
---|
/**
 * Return part of a string given a character offset (and optionally a length).
 *
 * Wrapper around mb_substr() with the encoding pinned to UTF-8, so the result
 * does not depend on the current mb_internal_encoding() setting.
 *
 * @param string
 * @param integer number of UTF-8 characters offset (from left)
 * @param integer (optional) length in UTF-8 characters from offset; FALSE
 *                means "everything from the offset to the end"
 * @return mixed the selected portion, as returned by mb_substr()
 * @package utf8
 * @subpackage strings
 */
function utf8_substr($str, $offset, $length = false)
{
    if ($length === false)
    {
        // No length requested: the total character count is a safe upper
        // bound because mb_substr() clamps the length to what is available.
        // This avoids passing a NULL length, which older PHP versions treat
        // as 0.
        $length = mb_strlen($str, 'UTF-8');
    }

    return mb_substr($str, $offset, $length, 'UTF-8');
}
---|
| 109 | |
---|
/**
 * Make a string lowercase.
 *
 * Wrapper around mb_strtolower() with the encoding pinned to UTF-8, so the
 * result does not depend on the current mb_internal_encoding() setting.
 *
 * Note: The concept of a character's "case" only exists in some alphabets
 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 * not exist in the Chinese alphabet, for example. See Unicode Standard
 * Annex #21: Case Mappings
 *
 * @param string
 * @return string the input with cased characters converted to lowercase
 * @package utf8
 * @subpackage strings
 */
function utf8_strtolower($str)
{
    return mb_strtolower($str, 'UTF-8');
}
---|
| 127 | |
---|
/**
 * Make a string uppercase.
 *
 * Wrapper around mb_strtoupper() with the encoding pinned to UTF-8, so the
 * result does not depend on the current mb_internal_encoding() setting.
 *
 * Note: The concept of a character's "case" only exists in some alphabets
 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 * not exist in the Chinese alphabet, for example. See Unicode Standard
 * Annex #21: Case Mappings
 *
 * @param string
 * @return string the input with cased characters converted to uppercase
 * @package utf8
 * @subpackage strings
 */
function utf8_strtoupper($str)
{
    return mb_strtoupper($str, 'UTF-8');
}
---|