1 | <?php |
---|
2 | |
---|
3 | /** |
---|
4 | * @version $Id: core.php,v 1.5 2006/02/28 22:12:25 harryf Exp $ |
---|
5 | * @package utf8 |
---|
6 | * @subpackage strings |
---|
7 | */ |
---|
8 | |
---|
// Define the UTF8_CORE constant (as TRUE) unless it has already been defined
defined('UTF8_CORE') || define('UTF8_CORE', true);
---|
12 | |
---|
/**
 * Count the characters of a UTF-8 string.
 *
 * Thin wrapper around mb_strlen(). No encoding argument is passed, so the
 * result depends on mbstring's internal encoding, which is expected to be
 * set to UTF-8 before this function is called.
 *
 * NOTE(review): earlier documentation stated that bad bytes are not counted;
 * nothing in this wrapper enforces that - confirm against mb_strlen() itself.
 *
 * @param string $str UTF-8 string
 * @return int number of UTF-8 characters in string, as reported by mb_strlen()
 * @package utf8
 * @subpackage strings
 */
function utf8_strlen($str)
{
    $charCount = mb_strlen($str);

    return $charCount;
}
---|
27 | |
---|
/**
 * Find the character position of the first occurrence of a substring.
 *
 * The haystack is first passed through utf8_bad_strip() (helper defined
 * elsewhere; presumably removes invalid UTF-8 sequences) and the search is
 * then delegated to mb_strpos(). The returned position therefore refers to
 * the stripped haystack. No encoding argument is passed, so mbstring's
 * internal encoding is expected to be UTF-8 already.
 *
 * @param string $str haystack
 * @param string $search needle (you should validate this with utf8_is_valid)
 * @param integer|boolean $offset offset in characters (from left); FALSE,
 *        the default, means no offset is handed to mb_strpos()
 * @return mixed integer position or FALSE on failure
 * @package utf8
 * @subpackage strings
 */
function utf8_strpos($str, $search, $offset = false)
{
    // Clean the haystack only; the needle is used exactly as supplied
    $haystack = utf8_bad_strip($str);

    if ($offset !== false) {
        return mb_strpos($haystack, $search, $offset);
    }

    return mb_strpos($haystack, $search);
}
---|
49 | |
---|
/**
 * Find the character position of the last occurrence of a substring.
 *
 * The haystack is first passed through utf8_bad_strip() (helper defined
 * elsewhere; presumably removes invalid UTF-8 sequences), so the returned
 * position refers to the stripped haystack. The search itself is done with
 * mb_strrpos() without an explicit encoding, so mbstring's internal encoding
 * is expected to be UTF-8 already.
 *
 * When an offset is given, only the part of the haystack starting at that
 * character offset is searched and the offset is added back to the result.
 *
 * NOTE(review): a negative $offset is not handled specially - mb_substr()
 * then counts from the end of the string and adding the offset back does not
 * give a left-based position. Confirm whether any caller relies on this.
 *
 * @param string $str haystack
 * @param string $search needle (you should validate this with utf8_is_valid)
 * @param integer|boolean $offset (optional) offset in characters (from left);
 *        any falsy value (FALSE, 0) means "search the whole string"
 * @return mixed integer position or FALSE on failure (needle not found,
 *         empty haystack, or a non-integer offset, which also raises an
 *         E_USER_WARNING)
 * @package utf8
 * @subpackage strings
 */
function utf8_strrpos($str, $search, $offset = false)
{
    // Strip invalid characters
    $str = utf8_bad_strip($str);

    if (!$offset) {
        // Emulate behaviour of strrpos rather than raising warning.
        // Compare explicitly instead of using empty(): empty('0') is TRUE,
        // which made the haystack "0" look empty and hid a valid match.
        if ($str === '' || $str === null || $str === false) {
            return false;
        }

        return mb_strrpos($str, $search);
    }

    if (!is_int($offset)) {
        trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_WARNING);
        return false;
    }

    // Search only the tail starting at $offset, then translate the match
    // position back into the coordinates of the full (stripped) haystack
    $tail = mb_substr($str, $offset);
    $pos = mb_strrpos($tail, $search);

    if ($pos === false) {
        return false;
    }

    return $pos + $offset;
}
---|
90 | |
---|
/**
 * Return part of a string, addressed by character offset and, optionally,
 * a length in characters.
 *
 * Thin wrapper around mb_substr(). No encoding argument is passed, so
 * mbstring's internal encoding is expected to be UTF-8 already.
 *
 * @param string $str source string
 * @param integer $offset number of UTF-8 characters offset (from left)
 * @param integer|boolean $length (optional) length in UTF-8 characters from
 *        offset; FALSE, the default, means no length is handed to mb_substr()
 * @return mixed the value returned by mb_substr() (documented here
 *         historically as "string or FALSE if failure")
 * @package utf8
 * @subpackage strings
 */
function utf8_substr($str, $offset, $length = false)
{
    $hasLength = ($length !== false);

    return $hasLength
        ? mb_substr($str, $offset, $length)
        : mb_substr($str, $offset);
}
---|
109 | |
---|
/**
 * Make a string lowercase.
 *
 * Thin wrapper around mb_strtolower(). No encoding argument is passed, so
 * mbstring's internal encoding is expected to be UTF-8 already.
 *
 * Note: The concept of a character's "case" only exists in some alphabets
 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 * not exist in the Chinese alphabet, for example. See Unicode Standard
 * Annex #21: Case Mappings
 *
 * NOTE(review): earlier documentation promised FALSE for invalid UTF-8;
 * this wrapper performs no validation itself - confirm against
 * mb_strtolower().
 *
 * @param string $str string to convert
 * @return mixed the value returned by mb_strtolower() (the lowercased string)
 * @package utf8
 * @subpackage strings
 */
function utf8_strtolower($str)
{
    $lowered = mb_strtolower($str);

    return $lowered;
}
---|
127 | |
---|
/**
 * Make a string uppercase.
 *
 * Thin wrapper around mb_strtoupper(). No encoding argument is passed, so
 * mbstring's internal encoding is expected to be UTF-8 already.
 *
 * Note: The concept of a character's "case" only exists in some alphabets
 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 * not exist in the Chinese alphabet, for example. See Unicode Standard
 * Annex #21: Case Mappings
 *
 * NOTE(review): earlier documentation promised FALSE for invalid UTF-8;
 * this wrapper performs no validation itself - confirm against
 * mb_strtoupper().
 *
 * @param string $str string to convert
 * @return mixed the value returned by mb_strtoupper() (the uppercased string)
 * @package utf8
 * @subpackage strings
 */
function utf8_strtoupper($str)
{
    $uppered = mb_strtoupper($str);

    return $uppered;
}
---|