1 | <?php |
---|
2 | /*********************************************************************** |
---|
3 | |
---|
4 | Copyright (C) 2002-2005 Rickard Andersson (rickard@punbb.org) |
---|
5 | |
---|
6 | This file is part of PunBB. |
---|
7 | |
---|
8 | PunBB is free software; you can redistribute it and/or modify it |
---|
9 | under the terms of the GNU General Public License as published |
---|
10 | by the Free Software Foundation; either version 2 of the License, |
---|
11 | or (at your option) any later version. |
---|
12 | |
---|
13 | PunBB is distributed in the hope that it will be useful, but |
---|
14 | WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
15 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
16 | GNU General Public License for more details. |
---|
17 | |
---|
18 | You should have received a copy of the GNU General Public License |
---|
19 | along with this program; if not, write to the Free Software |
---|
20 | Foundation, Inc., 59 Temple Place, Suite 330, Boston, |
---|
21 | MA 02111-1307 USA |
---|
22 | |
---|
23 | ************************************************************************/ |
---|
24 | |
---|
25 | |
---|
26 | // The contents of this file are very much inspired by the file functions_search.php |
---|
27 | // from the phpBB Group forum software phpBB2 (http://www.phpbb.com). |
---|
28 | |
---|
29 | |
---|
// Direct execution guard: stop immediately unless the PUN constant has been defined
if (!defined('PUN'))
{
	exit;
}
---|
33 | |
---|
34 | |
---|
//
// "Cleans up" a text string and returns an array of unique words
// This function depends on the current locale setting
//
// $text is lower-cased, stripped of entity-like sequences, URLs, bracketed
// tags and punctuation, and then split on whitespace. Leading/trailing dots
// are trimmed from every word; words shorter than 3 or longer than 20
// characters (as measured by pun_strlen()) and words listed in the current
// user's stopwords.txt are dropped.
//
// Returns an array of unique words. Keys are the positions the words had in
// the cleaned-up text, so the array may contain gaps.
//
function split_words($text)
{
	global $pun_user;
	static $noise_match, $noise_replace, $stopwords;

	// The replacement tables and the stop word list are built once and cached in the static variables
	if (empty($noise_match))
	{
		$noise_match = 		array('[quote', '[code', '[url', '[img', '[email', '[color', '[colour', 'quote]', 'code]', 'url]', 'img]', 'email]', 'color]', 'colour]', '^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '+', '[', ']', '{', '}', ':', '\\', '/', '=', '#', ';', '!', '*');
		$noise_replace =	array('',       '',      '',     '',     '',       '',       '',        '',       '',      '',     '',     '',       '',       '',        ' ', ' ', ' ', ' ', ' ', ' ', ' ', '',  '',   ' ', ' ', ' ', ' ', '',  ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' ,  ' ', ' ', ' ', ' ', ' ', ' ');

		// A missing stop word file is tolerated: file() then returns false and the list holds a single empty string
		$stopwords = (array)@file(PUN_ROOT.'lang/'.$pun_user['language'].'/stopwords.txt');
		$stopwords = array_map('trim', $stopwords);
	}

	// Clean up
	$patterns = array(
		'#&[\#a-z0-9]+?;#i',														// Entity-like sequences (&...;)
		'#\b[\w]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/~]+)?#',					// URLs of the form scheme://host[/path]
		'#\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]#'	// Bracketed tags carrying a 10+ character identifier
	);
	$text = preg_replace($patterns, ' ', ' '.strtolower($text).' ');

	// Filter out junk
	$text = str_replace($noise_match, $noise_replace, $text);

	// Strip out extra whitespace between words
	$text = trim(preg_replace('#\s+#', ' ', $text));

	// Fill an array with all the words
	$words = explode(' ', $text);

	// foreach works on a copy of the array, so removing entries from $words inside the loop is safe
	foreach ($words as $i => $word)
	{
		// Trim first so that the length and stop word checks see the word that is actually stored
		$word = trim($word, '.');
		$num_chars = pun_strlen($word);

		if ($num_chars < 3 || $num_chars > 20 || in_array($word, $stopwords, true))
			unset($words[$i]);
		else
			$words[$i] = $word;
	}

	return array_unique($words);
}
---|
82 | |
---|
83 | |
---|
//
// Updates the search index with the contents of $post_id (and $subject)
//
// $mode     When 'edit', the words currently indexed for the post are loaded
//           and only the difference (words to add / words to delete) is
//           written. Any other value indexes every word as new.
// $post_id  ID of the post being indexed; cast to an integer before use.
// $message  Post body, tokenised with split_words().
// $subject  Optional subject; only tokenised when it evaluates to true.
//
// Returns nothing. Failed queries are handed to error().
//
function update_search_index($mode, $post_id, $message, $subject = null)
{
	global $db_type, $db;

	// The ID is concatenated into several statements below, so force it to an integer
	$post_id = intval($post_id);

	// Split old and new post/subject to obtain array of 'words'
	$words_message = split_words($message);
	$words_subject = ($subject) ? split_words($subject) : array();

	if ($mode == 'edit')
	{
		// NOTE(review): the second argument (true) is passed through to the DB layer - presumably requests an unbuffered query, confirm
		$result = $db->query('SELECT w.id, w.word, m.subject_match FROM '.$db->prefix.'search_words AS w INNER JOIN '.$db->prefix.'search_matches AS m ON w.id=m.word_id WHERE m.post_id='.$post_id, true) or error('Impossible de retrouver les mots index de recherches', __FILE__, __LINE__, $db->error());

		// Declare here to stop array_keys() and array_diff() from complaining if not set
		$cur_words['post'] = array();
		$cur_words['subject'] = array();

		// Map word => word ID, separately for subject matches and post matches
		while ($row = $db->fetch_row($result))
		{
			$match_in = ($row[2]) ? 'subject' : 'post';
			$cur_words[$match_in][$row[1]] = $row[0];
		}

		$db->free_result($result);

		$words['add']['post'] = array_diff($words_message, array_keys($cur_words['post']));
		$words['add']['subject'] = array_diff($words_subject, array_keys($cur_words['subject']));
		$words['del']['post'] = array_diff(array_keys($cur_words['post']), $words_message);
		$words['del']['subject'] = array_diff(array_keys($cur_words['subject']), $words_subject);
	}
	else
	{
		$words['add']['post'] = $words_message;
		$words['add']['subject'] = $words_subject;
		$words['del']['post'] = array();
		$words['del']['subject'] = array();
	}

	unset($words_message);
	unset($words_subject);

	// Get unique words from the above arrays
	$unique_words = array_unique(array_merge($words['add']['post'], $words['add']['subject']));

	if (!empty($unique_words))
	{
		// Find out which of the words already exist in the word table (each word is wrapped in single quotes for the IN list)
		$result = $db->query('SELECT id, word FROM '.$db->prefix.'search_words WHERE word IN('.implode(',', preg_replace('#^(.*)$#', '\'\1\'', $unique_words)).')', true) or error('Impossible de retrouver les mots index de recherches', __FILE__, __LINE__, $db->error());

		$word_ids = array();
		while ($row = $db->fetch_row($result))
			$word_ids[$row[1]] = $row[0];

		$db->free_result($result);

		$new_words = array_diff($unique_words, array_keys($word_ids));
		unset($unique_words);

		if (!empty($new_words))
		{
			switch ($db_type)
			{
				// A single multi-row INSERT for the MySQL drivers
				case 'mysql':
				case 'mysqli':
					$db->query('INSERT INTO '.$db->prefix.'search_words (word) VALUES'.implode(',', preg_replace('#^(.*)$#', '(\'\1\')', $new_words))) or error('Impossible d\'ajouter les mots index de recherche', __FILE__, __LINE__, $db->error());
					break;

				// One INSERT per word for every other driver
				default:
					foreach ($new_words as $word)
						$db->query('INSERT INTO '.$db->prefix.'search_words (word) VALUES(\''.$word.'\')') or error('Impossible d\'ajouter les mots index de recherche', __FILE__, __LINE__, $db->error());
					break;
			}
		}

		unset($new_words);
	}

	// Delete matches (only if editing a post)
	foreach ($words['del'] as $match_in => $wordlist)
	{
		$subject_match = ($match_in == 'subject') ? 1 : 0;

		if (!empty($wordlist))
		{
			// Translate the words back into the IDs loaded from the current index
			$sql = '';
			foreach ($wordlist as $word)
				$sql .= (($sql != '') ? ',' : '').$cur_words[$match_in][$word];

			$db->query('DELETE FROM '.$db->prefix.'search_matches WHERE word_id IN('.$sql.') AND post_id='.$post_id.' AND subject_match='.$subject_match) or error('Impossible de supprimer des mots index de recherche', __FILE__, __LINE__, $db->error());
		}
	}

	// Add new matches
	foreach ($words['add'] as $match_in => $wordlist)
	{
		$subject_match = ($match_in == 'subject') ? 1 : 0;

		if (!empty($wordlist))
			$db->query('INSERT INTO '.$db->prefix.'search_matches (post_id, word_id, subject_match) SELECT '.$post_id.', id, '.$subject_match.' FROM '.$db->prefix.'search_words WHERE word IN('.implode(',', preg_replace('#^(.*)$#', '\'\1\'', $wordlist)).')') or error('Impossible d\'ajouter les correspondances index de recherche', __FILE__, __LINE__, $db->error());
	}

	unset($words);
}
---|
188 | |
---|
189 | |
---|
//
// Strip search index of indexed words in $post_ids
//
// $post_ids is concatenated verbatim into SQL IN(...) lists, so it has to be
// a comma-separated list of post IDs. Words whose only remaining match row
// belongs to one of those posts are removed from the word table, then every
// match row of the posts is removed. Returns nothing; failed queries are
// handed to error().
//
function strip_search_index($post_ids)
{
	global $db_type, $db;

	$prefix = $db->prefix;

	if ($db_type == 'mysql' || $db_type == 'mysqli')
	{
		// The MySQL drivers do the lookup in separate steps rather than with nested subselects
		// NOTE(review): presumably for MySQL versions without subselect support - confirm

		// Step 1: every word ID referenced by the posts
		$result = $db->query('SELECT word_id FROM '.$prefix.'search_matches WHERE post_id IN('.$post_ids.') GROUP BY word_id');
		if (!$result)
			error('Impossible de retrouver les correspondances de mots index de recherche', __FILE__, __LINE__, $db->error());

		if ($db->num_rows($result))
		{
			$id_list = '';
			while ($cur_row = $db->fetch_row($result))
			{
				if ($id_list != '')
					$id_list .= ',';

				$id_list .= $cur_row[0];
			}

			// Step 2: of those, keep the word IDs that have exactly one match row
			$result = $db->query('SELECT word_id FROM '.$prefix.'search_matches WHERE word_id IN('.$id_list.') GROUP BY word_id HAVING COUNT(word_id)=1');
			if (!$result)
				error('Impossible de retrouver les correspondances de mots index de recherche', __FILE__, __LINE__, $db->error());

			if ($db->num_rows($result))
			{
				$id_list = '';
				while ($cur_row = $db->fetch_row($result))
				{
					if ($id_list != '')
						$id_list .= ',';

					$id_list .= $cur_row[0];
				}

				// Step 3: drop those words from the word table
				if (!$db->query('DELETE FROM '.$prefix.'search_words WHERE id IN('.$id_list.')'))
					error('Impossible de supprimer des mots index de recherche', __FILE__, __LINE__, $db->error());
			}
		}
	}
	else
	{
		// Every other driver gets the same selection as one statement with nested subselects
		if (!$db->query('DELETE FROM '.$prefix.'search_words WHERE id IN(SELECT word_id FROM '.$prefix.'search_matches WHERE word_id IN(SELECT word_id FROM '.$prefix.'search_matches WHERE post_id IN('.$post_ids.') GROUP BY word_id) GROUP BY word_id HAVING COUNT(word_id)=1)'))
			error('Impossible de supprimer depuis l\'index de recherche', __FILE__, __LINE__, $db->error());
	}

	// Finally remove the match rows of the posts themselves, whatever the driver
	if (!$db->query('DELETE FROM '.$prefix.'search_matches WHERE post_id IN('.$post_ids.')'))
		error('Impossible de supprimer des correspondances de mots index de recherche', __FILE__, __LINE__, $db->error());
}
---|