<?php
/***********************************************************************

  Copyright (C) 2002-2005  Rickard Andersson (rickard@punbb.org)

  This file is part of PunBB.

  PunBB is free software; you can redistribute it and/or modify it
  under the terms of the GNU General Public License as published
  by the Free Software Foundation; either version 2 of the License,
  or (at your option) any later version.

  PunBB is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with this program; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston,
  MA  02111-1307  USA

************************************************************************/


// The contents of this file are very much inspired by the file functions_search.php
// from the phpBB Group forum software phpBB2 (http://www.phpbb.com).


// Refuse to do anything unless the including script has defined PUN,
// i.e. this file was not requested "directly"
if (!defined('PUN'))
{
    exit;
}


//
// "Cleans up" a text string and returns an array of unique words
//
// The text is lowercased, then HTML entities, URLs and bracketed tags that
// carry a 10+ character id suffix are blanked out, punctuation is removed or
// turned into spaces, and the result is split on whitespace. Each word has
// leading/trailing dots trimmed and is dropped when it is shorter than 3 or
// longer than 20 characters (according to pun_strlen()) or is listed in
// lang/<language>/stopwords.txt for the current user's language.
//
// $text      the raw text to split (e.g. a post message or a subject)
// returns    array of unique words; the keys are the positions the words
//            had in the split text and are NOT renumbered
//
// This function depends on the current locale setting (strtolower())
//
function split_words($text)
{
    global $pun_user;
    static $noise_match, $noise_replace, $stopwords;

    // The lookup tables are built once per request and cached in statics
    if (empty($noise_match))
    {
        $noise_match = array('[quote', '[code', '[url', '[img', '[email', '[color', '[colour', 'quote]', 'code]', 'url]', 'img]', 'email]', 'color]', 'colour]', '^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '~', '+', '[', ']', '{', '}', ':', '\\', '/', '=', '#', ';', '!', '*');
        $noise_replace = array('', '', '', '', '', '', '', '', '', '', '', '', '', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '', '', ' ', ' ', ' ', ' ', '', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '' , ' ', ' ', ' ', ' ', ' ', ' ');

        // A missing or unreadable stopword file simply means "no stopwords"
        $stopword_lines = @file(PUN_ROOT.'lang/'.$pun_user['language'].'/stopwords.txt');
        $stopwords = is_array($stopword_lines) ? array_map('trim', $stopword_lines) : array();
    }

    // Clean up
    $patterns = array(
        '#&[\#a-z0-9]+?;#i',
        '#\b[\w]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/~]+)?#',
        '#\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]#'
    );
    $text = preg_replace($patterns, ' ', ' '.strtolower($text).' ');

    // Filter out junk
    $text = str_replace($noise_match, $noise_replace, $text);

    // Strip out extra whitespace between words
    $text = trim(preg_replace('#\s+#', ' ', $text));

    // Fill an array with all the words worth keeping
    $words = array();
    foreach (explode(' ', $text) as $i => $word)
    {
        // Trim first, so that the length and stopword checks are made on the
        // word that is actually stored ("the." must match the stopword "the",
        // "a.." must count as one character, "..." must not yield an empty word)
        $word = trim($word, '.');
        $num_chars = pun_strlen($word);

        if ($num_chars < 3 || $num_chars > 20 || in_array($word, $stopwords, true))
            continue;

        $words[$i] = $word;
    }

    return array_unique($words);
}


//
// Updates the search index with the contents of $post_id (and $subject)
//
// $mode      'edit' compares the new words against the matches already
//            stored for the post and only adds/removes the difference;
//            any other value indexes every word as new
// $post_id   id of the post being indexed (cast to an integer before it
//            is placed in any query)
// $message   the post message
// $subject   the topic subject, or null/empty when there is no subject
//            to index
//
// Words are obtained through split_words(). They are placed in the SQL
// between single quotes without further escaping, so this relies on
// split_words() returning words free of quotes and backslashes.
//
function update_search_index($mode, $post_id, $message, $subject = null)
{
    global $db_type, $db;

    // The id is concatenated into the queries below
    $post_id = intval($post_id);

    // Split the new post/subject to obtain arrays of 'words'
    $words_message = split_words($message);
    $words_subject = ($subject) ? split_words($subject) : array();

    if ($mode == 'edit')
    {
        $result = $db->query('SELECT w.id, w.word, m.subject_match FROM '.$db->prefix.'search_words AS w INNER JOIN '.$db->prefix.'search_matches AS m ON w.id=m.word_id WHERE m.post_id='.$post_id, true) or error('Impossible de retrouver les mots index de recherches', __FILE__, __LINE__, $db->error());

        // Declare here to stop array_keys() and array_diff() from complaining if not set
        $cur_words = array('post' => array(), 'subject' => array());

        // Map word => word id for everything currently indexed for this post
        while ($row = $db->fetch_row($result))
        {
            $match_in = ($row[2]) ? 'subject' : 'post';
            $cur_words[$match_in][$row[1]] = $row[0];
        }

        $db->free_result($result);

        $words = array(
            'add' => array(
                'post'      => array_diff($words_message, array_keys($cur_words['post'])),
                'subject'   => array_diff($words_subject, array_keys($cur_words['subject']))
            ),
            'del' => array(
                'post'      => array_diff(array_keys($cur_words['post']), $words_message),
                'subject'   => array_diff(array_keys($cur_words['subject']), $words_subject)
            )
        );
    }
    else
    {
        $words = array(
            'add' => array('post' => $words_message, 'subject' => $words_subject),
            'del' => array('post' => array(), 'subject' => array())
        );
    }

    unset($words_message);
    unset($words_subject);

    // Get unique words from the above arrays
    $unique_words = array_unique(array_merge($words['add']['post'], $words['add']['subject']));

    if (!empty($unique_words))
    {
        // Find out which of the words are already known
        $result = $db->query('SELECT id, word FROM '.$db->prefix.'search_words WHERE word IN(\''.implode('\',\'', $unique_words).'\')', true) or error('Impossible de retrouver les mots index de recherches', __FILE__, __LINE__, $db->error());

        $word_ids = array();
        while ($row = $db->fetch_row($result))
            $word_ids[$row[1]] = $row[0];

        $db->free_result($result);

        $new_words = array_diff($unique_words, array_keys($word_ids));
        unset($unique_words);

        if (!empty($new_words))
        {
            switch ($db_type)
            {
                case 'mysql':
                case 'mysqli':
                    // One multi-row INSERT
                    $db->query('INSERT INTO '.$db->prefix.'search_words (word) VALUES(\''.implode('\'),(\'', $new_words).'\')') or error('Impossible d\'ajouter les mots index de recherche', __FILE__, __LINE__, $db->error());
                    break;

                default:
                    // One INSERT per word
                    foreach ($new_words as $word)
                        $db->query('INSERT INTO '.$db->prefix.'search_words (word) VALUES(\''.$word.'\')') or error('Impossible d\'ajouter les mots index de recherche', __FILE__, __LINE__, $db->error());
                    break;
            }
        }

        unset($new_words);
    }

    // Delete matches (only if editing a post)
    foreach ($words['del'] as $match_in => $wordlist)
    {
        if (empty($wordlist))
            continue;

        $subject_match = ($match_in == 'subject') ? 1 : 0;

        // Translate the words to delete back into their word ids
        $del_ids = array();
        foreach ($wordlist as $word)
            $del_ids[] = $cur_words[$match_in][$word];

        $db->query('DELETE FROM '.$db->prefix.'search_matches WHERE word_id IN('.implode(',', $del_ids).') AND post_id='.$post_id.' AND subject_match='.$subject_match) or error('Impossible de supprimer des mots index de recherche', __FILE__, __LINE__, $db->error());
    }

    // Add new matches
    foreach ($words['add'] as $match_in => $wordlist)
    {
        if (empty($wordlist))
            continue;

        $subject_match = ($match_in == 'subject') ? 1 : 0;

        $db->query('INSERT INTO '.$db->prefix.'search_matches (post_id, word_id, subject_match) SELECT '.$post_id.', id, '.$subject_match.' FROM '.$db->prefix.'search_words WHERE word IN(\''.implode('\',\'', $wordlist).'\')') or error('Impossible d\'ajouter les correspondances index de recherche', __FILE__, __LINE__, $db->error());
    }

    unset($words);
}


//
// Strip search index of indexed words in $post_ids
//
// $post_ids  comma-separated list of post ids. It is inserted verbatim
//            into IN(...) clauses, so the caller must make sure it only
//            contains integers. An empty list is a no-op.
//
// Words used by the given posts that have exactly one row in search_matches
// are removed from search_words, then every match row belonging to the
// posts is removed. NOTE: a word with more than one match row is kept even
// if all of its rows belong to the posts being stripped.
//
function strip_search_index($post_ids)
{
    global $db_type, $db;

    // Nothing to strip; an empty list would also produce an invalid "IN()"
    if (trim((string) $post_ids) === '')
        return;

    switch ($db_type)
    {
        case 'mysql':
        case 'mysqli':
        {
            // Done in separate steps instead of nested subqueries
            // (presumably for MySQL versions without subquery support)
            $result = $db->query('SELECT word_id FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.') GROUP BY word_id') or error('Impossible de retrouver les correspondances de mots index de recherche', __FILE__, __LINE__, $db->error());

            if ($db->num_rows($result))
            {
                // All words used by the posts
                $word_ids = array();
                while ($row = $db->fetch_row($result))
                    $word_ids[] = $row[0];

                $result = $db->query('SELECT word_id FROM '.$db->prefix.'search_matches WHERE word_id IN('.implode(',', $word_ids).') GROUP BY word_id HAVING COUNT(word_id)=1') or error('Impossible de retrouver les correspondances de mots index de recherche', __FILE__, __LINE__, $db->error());

                if ($db->num_rows($result))
                {
                    // Of those, the words that have a single match row
                    $word_ids = array();
                    while ($row = $db->fetch_row($result))
                        $word_ids[] = $row[0];

                    $db->query('DELETE FROM '.$db->prefix.'search_words WHERE id IN('.implode(',', $word_ids).')') or error('Impossible de supprimer des mots index de recherche', __FILE__, __LINE__, $db->error());
                }
            }

            break;
        }

        default:
            $db->query('DELETE FROM '.$db->prefix.'search_words WHERE id IN(SELECT word_id FROM '.$db->prefix.'search_matches WHERE word_id IN(SELECT word_id FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.') GROUP BY word_id) GROUP BY word_id HAVING COUNT(word_id)=1)') or error('Impossible de supprimer depuis l\'index de recherche', __FILE__, __LINE__, $db->error());
            break;
    }

    $db->query('DELETE FROM '.$db->prefix.'search_matches WHERE post_id IN('.$post_ids.')') or error('Impossible de supprimer des correspondances de mots index de recherche', __FILE__, __LINE__, $db->error());
}