[ Index ] |
PHP Cross Reference of Unnamed Project |
[Summary view] [Print] [Text view]
<?php
/**
*
* @package search
* @version $Id$
* @copyright (c) 2005 phpBB Group
* @license http://opensource.org/licenses/gpl-license.php GNU Public License
*
*/

/**
* @ignore
*/
if (!defined('IN_PHPBB'))
{
	exit;
}

/**
* @ignore
*/
include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);

/**
* fulltext_native
* phpBB's own db driven fulltext search, version 2
* @package search
*/
class fulltext_native extends search_backend
{
	var $stats = array();
	// array('min' => ..., 'max' => ...) word length limits, filled by the constructor from $config
	var $word_length = array();
	// cleaned search query as produced by split_keywords()
	var $search_query;
	// words rejected as common or as outside the configured length range
	var $common_words = array();

	var $must_contain_ids = array();
	var $must_not_contain_ids = array();
	var $must_exclude_one_ids = array();

	/**
	* Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded.
	*
	* Declared as __construct() because PHP 8 no longer treats a method named
	* after its class as a constructor; without it the setup below would be
	* skipped on instantiation.
	*
	* @param boolean|string &$error is passed by reference and should either be set to false on success or an error message on failure.
	*
	* @access public
	*/
	function __construct(&$error)
	{
		global $phpbb_root_path, $phpEx, $config;

		$this->word_length = array('min' => $config['fulltext_native_min_chars'], 'max' => $config['fulltext_native_max_chars']);

		/**
		* Load the UTF tools
		*/
		if (!class_exists('utf_normalizer'))
		{
			include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
		}

		$error = false;
	}

	/**
	* PHP 4 style constructor, kept as an alias of __construct() so that
	* existing explicit calls to fulltext_native() keep working.
	*
	* @param boolean|string &$error is passed by reference, see __construct()
	*
	* @access public
	*/
	function fulltext_native(&$error)
	{
		$this->__construct($error);
	}

	/**
	* This function fills $this->search_query with the cleaned user search query.
	*
	* If $terms is 'any' then the words will be extracted from the search query
	* and combined with | inside brackets. They will afterwards be treated like
	* an standard search query.
	*
	* Then it analyses the query and fills the internal arrays $must_not_contain_ids,
	* $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
*
	* @param string $keywords contains the search query string as entered by the user
	* @param string $terms is either 'all' (use search query as entered, default words to 'must be contained in post')
	*  or 'any' (find all posts containing at least one of the given words)
	* @return boolean false if no valid keywords were found and otherwise true
	*  (false is also returned when the query contains no "must contain" term,
	*  because a search for negatives only is not possible)
	*
	* Calls trigger_error() when the number of keywords exceeds
	* $config['max_num_search_keywords'] or when a required word is of valid
	* length but is not present in the word list.
	*
	* @access public
	*/
	function split_keywords($keywords, $terms)
	{
		global $db, $user, $config;

		$tokens = '+-|()*';

		$keywords = trim($this->cleanup($keywords, $tokens));

		// allow word|word|word without brackets
		if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
		{
			$keywords = '(' . $keywords . ')';
		}

		// $open_bracket holds the offset of the currently open "(" or false,
		// $space holds the last seen +/- prefix (or false) so that blanks
		// following a prefix are replaced by that prefix
		$open_bracket = $space = false;
		for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
		{
			if ($open_bracket !== false)
			{
				// inside a bracket group everything that separates words becomes "|"
				switch ($keywords[$i])
				{
					case ')':
						if ($open_bracket + 1 == $i)
						{
							// empty group "()": overwrite both brackets
							$keywords[$i - 1] = '|';
							$keywords[$i] = '|';
						}
						$open_bracket = false;
					break;
					case '(':
						$keywords[$i] = '|';
					break;
					case '+':
					case '-':
					case ' ':
						$keywords[$i] = '|';
					break;
					case '*':
						// drop a wildcard that stands alone between two token characters
						if ($i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0))
						{
							if ($i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0))
							{
								$keywords = substr($keywords, 0, $i) . substr($keywords, $i + 1);

								// The string is one character shorter now and the
								// following character moved to offset $i: shrink
								// the bound and step back so that it is processed
								// instead of being skipped (it may be the closing
								// bracket) and no offset past the end is read.
								$n--;
								$i--;
							}
						}
					break;
				}
			}
			else
			{
				switch ($keywords[$i])
				{
					case ')':
						$keywords[$i] = ' ';
					break;
					case '(':
						$open_bracket = $i;
						$space = false;
					break;
					case '|':
						$keywords[$i] = ' ';
					break;
					case '-':
					case '+':
						$space = $keywords[$i];
					break;
					case ' ':
						if ($space !== false)
						{
							$keywords[$i] = $space;
						}
					break;
					default:
						$space = false;
				}
			}
		}

		// Close a bracket the user left open. Compare against false explicitly:
		// a bracket opened at offset 0 is a valid open bracket as well.
		if ($open_bracket !== false)
		{
			$keywords .= ')';
		}

		// collapse repeated blanks, pipes and prefixes, strip pipes next to brackets
		$match = array(
			'# +#',
			'#\|\|+#',
			'#(\+|\-)(?:\+|\-)+#',
			'#\(\|#',
			'#\|\)#',
		);
		$replace = array(
			' ',
			'|',
			'$1',
			'(',
			')',
		);

		$keywords = preg_replace($match, $replace, $keywords);
		$num_keywords = sizeof(explode(' ', $keywords));

		// We limit the number of allowed keywords to minimize load on the database
		if ($config['max_num_search_keywords'] && $num_keywords > $config['max_num_search_keywords'])
		{
			trigger_error($user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', $config['max_num_search_keywords'], $num_keywords));
		}

		// $keywords input format: each word separated by a space, words in a bracket are not separated

		// the user wants to search for any word, convert the search query
		if ($terms == 'any')
		{
			$words = array();

			preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
			if (sizeof($words[1]))
			{
				$keywords = '(' . implode('|', $words[1]) . ')';
			}
		}

		// set the search_query which is shown to the user
		$this->search_query = $keywords;

		// words without wildcard can be looked up directly in the word list
		$exact_words = array();
		preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
		$exact_words = $exact_words[1];

		$common_ids = $words = array();

		if (sizeof($exact_words))
		{
			$sql = 'SELECT word_id, word_text, word_common
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $db->sql_in_set('word_text', $exact_words) . '
				ORDER BY word_count ASC';
			$result = $db->sql_query($sql);

			// store an array of words and ids, remove common words
			while ($row = $db->sql_fetchrow($result))
			{
				if ($row['word_common'])
				{
					$this->common_words[] = $row['word_text'];
					$common_ids[$row['word_text']] = (int) $row['word_id'];
					continue;
				}

				$words[$row['word_text']] = (int) $row['word_id'];
			}
			$db->sql_freeresult($result);
		}
		unset($exact_words);

		// now analyse the search query, first split it using the spaces
		$query = explode(' ', $keywords);

		$this->must_contain_ids = array();
		$this->must_not_contain_ids = array();
		$this->must_exclude_one_ids = array();

		$mode = '';
		$ignore_no_id = true;

		foreach ($query as $word)
		{
			if (empty($word))
			{
				continue;
			}

			// words which should not be included
			if ($word[0] == '-')
			{
				$word = substr($word, 1);

				// a group of which at least one may not be in the resulting posts
				// (isset: nothing may be left after stripping the prefix)
				if (isset($word[0]) && $word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
					$mode = 'must_exclude_one';
				}
				// one word which should not be in the resulting posts
				else
				{
					$mode = 'must_not_contain';
				}
				$ignore_no_id = true;
			}
			// words which have to be included
			else
			{
				// no prefix is the same as a +prefix
				if ($word[0] == '+')
				{
					$word = substr($word, 1);
				}

				// a group of words of which at least one word should be in every resulting post
				if (isset($word[0]) && $word[0] == '(')
				{
					$word = array_unique(explode('|', substr($word, 1, -1)));
				}
				$ignore_no_id = false;
				$mode = 'must_contain';
			}

			if (empty($word))
			{
				continue;
			}

			// if this is an array of words then retrieve an id for each
			if (is_array($word))
			{
				$non_common_words = array();
				$id_words = array();
				foreach ($word as $word_part)
				{
					if (strpos($word_part, '*') !== false)
					{
						// wildcard words are stored as quoted, escaped LIKE patterns
						$id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
						$non_common_words[] = $word_part;
					}
					else if (isset($words[$word_part]))
					{
						$id_words[] = $words[$word_part];
						$non_common_words[] = $word_part;
					}
					else
					{
						$len = utf8_strlen($word_part);
						if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
						{
							$this->common_words[] = $word_part;
						}
					}
				}
				if (sizeof($id_words))
				{
					sort($id_words);
					if (sizeof($id_words) > 1)
					{
						$this->{$mode . '_ids'}[] = $id_words;
					}
					else
					{
						// a group with a single usable word behaves like a plain word
						$mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
						$this->{$mode . '_ids'}[] = $id_words[0];
					}
				}
				// throw an error if we shall not ignore unexistant words
				else if (!$ignore_no_id && sizeof($non_common_words))
				{
					trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $non_common_words)));
				}
				unset($non_common_words);
			}
			// else we only need one id
			else if (($wildcard = (strpos($word, '*') !== false)) || isset($words[$word]))
			{
				if ($wildcard)
				{
					$len = utf8_strlen(str_replace('*', '', $word));
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
					{
						$this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
					}
					else
					{
						$this->common_words[] = $word;
					}
				}
				else
				{
					$this->{$mode . '_ids'}[] = $words[$word];
				}
			}
			// throw an error if we shall not ignore unexistant words
			else if (!$ignore_no_id)
			{
				if (!isset($common_ids[$word]))
				{
					$len = utf8_strlen($word);
					if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
					{
						trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
					}
					else
					{
						$this->common_words[] = $word;
					}
				}
			}
			else
			{
				$len = utf8_strlen($word);
				if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
				{
					$this->common_words[] = $word;
				}
			}
		}

		// we can't search for negatives only
		if (!sizeof($this->must_contain_ids))
		{
			return false;
		}

		if (!empty($this->search_query))
		{
			return true;
		}
		return false;
	}

	/**
	* Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
	*
	* @param string $type contains either posts or topics depending on what should be searched for
	* @param string $fields contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
	* @param string $terms is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
	* @param array $sort_by_sql contains SQL code for the ORDER BY part of a query
	* @param string $sort_key is the key of $sort_by_sql for the selected sorting
	* @param string $sort_dir is either a or d representing ASC and DESC
	* @param string $sort_days specifies the maximum amount of days a post may be old
	* @param array $ex_fid_ary specifies an array of forum ids which should not be searched
	* @param array
$m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts
	* @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param array $author_ary an array of author ids if the author should be ignored during the search the array is empty
	* @param string $author_name specifies the author match, when ANONYMOUS is also a search-match
	* @param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param int $start indicates the first index of the page
	* @param int $per_page number of ids each page is supposed to contain
	* @return boolean|int total number of results, false if there is no search query or nothing was found
	*
	* @access public
	*/
	function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No keywords? No posts.
		if (empty($this->search_query))
		{
			return false;
		}

		// sorted copies, so that the cache key does not depend on the word order
		$must_contain_ids = $this->must_contain_ids;
		$must_not_contain_ids = $this->must_not_contain_ids;
		$must_exclude_one_ids = $this->must_exclude_one_ids;

		sort($must_contain_ids);
		sort($must_not_contain_ids);
		sort($must_exclude_one_ids);

		// generate a search_key from all the options to identify the results
		$search_key = md5(implode('#', array(
			serialize($must_contain_ids),
			serialize($must_not_contain_ids),
			serialize($must_exclude_one_ids),
			$type,
			$fields,
			$terms,
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary),
			$author_name,
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		$sql_where = array();
		// $m_num / $w_num number the wordmatch (m0, m1, ...) and wordlist (w0, w1, ...) aliases
		$m_num = 0;
		$w_num = 0;

		$sql_array = array(
			'SELECT'	=> ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
			'FROM'		=> array(
				SEARCH_WORDMATCH_TABLE	=> array(),
				SEARCH_WORDLIST_TABLE	=> array(),
			),
			'LEFT_JOIN'	=> array(array(
				'FROM'	=> array(POSTS_TABLE => 'p'),
				'ON'	=> 'm0.post_id = p.post_id',
			)),
		);

		$title_match = '';
		$left_join_topics = false;
		$group_by = true;
		// Build some display specific sql strings
		switch ($fields)
		{
			case 'titleonly':
				$title_match = 'title_match = 1';
				$group_by = false;
			// no break
			case 'firstpost':
				$left_join_topics = true;
				$sql_where[] = 'p.post_id = t.topic_first_post_id';
			break;

			case 'msgonly':
				$title_match = 'title_match = 0';
				$group_by = false;
			break;
		}

		if ($type == 'topics')
		{
			$left_join_topics = true;
			$group_by = true;
		}

		/**
		* @todo Add a query optimizer (handle stuff like "+(4|3) +4")
		*/

		// Entries are either an integer word id, a quoted LIKE pattern (string)
		// or an array of those (group of alternatives), see split_keywords()
		foreach ($this->must_contain_ids as $subquery)
		{
			if (is_array($subquery))
			{
				$group_by = true;

				$word_ids = array();
				foreach ($subquery as $id)
				{
					if (is_string($id))
					{
						$sql_array['LEFT_JOIN'][] = array(
							'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
							'ON'	=> "w$w_num.word_text LIKE $id"
						);
						$word_ids[] = "w$w_num.word_id";

						$w_num++;
					}
					else
					{
						$word_ids[] = $id;
					}
				}

				// NOTE(review): $word_ids may mix integer ids with column
				// references such as "w0.word_id" - confirm that sql_in_set()
				// emits string entries as column references, not quoted values
				$sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);

				unset($word_ids);
			}
			else if (is_string($subquery))
			{
				$sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;

				$sql_where[] = "w$w_num.word_text LIKE $subquery";
				$sql_where[] = "m$m_num.word_id = w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
			else
			{
				$sql_where[] = "m$m_num.word_id = $subquery";
			}

			$sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;

			if ($title_match)
			{
				$sql_where[] = "m$m_num.$title_match";
			}

			// every further match table has to refer to the same post as m0
			if ($m_num != 0)
			{
				$sql_where[] = "m$m_num.post_id = m0.post_id";
			}
			$m_num++;
		}

		foreach ($this->must_not_contain_ids as $key => $subquery)
		{
			if (is_string($subquery))
			{
				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
					'ON'	=> "w$w_num.word_text LIKE $subquery"
				);

				// replace the LIKE pattern by the column holding the matched word id
				$this->must_not_contain_ids[$key] = "w$w_num.word_id";

				$group_by = true;
				$w_num++;
			}
		}

		if (sizeof($this->must_not_contain_ids))
		{
			// left join the excluded words and require that no match row exists
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
				'ON'	=> $db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
			);

			$sql_where[] = "m$m_num.word_id IS NULL";
			$m_num++;
		}

		foreach ($this->must_exclude_one_ids as $ids)
		{
			$is_null_joins = array();
			foreach ($ids as $id)
			{
				if (is_string($id))
				{
					$sql_array['LEFT_JOIN'][] = array(
						'FROM'	=> array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
						'ON'	=> "w$w_num.word_text LIKE $id"
					);
					$id = "w$w_num.word_id";

					$group_by = true;
					$w_num++;
				}

				$sql_array['LEFT_JOIN'][] = array(
					'FROM'	=> array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
					'ON'	=> "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
				);
				$is_null_joins[] = "m$m_num.word_id IS NULL";

				$m_num++;
			}
			// at least one word of the group has to be missing
			$sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
		}

		if (!sizeof($m_approve_fid_ary))
		{
			$sql_where[] = 'p.post_approved = 1';
		}
		else if ($m_approve_fid_ary !== array(-1))
		{
			$sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		if ($topic_id)
		{
			// cast like author_search() does, the value is concatenated into the query
			$sql_where[] = 'p.topic_id = ' . (int) $topic_id;
		}

		if (sizeof($author_ary))
		{
			if ($author_name)
			{
				// first one matches post of registered users, second one guests and deleted users
				// NOTE(review): $author_name is concatenated as an SQL fragment - presumably
				// the caller passes a prepared, escaped comparison; verify against caller
				$sql_author = '(' . $db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
			}
			else
			{
				$sql_author = $db->sql_in_set('p.poster_id', $author_ary);
			}
			$sql_where[] = $sql_author;
		}

		if (sizeof($ex_fid_ary))
		{
			$sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
		}

		if ($sort_days)
		{
			$sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);

		$is_mysql = false;
		// if the total result count is not cached yet, retrieve it from the db
		if (!$total_results)
		{
			$sql = '';
			$sql_array_count = $sql_array;

			if ($left_join_topics)
			{
				$sql_array_count['LEFT_JOIN'][] = array(
					'FROM'	=> array(TOPICS_TABLE => 't'),
					'ON'	=> 'p.topic_id = t.topic_id'
				);
			}

			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':

					// 3.x does not support SQL_CALC_FOUND_ROWS
					// $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
					// counting is done after the id query below
					$is_mysql = true;

				break;

				case 'sqlite':
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
					$sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
							FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';

				// no break

				default:
					$sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
					// keep the sqlite statement if it was built above
					$sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;

					$result = $db->sql_query($sql);
					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}

			unset($sql_array_count, $sql);
		}

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');

		// the first character of the sort expression is the table alias it refers to
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_array['FROM'][USERS_TABLE] = 'u';
				$sql_where[] = 'u.user_id = p.poster_id ';
			break;

			case 't':
				$left_join_topics = true;
			break;

			case 'f':
				$sql_array['FROM'][FORUMS_TABLE] = 'f';
				$sql_where[] = 'f.forum_id = p.forum_id';
			break;
		}

		if ($left_join_topics)
		{
			$sql_array['LEFT_JOIN'][] = array(
				'FROM'	=> array(TOPICS_TABLE => 't'),
				'ON'	=> 'p.topic_id = t.topic_id'
			);
		}

		$sql_array['WHERE'] = implode(' AND ', $sql_where);
		$sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
		$sql_array['ORDER_BY'] = $sql_sort;

		unset($sql_where, $sql_sort, $group_by);

		$sql = $db->sql_build_query('SELECT', $sql_array);
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
		}
		$db->sql_freeresult($result);

		if (!sizeof($id_ary))
		{
			return false;
		}

		// if we use mysql and the total result count is not cached yet, retrieve it from the db
		if (!$total_results && $is_mysql)
		{
			// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
			$sql_array_copy = $sql_array;
			$sql_array_copy['SELECT'] = 'SQL_CALC_FOUND_ROWS p.post_id ';

			$sql = $db->sql_build_query('SELECT', $sql_array_copy);
			unset($sql_array_copy);

			// keep the handle of this query, so that this result is released
			// and not the already freed result of the id query above
			$result = $db->sql_query($sql);
			$db->sql_freeresult($result);

			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		// store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
		$this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
		$id_ary = array_slice($id_ary, 0, (int) $per_page);

		return $total_results;
	}

	/**
	* Performs a search on an author's posts without caring about message contents. Depends on display specific params
	*
	* @param string $type contains either posts or topics depending on what should be searched for
	* @param boolean $firstpost_only if true, only topic starting posts will be considered
	* @param array $sort_by_sql contains SQL code for the ORDER BY part of a query
	* @param string $sort_key is the key of $sort_by_sql for the selected sorting
	* @param string $sort_dir is either a or d representing ASC and DESC
	* @param string $sort_days specifies the maximum amount of days a post may be old
	* @param array $ex_fid_ary specifies an array of forum ids which should not be searched
	* @param array $m_approve_fid_ary specifies an array of forum ids in which the searcher is allowed to view unapproved posts
	* @param int $topic_id is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
	* @param array $author_ary an array of author ids
	* @param string $author_name specifies the author match, when ANONYMOUS is also a search-match
	*
@param array &$id_ary passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
	* @param int $start indicates the first index of the page
	* @param int $per_page number of ids each page is supposed to contain
	* @return boolean|int total number of results, 0 if no author was given, false if nothing was found
	*
	* @access public
	*/
	function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
	{
		global $config, $db;

		// No author? No posts.
		if (!sizeof($author_ary))
		{
			return 0;
		}

		// generate a search_key from all the options to identify the results
		// (the empty entries fill the slots keyword_search() uses for its word ids, $fields and $terms)
		$search_key = md5(implode('#', array(
			'',
			$type,
			($firstpost_only) ? 'firstpost' : '',
			'',
			'',
			$sort_days,
			$sort_key,
			$topic_id,
			implode(',', $ex_fid_ary),
			implode(',', $m_approve_fid_ary),
			implode(',', $author_ary),
			$author_name,
		)));

		// try reading the results from cache
		$total_results = 0;
		if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
		{
			return $total_results;
		}

		$id_ary = array();

		// Create some display specific sql strings
		if ($author_name)
		{
			// first one matches post of registered users, second one guests and deleted users
			// NOTE(review): $author_name is concatenated as an SQL fragment - presumably
			// the caller passes a prepared, escaped comparison; verify against caller
			$sql_author = '(' . $db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
		}
		else
		{
			$sql_author = $db->sql_in_set('p.poster_id', $author_ary);
		}
		$sql_fora = (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
		$sql_time = ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
		$sql_topic_id = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
		$sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';

		// Build sql strings for sorting
		$sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
		$sql_sort_table = $sql_sort_join = '';
		// the first character of the sort expression is the table alias it refers to
		switch ($sql_sort[0])
		{
			case 'u':
				$sql_sort_table = USERS_TABLE . ' u, ';
				$sql_sort_join = ' AND u.user_id = p.poster_id ';
			break;

			case 't':
				// the topics table is already part of the query for topic searches and first post searches
				$sql_sort_table = ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
				$sql_sort_join = ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
			break;

			case 'f':
				$sql_sort_table = FORUMS_TABLE . ' f, ';
				$sql_sort_join = ' AND f.forum_id = p.forum_id ';
			break;
		}

		if (!sizeof($m_approve_fid_ary))
		{
			$m_approve_fid_sql = ' AND p.post_approved = 1';
		}
		else if ($m_approve_fid_ary == array(-1))
		{
			$m_approve_fid_sql = '';
		}
		else
		{
			$m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
		}

		$select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
		$is_mysql = false;

		// If the cache was completely empty count the results
		if (!$total_results)
		{
			switch ($db->sql_layer)
			{
				case 'mysql4':
				case 'mysqli':
					// $select = 'SQL_CALC_FOUND_ROWS ' . $select;
					// counting is done after the id query below
					$is_mysql = true;
				break;

				default:
					if ($type == 'posts')
					{
						$sql = 'SELECT COUNT(p.post_id) as total_results
							FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$m_approve_fid_sql
								$sql_fora
								$sql_time";
					}
					else
					{
						// sqlite counts the distinct topic ids through a subselect
						if ($db->sql_layer == 'sqlite')
						{
							$sql = 'SELECT COUNT(topic_id) as total_results
								FROM (SELECT DISTINCT t.topic_id';
						}
						else
						{
							$sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
						}

						$sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
							WHERE $sql_author
								$sql_topic_id
								$sql_firstpost
								$m_approve_fid_sql
								$sql_fora
								AND t.topic_id = p.topic_id
								$sql_time" . (($db->sql_layer == 'sqlite') ? ')' : '');
					}
					$result = $db->sql_query($sql);

					$total_results = (int) $db->sql_fetchfield('total_results');
					$db->sql_freeresult($result);

					if (!$total_results)
					{
						return false;
					}
				break;
			}
		}

		// Build the query for really selecting the post_ids
		if ($type == 'posts')
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$m_approve_fid_sql
					$sql_fora
					$sql_sort_join
					$sql_time
				ORDER BY $sql_sort";
			$field = 'post_id';
		}
		else
		{
			$sql = "SELECT $select
				FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
				WHERE $sql_author
					$sql_topic_id
					$sql_firstpost
					$m_approve_fid_sql
					$sql_fora
					AND t.topic_id = p.topic_id
					$sql_sort_join
					$sql_time
				GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
				ORDER BY ' . $sql_sort;
			$field = 'topic_id';
		}

		// Only read one block of posts from the db and then cache it
		$result = $db->sql_query_limit($sql, $config['search_block_size'], $start);

		while ($row = $db->sql_fetchrow($result))
		{
			$id_ary[] = (int) $row[$field];
		}
		$db->sql_freeresult($result);

		if (!$total_results && $is_mysql)
		{
			// Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
			$sql = str_replace('SELECT ' . $select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql);

			// keep the handle of this query, so that this result is released
			// and not the already freed result of the id query above
			$result = $db->sql_query($sql);
			$db->sql_freeresult($result);

			$sql = 'SELECT FOUND_ROWS() as total_results';
			$result = $db->sql_query($sql);
			$total_results = (int) $db->sql_fetchfield('total_results');
			$db->sql_freeresult($result);

			if (!$total_results)
			{
				return false;
			}
		}

		if (sizeof($id_ary))
		{
			// cache the whole block, hand back only the ids of the requested page
			$this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
			$id_ary = array_slice($id_ary, 0, (int) $per_page);

			return $total_results;
		}
		return false;
	}

	/**
	* Split a text into words of a given length
	*
	* The text is converted to UTF-8, cleaned up, and split. Then, words that
	* conform to the defined length range are returned in an array.
*
	* NOTE: duplicates are NOT removed from the return array
	*
	* Tokens longer than 255 bytes are dropped, as are tokens whose byte length
	* is below the configured minimum. Tokens that are long enough in bytes but
	* too short in characters are only kept when they start inside the Hangul,
	* CJK or CJK-B ranges.
	*
	* @param	string	$text	Text to split, encoded in UTF-8
	* @return	array			Array of UTF-8 words
	*
	* @access	private
	*/
	function split_message($text)
	{
		$match = $words = array();

		/**
		* Taken from the original code
		*/
		// Do not index code
		$match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
		// BBcode
		$match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';

		$min = $this->word_length['min'];
		$isset_min = $min - 1;

		/**
		* Clean up the string, remove HTML tags, remove BBCodes.
		* -1 is not a string, so cleanup() allows no extra characters.
		*/
		$clean = $this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1);

		// strtok() returns false once the tokens are exhausted
		for ($word = strtok($clean, ' '); $word !== false; $word = strtok(' '))
		{
			$bytes = strlen($word);

			if ($bytes > 255 || $bytes <= $isset_min)
			{
				/**
				* Words longer than 255 bytes are ignored. This will have to be
				* changed whenever we change the length of search_wordlist.word_text
				*
				* Words shorter than $isset_min bytes are ignored, too
				*/
				continue;
			}

			/**
			* Test whether the word is too short to be indexed.
			*
			* Note that this limit does NOT apply to CJK and Hangul
			*/
			if (utf8_strlen($word) < $min)
			{
				/**
				* Note: this could be optimized. If the codepoint is lower than Hangul's range
				* we know that it will also be lower than CJK ranges
				*/
				if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
					&& (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
					&& (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
				{
					continue;
				}
			}

			$words[] = $word;
		}

		return $words;
	}

	/**
	* Updates wordlist and wordmatch tables when a message is posted or changed
	*
	* Does nothing when $config['fulltext_native_load_upd'] is falsy. In 'edit'
	* mode the words currently linked to the post are read back and only the
	* difference is added/removed; in every other mode all words are added.
	* All writes happen between sql_transaction('begin') and
	* sql_transaction('commit'); afterwards destroy_cache() is called for every
	* added or removed word.
	*
	* @param	string	$mode		Contains the post mode: edit, post, reply, quote
	* @param	int		$post_id	The id of the post which is modified/created
	* @param	string	&$message	New or updated post content
	* @param	string	&$subject	New or updated post subject
	* @param	int		$poster_id	Post author's user id
	* @param	int		$forum_id	The id of the forum in which the post is located (not used by this method)
	*
	* @access	public
	*/
	function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
	{
		global $config, $db;

		if (!$config['fulltext_native_load_upd'])
		{
			/**
			* The search indexer is disabled, return
			*/
			return;
		}

		// Split new post/subject to obtain array of 'words'
		$split_text = $this->split_message($message);
		$split_title = $this->split_message($subject);

		// word_text => word_id of the words currently linked to the post (edit mode only)
		$cur_words = array('post' => array(), 'title' => array());

		// Default (non-edit): everything is new, nothing is removed
		$words = array(
			'add'	=> array('post' => $split_text, 'title' => $split_title),
			'del'	=> array('post' => array(), 'title' => array())
		);

		if ($mode == 'edit')
		{
			// $post_id is cast so that it can never alter the query
			$sql = 'SELECT w.word_id, w.word_text, m.title_match
				FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . ' m
				WHERE m.post_id = ' . (int) $post_id . '
					AND w.word_id = m.word_id';
			$result = $db->sql_query($sql);

			while ($row = $db->sql_fetchrow($result))
			{
				$which = ($row['title_match']) ? 'title' : 'post';
				$cur_words[$which][$row['word_text']] = $row['word_id'];
			}
			$db->sql_freeresult($result);

			$words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
			$words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
			$words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
			$words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
		}
		unset($split_text, $split_title);

		// Get unique words from the above arrays
		$unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));

		// We now have unique arrays of all words to be added and removed and
		// individual arrays of added and removed words for text and title. What
		// we need to do now is add the new words (if they don't already exist)
		// and then add (or remove) matches between the words and this post
		$new_words = array();
		if (sizeof($unique_add_words))
		{
			$sql = 'SELECT word_id, word_text
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE ' . $db->sql_in_set('word_text', $unique_add_words);
			$result = $db->sql_query($sql);

			$word_ids = array();
			while ($row = $db->sql_fetchrow($result))
			{
				$word_ids[$row['word_text']] = $row['word_id'];
			}
			$db->sql_freeresult($result);

			// Words that have no row in the wordlist table yet
			$new_words = array_diff($unique_add_words, array_keys($word_ids));
		}

		$db->sql_transaction('begin');

		if (sizeof($new_words))
		{
			$sql_ary = array();

			foreach ($new_words as $word)
			{
				$sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
			}

			// Errors of this insert are not fatal
			$db->sql_return_on_error(true);
			$db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
			$db->sql_return_on_error(false);

			unset($sql_ary);
		}
		unset($new_words);

		// now update the search match table, remove links to removed words and add links to new words
		foreach ($words['del'] as $word_in => $word_ary)
		{
			$title_match = ($word_in == 'title') ? 1 : 0;

			if (sizeof($word_ary))
			{
				$sql_in = array();
				foreach ($word_ary as $word)
				{
					$sql_in[] = $cur_words[$word_in][$word];
				}

				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
						AND post_id = ' . intval($post_id) . "
						AND title_match = $title_match";
				$db->sql_query($sql);

				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
						AND word_count > 0';
				$db->sql_query($sql);

				unset($sql_in);
			}
		}

		$db->sql_return_on_error(true);
		foreach ($words['add'] as $word_in => $word_ary)
		{
			$title_match = ($word_in == 'title') ? 1 : 0;

			if (sizeof($word_ary))
			{
				$sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
					SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
					FROM ' . SEARCH_WORDLIST_TABLE . '
					WHERE ' . $db->sql_in_set('word_text', $word_ary);
				$db->sql_query($sql);

				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count + 1
					WHERE ' . $db->sql_in_set('word_text', $word_ary);
				$db->sql_query($sql);
			}
		}
		$db->sql_return_on_error(false);

		$db->sql_transaction('commit');

		// destroy cached search results containing any of the words removed or added
		$this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));
	}

	/**
	* Removes entries from the wordmatch table for the specified post_ids
	*
	* The word_count of every word linked to the posts is decremented (once for
	* title matches, once for message matches) before the links are deleted.
	* destroy_cache() is always called, even when $post_ids is empty; in that
	* case it receives an empty word list.
	*
	* @param	array	$post_ids	Ids of the posts to remove from the index
	* @param	array	$author_ids	Author ids handed to destroy_cache()
	* @param	array	$forum_ids	Not used by this method
	*/
	function index_remove($post_ids, $author_ids, $forum_ids)
	{
		global $db;

		// Must exist even when $post_ids is empty, it is used after the if block
		$word_texts = array();

		if (sizeof($post_ids))
		{
			$sql = 'SELECT w.word_id, w.word_text, m.title_match
				FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
				WHERE ' . $db->sql_in_set('m.post_id', $post_ids) . '
					AND w.word_id = m.word_id';
			$result = $db->sql_query($sql);

			$message_word_ids = $title_word_ids = array();
			while ($row = $db->sql_fetchrow($result))
			{
				if ($row['title_match'])
				{
					$title_word_ids[] = $row['word_id'];
				}
				else
				{
					$message_word_ids[] = $row['word_id'];
				}
				$word_texts[] = $row['word_text'];
			}
			$db->sql_freeresult($result);

			if (sizeof($title_word_ids))
			{
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $db->sql_in_set('word_id', $title_word_ids) . '
						AND word_count > 0';
				$db->sql_query($sql);
			}

			if (sizeof($message_word_ids))
			{
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_count = word_count - 1
					WHERE ' . $db->sql_in_set('word_id', $message_word_ids) . '
						AND word_count > 0';
				$db->sql_query($sql);
			}

			unset($title_word_ids);
			unset($message_word_ids);

			$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
				WHERE ' . $db->sql_in_set('post_id', $post_ids);
			$db->sql_query($sql);
		}

		$this->destroy_cache(array_unique($word_texts), array_unique($author_ids));
	}

	/**
	* Tidy up indexes: Tag 'common words' and delete their matches
	*
	* Only runs the common word pass when there are at least 100 posts and a
	* non-zero threshold is configured. A word is common when its word_count
	* exceeds floor(num_posts * threshold / 100) or when it is already flagged.
	* search_last_gc is always updated before returning.
	*/
	function tidy()
	{
		global $db, $config;

		// Is the fulltext indexer disabled? If yes then we need not
		// carry on ... it's okay ... I know when I'm not wanted boo hoo
		if (!$config['fulltext_native_load_upd'])
		{
			set_config('search_last_gc', time(), true);
			return;
		}

		$destroy_cache_words = array();

		// Remove common words
		if ($config['num_posts'] >= 100 && $config['fulltext_native_common_thres'])
		{
			$common_threshold = ((double) $config['fulltext_native_common_thres']) / 100.0;
			// First, get the IDs of common words
			$sql = 'SELECT word_id, word_text
				FROM ' . SEARCH_WORDLIST_TABLE . '
				WHERE word_count > ' . floor($config['num_posts'] * $common_threshold) . '
					OR word_common = 1';
			$result = $db->sql_query($sql);

			$sql_in = array();
			while ($row = $db->sql_fetchrow($result))
			{
				$sql_in[] = $row['word_id'];
				$destroy_cache_words[] = $row['word_text'];
			}
			$db->sql_freeresult($result);

			if (sizeof($sql_in))
			{
				// Flag the words
				$sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
					SET word_common = 1
					WHERE ' . $db->sql_in_set('word_id', $sql_in);
				$db->sql_query($sql);

				// by setting search_last_gc to the new time here we make sure that if a user reloads because the
				// following query takes too long, he won't run into it again
				set_config('search_last_gc', time(), true);

				// Delete the matches
				$sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
					WHERE ' . $db->sql_in_set('word_id', $sql_in);
				$db->sql_query($sql);
			}
			unset($sql_in);
		}

		if (sizeof($destroy_cache_words))
		{
			// destroy cached search results containing any of the words that are now common or were removed
			$this->destroy_cache(array_unique($destroy_cache_words));
		}

		set_config('search_last_gc', time(), true);
	}

	/**
	* Deletes all words from the index
	*
	* Empties the wordlist, wordmatch and search results tables. DELETE is used
	* for the 'sqlite' and 'firebird' layers, TRUNCATE TABLE for all others.
	*
	* @param	mixed	$acp_module	Not used by this method
	* @param	mixed	$u_action	Not used by this method
	*/
	function delete_index($acp_module, $u_action)
	{
		global $db;

		switch ($db->sql_layer)
		{
			case 'sqlite':
			case 'firebird':
				$db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
				$db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);
				$db->sql_query('DELETE FROM ' . SEARCH_RESULTS_TABLE);
			break;

			default:
				$db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDLIST_TABLE);
				$db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDMATCH_TABLE);
				$db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
			break;
		}
	}

	/**
	* Returns true if both the wordlist and the wordmatch table are reported
	* as non-empty by get_stats()
	*
	* @return	bool
	*/
	function index_created()
	{
		if (!sizeof($this->stats))
		{
			$this->get_stats();
		}

		return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
	}

	/**
	* Returns an associative array containing information about the indexes
	*
	* @return	array	Language string => value, for total words and total matches
	*/
	function index_stats()
	{
		global $user;

		if (!sizeof($this->stats))
		{
			$this->get_stats();
		}

		return array(
			$user->lang['TOTAL_WORDS']		=> $this->stats['total_words'],
			$user->lang['TOTAL_MATCHES']	=> $this->stats['total_matches']);
	}

	/**
	* Fills $this->stats with the estimated row counts of the wordlist and
	* wordmatch tables
	*/
	function get_stats()
	{
		global $db;

		$this->stats['total_words']		= $db->get_estimated_row_count(SEARCH_WORDLIST_TABLE);
		$this->stats['total_matches']	= $db->get_estimated_row_count(SEARCH_WORDMATCH_TABLE);
	}

	/**
	* Clean up a text to remove non-alphanumeric characters
	*
	* This method receives a UTF-8 string, normalizes and validates it, replaces all
	* non-alphanumeric characters with spaces then returns the result.
	*
	* Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
	*
	* Hangul, CJK and CJK-B characters are each surrounded by spaces so that they
	* end up as individual words. Other multibyte characters are mapped through
	* the includes/utf/data/search_indexer_<n> tables, which are loaded on demand
	* and kept in a static cache; characters without a mapping become a space.
	*
	* @param	string	$text			Text to split, in UTF-8 (not normalized or sanitized)
	* @param	string	$allowed_chars	String of special chars to allow; anything that is not a non-empty string allows nothing
	* @param	string	$encoding		Text encoding
	* @return	string					Cleaned up text, only alphanumeric chars are left
	*
	* @todo normalizer::cleanup being able to be used?
	*/
	function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
	{
		global $phpbb_root_path, $phpEx;
		static $conv = array(), $conv_loaded = array();
		$allow = array();

		// Convert the text to UTF-8
		$encoding = strtolower($encoding);
		if ($encoding != 'utf-8')
		{
			$text = utf8_recode($text, $encoding);
		}

		// Sequence length by high nibble of the lead byte
		$utf_len_mask = array(
			"\xC0"	=>	2,
			"\xD0"	=>	2,
			"\xE0"	=>	3,
			"\xF0"	=>	4
		);

		/**
		* Replace HTML entities and NCRs
		*/
		$text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);

		/**
		* Load the UTF-8 normalizer
		*
		* If we use it more widely, an instance of that class should be held in a
		* a global variable instead
		*
		* NOTE(review): the return value is ignored, so nfc() presumably modifies
		* $text by reference - confirm against utf_normalizer.
		*/
		utf_normalizer::nfc($text);

		/**
		* The first thing we do is:
		*
		* - convert ASCII-7 letters to lowercase
		* - remove the ASCII-7 non-alpha characters
		* - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
		*   0xC1 and 0xF5-0xFF
		*
		* @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
		*/
		$sb_match	= "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";

		// strtr() ignores the surplus of the longer argument, so the replacement
		// must be exactly as long as $sb_match: the 26 lowercase letters followed
		// by one space for every remaining character. Equal lengths are also
		// required by the position based removal of allowed chars below.
		$sb_replace	= str_pad('istcpamelrdojbnhfgvwuqkyxz', strlen($sb_match), ' ');

		/**
		* This is the list of legal ASCII chars, it is automatically extended
		* with ASCII chars from $allowed_chars
		*/
		$legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';

		/**
		* Prepare an array containing the extra chars to allow
		*/
		if (isset($allowed_chars[0]))
		{
			$pos = 0;
			$len = strlen($allowed_chars);
			do
			{
				$c = $allowed_chars[$pos];

				if ($c < "\x80")
				{
					/**
					* ASCII char
					*/
					$sb_pos = strpos($sb_match, $c);
					if (is_int($sb_pos))
					{
						/**
						* Remove the char from $sb_match and its corresponding
						* replacement in $sb_replace
						*/
						$sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
						$sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
						$legal_ascii .= $c;
					}

					++$pos;
				}
				else
				{
					/**
					* UTF-8 char
					*
					* An unexpected lead byte is treated as a single byte so that
					* $pos always advances
					*/
					$mask_key = $c & "\xF0";
					$utf_len = (isset($utf_len_mask[$mask_key])) ? $utf_len_mask[$mask_key] : 1;
					$allow[substr($allowed_chars, $pos, $utf_len)] = 1;
					$pos += $utf_len;
				}
			}
			while ($pos < $len);
		}

		$text = strtr($text, $sb_match, $sb_replace);
		$ret = '';

		$pos = 0;
		$len = strlen($text);

		while (true)
		{
			/**
			* Do all consecutive ASCII chars at once
			*/
			if ($spn = strspn($text, $legal_ascii, $pos))
			{
				$ret .= substr($text, $pos, $spn);
				$pos += $spn;
			}

			if ($pos >= $len)
			{
				break;
			}

			/**
			* Capture the UTF char
			*
			* A byte that is neither legal ASCII nor a known lead byte (for
			* instance 0x7F or a stray continuation byte) is consumed as a
			* single byte; without this fallback $pos would never advance.
			* Such a byte ends up being replaced by a space further down.
			*/
			$mask_key = $text[$pos] & "\xF0";
			$utf_len = (isset($utf_len_mask[$mask_key])) ? $utf_len_mask[$mask_key] : 1;
			$utf_char = substr($text, $pos, $utf_len);
			$pos += $utf_len;

			if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
				|| ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
				|| ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
			{
				/**
				* All characters within these ranges are valid
				*
				* We separate them with a space in order to index each character
				* individually
				*/
				$ret .= ' ' . $utf_char . ' ';
				continue;
			}

			if (isset($allow[$utf_char]))
			{
				/**
				* The char is explicitly allowed
				*/
				$ret .= $utf_char;
				continue;
			}

			if (isset($conv[$utf_char]))
			{
				/**
				* The char is mapped to something, maybe to itself actually
				*/
				$ret .= $conv[$utf_char];
				continue;
			}

			/**
			* The char isn't mapped, but did we load its conversion table?
			*
			* The search indexer table is split into blocks. The block number of
			* each char is equal to its codepoint right-shifted for 11 bits. It
			* means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
			* 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
			* all UTF chars encoded in 2 bytes are in the same first block.
			*/
			if (isset($utf_char[2]))
			{
				if (isset($utf_char[3]))
				{
					/**
					* 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0011 1111 0010 0000
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
				}
				else
				{
					/**
					* 1110 nnnn 10nx xxxx 10xx xxxx
					* 0000 0111 0010 0000
					*
					* NOTE(review): the pattern above shows four payload bits in
					* the lead byte while the mask keeps only three (0x07) -
					* confirm against how the data files are numbered.
					*/
					$idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
				}
			}
			else
			{
				/**
				* 110x xxxx 10xx xxxx
				* 0000 0000 0000 0000
				*/
				$idx = 0;
			}

			/**
			* Check if the required conv table has been loaded already
			*/
			if (!isset($conv_loaded[$idx]))
			{
				$conv_loaded[$idx] = 1;
				$file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;

				if (file_exists($file))
				{
					$conv += include($file);
				}
			}

			if (isset($conv[$utf_char]))
			{
				$ret .= $conv[$utf_char];
			}
			else
			{
				/**
				* We add an entry to the conversion table so that we
				* don't have to convert to codepoint and perform the checks
				* that are above this block
				*/
				$conv[$utf_char] = ' ';
				$ret .= ' ';
			}
		}

		return $ret;
	}

	/**
	* Returns a list of options for the ACP to display
	*
	* @return	array	'tpl' holds the HTML for the four settings, 'config' maps
	*					each config key to its validation type
	*/
	function acp()
	{
		global $user, $config;

		// One <dl> per setting: indexing on/off, min chars, max chars, common word threshold
		$tpl = '
		<dl>
			<dt><label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
			<dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['NO'] . '</label></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . (int) $config['fulltext_native_min_chars'] . '" /></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . (int) $config['fulltext_native_max_chars'] . '" /></dd>
		</dl>
		<dl>
			<dt><label for="fulltext_native_common_thres">' . $user->lang['COMMON_WORD_THRESHOLD'] . ':</label><br /><span>' . $user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>
			<dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $config['fulltext_native_common_thres'] . '" /> %</dd>
		</dl>
		';

		// These are fields required in the config table
		return array(
			'tpl'		=> $tpl,
			'config'	=> array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100')
		);
	}
}

?>
title
Description
Body
title
Description
Body
title
Description
Body
title
Body
Generated: Wed Oct 2 15:03:47 2013 | Cross-referenced by PHPXref 0.7.1 |