[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/includes/search/ -> fulltext_native.php (source)

   1  <?php
   2  /**
   3  *
   4  * @package search
   5  * @version $Id$
   6  * @copyright (c) 2005 phpBB Group
   7  * @license http://opensource.org/licenses/gpl-license.php GNU Public License
   8  *
   9  */
  10  
  11  /**
  12  * @ignore
  13  */
  14  if (!defined('IN_PHPBB'))
  15  {
  16      exit;
  17  }
  18  
  19  /**
  20  * @ignore
  21  */
  22  include_once($phpbb_root_path . 'includes/search/search.' . $phpEx);
  23  
  24  /**
  25  * fulltext_native
  26  * phpBB's own db driven fulltext search, version 2
  27  * @package search
  28  */
  29  class fulltext_native extends search_backend
  30  {
  31      var $stats = array();
  32      var $word_length = array();
  33      var $search_query;
  34      var $common_words = array();
  35  
  36      var $must_contain_ids = array();
  37      var $must_not_contain_ids = array();
  38      var $must_exclude_one_ids = array();
  39  
  40      /**
  41      * Initialises the fulltext_native search backend with min/max word length and makes sure the UTF-8 normalizer is loaded.
  42      *
  43      * @param    boolean|string    &$error    is passed by reference and should either be set to false on success or an error message on failure.
  44      *
  45      * @access    public
  46      */
  47  	function fulltext_native(&$error)
  48      {
  49          global $phpbb_root_path, $phpEx, $config;
  50  
  51          $this->word_length = array('min' => $config['fulltext_native_min_chars'], 'max' => $config['fulltext_native_max_chars']);
  52  
  53          /**
  54          * Load the UTF tools
  55          */
  56          if (!class_exists('utf_normalizer'))
  57          {
  58              include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
  59          }
  60  
  61  
  62          $error = false;
  63      }
  64  
  65      /**
  66      * This function fills $this->search_query with the cleaned user search query.
  67      *
  68      * If $terms is 'any' then the words will be extracted from the search query
  69      * and combined with | inside brackets. They will afterwards be treated like
  70      * an standard search query.
  71      *
  72      * Then it analyses the query and fills the internal arrays $must_not_contain_ids,
  73      * $must_contain_ids and $must_exclude_one_ids which are later used by keyword_search().
  74      *
  75      * @param    string    $keywords    contains the search query string as entered by the user
  76      * @param    string    $terms        is either 'all' (use search query as entered, default words to 'must be contained in post')
  77      *     or 'any' (find all posts containing at least one of the given words)
  78      * @return    boolean                false if no valid keywords were found and otherwise true
  79      *
  80      * @access    public
  81      */
  82  	function split_keywords($keywords, $terms)
  83      {
  84          global $db, $user, $config;
  85  
  86          $tokens = '+-|()*';
  87  
  88          $keywords = trim($this->cleanup($keywords, $tokens));
  89  
  90          // allow word|word|word without brackets
  91          if ((strpos($keywords, ' ') === false) && (strpos($keywords, '|') !== false) && (strpos($keywords, '(') === false))
  92          {
  93              $keywords = '(' . $keywords . ')';
  94          }
  95  
  96          $open_bracket = $space = false;
  97          for ($i = 0, $n = strlen($keywords); $i < $n; $i++)
  98          {
  99              if ($open_bracket !== false)
 100              {
 101                  switch ($keywords[$i])
 102                  {
 103                      case ')':
 104                          if ($open_bracket + 1 == $i)
 105                          {
 106                              $keywords[$i - 1] = '|';
 107                              $keywords[$i] = '|';
 108                          }
 109                          $open_bracket = false;
 110                      break;
 111                      case '(':
 112                          $keywords[$i] = '|';
 113                      break;
 114                      case '+':
 115                      case '-':
 116                      case ' ':
 117                          $keywords[$i] = '|';
 118                      break;
 119                      case '*':
 120                          if ($i === 0 || ($keywords[$i - 1] !== '*' && strcspn($keywords[$i - 1], $tokens) === 0))
 121                          {
 122                              if ($i === $n - 1 || ($keywords[$i + 1] !== '*' && strcspn($keywords[$i + 1], $tokens) === 0))
 123                              {
 124                                  $keywords = substr($keywords, 0, $i) . substr($keywords, $i + 1);
 125                              }
 126                          }
 127                      break;
 128                  }
 129              }
 130              else
 131              {
 132                  switch ($keywords[$i])
 133                  {
 134                      case ')':
 135                          $keywords[$i] = ' ';
 136                      break;
 137                      case '(':
 138                          $open_bracket = $i;
 139                          $space = false;
 140                      break;
 141                      case '|':
 142                          $keywords[$i] = ' ';
 143                      break;
 144                      case '-':
 145                      case '+':
 146                          $space = $keywords[$i];
 147                      break;
 148                      case ' ':
 149                          if ($space !== false)
 150                          {
 151                              $keywords[$i] = $space;
 152                          }
 153                      break;
 154                      default:
 155                          $space = false;
 156                  }
 157              }
 158          }
 159  
 160          if ($open_bracket)
 161          {
 162              $keywords .= ')';
 163          }
 164  
 165          $match = array(
 166              '#  +#',
 167              '#\|\|+#',
 168              '#(\+|\-)(?:\+|\-)+#',
 169              '#\(\|#',
 170              '#\|\)#',
 171          );
 172          $replace = array(
 173              ' ',
 174              '|',
 175              '$1',
 176              '(',
 177              ')',
 178          );
 179  
 180          $keywords = preg_replace($match, $replace, $keywords);
 181          $num_keywords = sizeof(explode(' ', $keywords));
 182  
 183          // We limit the number of allowed keywords to minimize load on the database
 184          if ($config['max_num_search_keywords'] && $num_keywords > $config['max_num_search_keywords'])
 185          {
 186              trigger_error($user->lang('MAX_NUM_SEARCH_KEYWORDS_REFINE', $config['max_num_search_keywords'], $num_keywords));
 187          }
 188  
 189          // $keywords input format: each word separated by a space, words in a bracket are not separated
 190  
 191          // the user wants to search for any word, convert the search query
 192          if ($terms == 'any')
 193          {
 194              $words = array();
 195  
 196              preg_match_all('#([^\\s+\\-|()]+)(?:$|[\\s+\\-|()])#u', $keywords, $words);
 197              if (sizeof($words[1]))
 198              {
 199                  $keywords = '(' . implode('|', $words[1]) . ')';
 200              }
 201          }
 202  
 203          // set the search_query which is shown to the user
 204          $this->search_query = $keywords;
 205  
 206          $exact_words = array();
 207          preg_match_all('#([^\\s+\\-|*()]+)(?:$|[\\s+\\-|()])#u', $keywords, $exact_words);
 208          $exact_words = $exact_words[1];
 209  
 210          $common_ids = $words = array();
 211  
 212          if (sizeof($exact_words))
 213          {
 214              $sql = 'SELECT word_id, word_text, word_common
 215                  FROM ' . SEARCH_WORDLIST_TABLE . '
 216                  WHERE ' . $db->sql_in_set('word_text', $exact_words) . '
 217                  ORDER BY word_count ASC';
 218              $result = $db->sql_query($sql);
 219  
 220              // store an array of words and ids, remove common words
 221              while ($row = $db->sql_fetchrow($result))
 222              {
 223                  if ($row['word_common'])
 224                  {
 225                      $this->common_words[] = $row['word_text'];
 226                      $common_ids[$row['word_text']] = (int) $row['word_id'];
 227                      continue;
 228                  }
 229  
 230                  $words[$row['word_text']] = (int) $row['word_id'];
 231              }
 232              $db->sql_freeresult($result);
 233          }
 234          unset($exact_words);
 235  
 236          // now analyse the search query, first split it using the spaces
 237          $query = explode(' ', $keywords);
 238  
 239          $this->must_contain_ids = array();
 240          $this->must_not_contain_ids = array();
 241          $this->must_exclude_one_ids = array();
 242  
 243          $mode = '';
 244          $ignore_no_id = true;
 245  
 246          foreach ($query as $word)
 247          {
 248              if (empty($word))
 249              {
 250                  continue;
 251              }
 252  
 253              // words which should not be included
 254              if ($word[0] == '-')
 255              {
 256                  $word = substr($word, 1);
 257  
 258                  // a group of which at least one may not be in the resulting posts
 259                  if ($word[0] == '(')
 260                  {
 261                      $word = array_unique(explode('|', substr($word, 1, -1)));
 262                      $mode = 'must_exclude_one';
 263                  }
 264                  // one word which should not be in the resulting posts
 265                  else
 266                  {
 267                      $mode = 'must_not_contain';
 268                  }
 269                  $ignore_no_id = true;
 270              }
 271              // words which have to be included
 272              else
 273              {
 274                  // no prefix is the same as a +prefix
 275                  if ($word[0] == '+')
 276                  {
 277                      $word = substr($word, 1);
 278                  }
 279  
 280                  // a group of words of which at least one word should be in every resulting post
 281                  if ($word[0] == '(')
 282                  {
 283                      $word = array_unique(explode('|', substr($word, 1, -1)));
 284                  }
 285                  $ignore_no_id = false;
 286                  $mode = 'must_contain';
 287              }
 288  
 289              if (empty($word))
 290              {
 291                  continue;
 292              }
 293  
 294              // if this is an array of words then retrieve an id for each
 295              if (is_array($word))
 296              {
 297                  $non_common_words = array();
 298                  $id_words = array();
 299                  foreach ($word as $i => $word_part)
 300                  {
 301                      if (strpos($word_part, '*') !== false)
 302                      {
 303                          $id_words[] = '\'' . $db->sql_escape(str_replace('*', '%', $word_part)) . '\'';
 304                          $non_common_words[] = $word_part;
 305                      }
 306                      else if (isset($words[$word_part]))
 307                      {
 308                          $id_words[] = $words[$word_part];
 309                          $non_common_words[] = $word_part;
 310                      }
 311                      else
 312                      {
 313                          $len = utf8_strlen($word_part);
 314                          if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
 315                          {
 316                              $this->common_words[] = $word_part;
 317                          }
 318                      }
 319                  }
 320                  if (sizeof($id_words))
 321                  {
 322                      sort($id_words);
 323                      if (sizeof($id_words) > 1)
 324                      {
 325                          $this->{$mode . '_ids'}[] = $id_words;
 326                      }
 327                      else
 328                      {
 329                          $mode = ($mode == 'must_exclude_one') ? 'must_not_contain' : $mode;
 330                          $this->{$mode . '_ids'}[] = $id_words[0];
 331                      }
 332                  }
 333                  // throw an error if we shall not ignore unexistant words
 334                  else if (!$ignore_no_id && sizeof($non_common_words))
 335                  {
 336                      trigger_error(sprintf($user->lang['WORDS_IN_NO_POST'], implode(', ', $non_common_words)));
 337                  }
 338                  unset($non_common_words);
 339              }
 340              // else we only need one id
 341              else if (($wildcard = strpos($word, '*') !== false) || isset($words[$word]))
 342              {
 343                  if ($wildcard)
 344                  {
 345                      $len = utf8_strlen(str_replace('*', '', $word));
 346                      if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
 347                      {
 348                          $this->{$mode . '_ids'}[] = '\'' . $db->sql_escape(str_replace('*', '%', $word)) . '\'';
 349                      }
 350                      else
 351                      {
 352                          $this->common_words[] = $word;
 353                      }
 354                  }
 355                  else
 356                  {
 357                      $this->{$mode . '_ids'}[] = $words[$word];
 358                  }
 359              }
 360              // throw an error if we shall not ignore unexistant words
 361              else if (!$ignore_no_id)
 362              {
 363                  if (!isset($common_ids[$word]))
 364                  {
 365                      $len = utf8_strlen($word);
 366                      if ($len >= $this->word_length['min'] && $len <= $this->word_length['max'])
 367                      {
 368                          trigger_error(sprintf($user->lang['WORD_IN_NO_POST'], $word));
 369                      }
 370                      else
 371                      {
 372                          $this->common_words[] = $word;
 373                      }
 374                  }
 375              }
 376              else
 377              {
 378                  $len = utf8_strlen($word);
 379                  if ($len < $this->word_length['min'] || $len > $this->word_length['max'])
 380                  {
 381                      $this->common_words[] = $word;
 382                  }
 383              }
 384          }
 385  
 386          // we can't search for negatives only
 387          if (!sizeof($this->must_contain_ids))
 388          {
 389              return false;
 390          }
 391  
 392          if (!empty($this->search_query))
 393          {
 394              return true;
 395          }
 396          return false;
 397      }
 398  
 399      /**
 400      * Performs a search on keywords depending on display specific params. You have to run split_keywords() first.
 401      *
 402      * @param    string        $type                contains either posts or topics depending on what should be searched for
 403      * @param    string        $fields                contains either titleonly (topic titles should be searched), msgonly (only message bodies should be searched), firstpost (only subject and body of the first post should be searched) or all (all post bodies and subjects should be searched)
 404      * @param    string        $terms                is either 'all' (use query as entered, words without prefix should default to "have to be in field") or 'any' (ignore search query parts and just return all posts that contain any of the specified words)
 405      * @param    array        $sort_by_sql        contains SQL code for the ORDER BY part of a query
 406      * @param    string        $sort_key            is the key of $sort_by_sql for the selected sorting
 407      * @param    string        $sort_dir            is either a or d representing ASC and DESC
 408      * @param    string        $sort_days            specifies the maximum amount of days a post may be old
 409      * @param    array        $ex_fid_ary            specifies an array of forum ids which should not be searched
 410      * @param    array        $m_approve_fid_ary    specifies an array of forum ids in which the searcher is allowed to view unapproved posts
 411      * @param    int            $topic_id            is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
 412      * @param    array        $author_ary            an array of author ids if the author should be ignored during the search the array is empty
 413      * @param    string        $author_name        specifies the author match, when ANONYMOUS is also a search-match
 414      * @param    array        &$id_ary            passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
 415      * @param    int            $start                indicates the first index of the page
 416      * @param    int            $per_page            number of ids each page is supposed to contain
 417      * @return    boolean|int                        total number of results
 418      *
 419      * @access    public
 420      */
 421  	function keyword_search($type, $fields, $terms, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
 422      {
 423          global $config, $db;
 424  
 425          // No keywords? No posts.
 426          if (empty($this->search_query))
 427          {
 428              return false;
 429          }
 430  
 431          $must_contain_ids = $this->must_contain_ids;
 432          $must_not_contain_ids = $this->must_not_contain_ids;
 433          $must_exclude_one_ids = $this->must_exclude_one_ids;
 434  
 435          sort($must_contain_ids);
 436          sort($must_not_contain_ids);
 437          sort($must_exclude_one_ids);
 438  
 439          // generate a search_key from all the options to identify the results
 440          $search_key = md5(implode('#', array(
 441              serialize($must_contain_ids),
 442              serialize($must_not_contain_ids),
 443              serialize($must_exclude_one_ids),
 444              $type,
 445              $fields,
 446              $terms,
 447              $sort_days,
 448              $sort_key,
 449              $topic_id,
 450              implode(',', $ex_fid_ary),
 451              implode(',', $m_approve_fid_ary),
 452              implode(',', $author_ary),
 453              $author_name,
 454          )));
 455  
 456          // try reading the results from cache
 457          $total_results = 0;
 458          if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
 459          {
 460              return $total_results;
 461          }
 462  
 463          $id_ary = array();
 464  
 465          $sql_where = array();
 466          $group_by = false;
 467          $m_num = 0;
 468          $w_num = 0;
 469  
 470          $sql_array = array(
 471              'SELECT'    => ($type == 'posts') ? 'p.post_id' : 'p.topic_id',
 472              'FROM'        => array(
 473                  SEARCH_WORDMATCH_TABLE    => array(),
 474                  SEARCH_WORDLIST_TABLE    => array(),
 475              ),
 476              'LEFT_JOIN' => array(array(
 477                  'FROM'    => array(POSTS_TABLE => 'p'),
 478                  'ON'    => 'm0.post_id = p.post_id',
 479              )),
 480          );
 481  
 482          $title_match = '';
 483          $left_join_topics = false;
 484          $group_by = true;
 485          // Build some display specific sql strings
 486          switch ($fields)
 487          {
 488              case 'titleonly':
 489                  $title_match = 'title_match = 1';
 490                  $group_by = false;
 491              // no break
 492              case 'firstpost':
 493                  $left_join_topics = true;
 494                  $sql_where[] = 'p.post_id = t.topic_first_post_id';
 495              break;
 496  
 497              case 'msgonly':
 498                  $title_match = 'title_match = 0';
 499                  $group_by = false;
 500              break;
 501          }
 502  
 503          if ($type == 'topics')
 504          {
 505              $left_join_topics = true;
 506              $group_by = true;
 507          }
 508  
 509          /**
 510          * @todo Add a query optimizer (handle stuff like "+(4|3) +4")
 511          */
 512  
 513          foreach ($this->must_contain_ids as $subquery)
 514          {
 515              if (is_array($subquery))
 516              {
 517                  $group_by = true;
 518  
 519                  $word_id_sql = array();
 520                  $word_ids = array();
 521                  foreach ($subquery as $id)
 522                  {
 523                      if (is_string($id))
 524                      {
 525                          $sql_array['LEFT_JOIN'][] = array(
 526                              'FROM'    => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
 527                              'ON'    => "w$w_num.word_text LIKE $id"
 528                          );
 529                          $word_ids[] = "w$w_num.word_id";
 530  
 531                          $w_num++;
 532                      }
 533                      else
 534                      {
 535                          $word_ids[] = $id;
 536                      }
 537                  }
 538  
 539                  $sql_where[] = $db->sql_in_set("m$m_num.word_id", $word_ids);
 540  
 541                  unset($word_id_sql);
 542                  unset($word_ids);
 543              }
 544              else if (is_string($subquery))
 545              {
 546                  $sql_array['FROM'][SEARCH_WORDLIST_TABLE][] = 'w' . $w_num;
 547  
 548                  $sql_where[] = "w$w_num.word_text LIKE $subquery";
 549                  $sql_where[] = "m$m_num.word_id = w$w_num.word_id";
 550  
 551                  $group_by = true;
 552                  $w_num++;
 553              }
 554              else
 555              {
 556                  $sql_where[] = "m$m_num.word_id = $subquery";
 557              }
 558  
 559              $sql_array['FROM'][SEARCH_WORDMATCH_TABLE][] = 'm' . $m_num;
 560  
 561              if ($title_match)
 562              {
 563                  $sql_where[] = "m$m_num.$title_match";
 564              }
 565  
 566              if ($m_num != 0)
 567              {
 568                  $sql_where[] = "m$m_num.post_id = m0.post_id";
 569              }
 570              $m_num++;
 571          }
 572  
 573          foreach ($this->must_not_contain_ids as $key => $subquery)
 574          {
 575              if (is_string($subquery))
 576              {
 577                  $sql_array['LEFT_JOIN'][] = array(
 578                      'FROM'    => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
 579                      'ON'    => "w$w_num.word_text LIKE $subquery"
 580                  );
 581  
 582                  $this->must_not_contain_ids[$key] = "w$w_num.word_id";
 583  
 584                  $group_by = true;
 585                  $w_num++;
 586              }
 587          }
 588  
 589          if (sizeof($this->must_not_contain_ids))
 590          {
 591              $sql_array['LEFT_JOIN'][] = array(
 592                  'FROM'    => array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
 593                  'ON'    => $db->sql_in_set("m$m_num.word_id", $this->must_not_contain_ids) . (($title_match) ? " AND m$m_num.$title_match" : '') . " AND m$m_num.post_id = m0.post_id"
 594              );
 595  
 596              $sql_where[] = "m$m_num.word_id IS NULL";
 597              $m_num++;
 598          }
 599  
 600          foreach ($this->must_exclude_one_ids as $ids)
 601          {
 602              $is_null_joins = array();
 603              foreach ($ids as $id)
 604              {
 605                  if (is_string($id))
 606                  {
 607                      $sql_array['LEFT_JOIN'][] = array(
 608                          'FROM'    => array(SEARCH_WORDLIST_TABLE => 'w' . $w_num),
 609                          'ON'    => "w$w_num.word_text LIKE $id"
 610                      );
 611                      $id = "w$w_num.word_id";
 612  
 613                      $group_by = true;
 614                      $w_num++;
 615                  }
 616  
 617                  $sql_array['LEFT_JOIN'][] = array(
 618                      'FROM'    => array(SEARCH_WORDMATCH_TABLE => 'm' . $m_num),
 619                      'ON'    => "m$m_num.word_id = $id AND m$m_num.post_id = m0.post_id" . (($title_match) ? " AND m$m_num.$title_match" : '')
 620                  );
 621                  $is_null_joins[] = "m$m_num.word_id IS NULL";
 622  
 623                  $m_num++;
 624              }
 625              $sql_where[] = '(' . implode(' OR ', $is_null_joins) . ')';
 626          }
 627  
 628          if (!sizeof($m_approve_fid_ary))
 629          {
 630              $sql_where[] = 'p.post_approved = 1';
 631          }
 632          else if ($m_approve_fid_ary !== array(-1))
 633          {
 634              $sql_where[] = '(p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
 635          }
 636  
 637          if ($topic_id)
 638          {
 639              $sql_where[] = 'p.topic_id = ' . $topic_id;
 640          }
 641  
 642          if (sizeof($author_ary))
 643          {
 644              if ($author_name)
 645              {
 646                  // first one matches post of registered users, second one guests and deleted users
 647                  $sql_author = '(' . $db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
 648              }
 649              else
 650              {
 651                  $sql_author = $db->sql_in_set('p.poster_id', $author_ary);
 652              }
 653              $sql_where[] = $sql_author;
 654          }
 655  
 656          if (sizeof($ex_fid_ary))
 657          {
 658              $sql_where[] = $db->sql_in_set('p.forum_id', $ex_fid_ary, true);
 659          }
 660  
 661          if ($sort_days)
 662          {
 663              $sql_where[] = 'p.post_time >= ' . (time() - ($sort_days * 86400));
 664          }
 665  
 666          $sql_array['WHERE'] = implode(' AND ', $sql_where);
 667  
 668          $is_mysql = false;
 669          // if the total result count is not cached yet, retrieve it from the db
 670          if (!$total_results)
 671          {
 672              $sql = '';
 673              $sql_array_count = $sql_array;
 674  
 675              if ($left_join_topics)
 676              {
 677                  $sql_array_count['LEFT_JOIN'][] = array(
 678                      'FROM'    => array(TOPICS_TABLE => 't'),
 679                      'ON'    => 'p.topic_id = t.topic_id'
 680                  );
 681              }
 682  
 683              switch ($db->sql_layer)
 684              {
 685                  case 'mysql4':
 686                  case 'mysqli':
 687  
 688                      // 3.x does not support SQL_CALC_FOUND_ROWS
 689                      // $sql_array['SELECT'] = 'SQL_CALC_FOUND_ROWS ' . $sql_array['SELECT'];
 690                      $is_mysql = true;
 691  
 692                  break;
 693  
 694                  case 'sqlite':
 695                      $sql_array_count['SELECT'] = ($type == 'posts') ? 'DISTINCT p.post_id' : 'DISTINCT p.topic_id';
 696                      $sql = 'SELECT COUNT(' . (($type == 'posts') ? 'post_id' : 'topic_id') . ') as total_results
 697                              FROM (' . $db->sql_build_query('SELECT', $sql_array_count) . ')';
 698  
 699                  // no break
 700  
 701                  default:
 702                      $sql_array_count['SELECT'] = ($type == 'posts') ? 'COUNT(DISTINCT p.post_id) AS total_results' : 'COUNT(DISTINCT p.topic_id) AS total_results';
 703                      $sql = (!$sql) ? $db->sql_build_query('SELECT', $sql_array_count) : $sql;
 704  
 705                      $result = $db->sql_query($sql);
 706                      $total_results = (int) $db->sql_fetchfield('total_results');
 707                      $db->sql_freeresult($result);
 708  
 709                      if (!$total_results)
 710                      {
 711                          return false;
 712                      }
 713                  break;
 714              }
 715  
 716              unset($sql_array_count, $sql);
 717          }
 718  
 719          // Build sql strings for sorting
 720          $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
 721  
 722          switch ($sql_sort[0])
 723          {
 724              case 'u':
 725                  $sql_array['FROM'][USERS_TABLE] = 'u';
 726                  $sql_where[] = 'u.user_id = p.poster_id ';
 727              break;
 728  
 729              case 't':
 730                  $left_join_topics = true;
 731              break;
 732  
 733              case 'f':
 734                  $sql_array['FROM'][FORUMS_TABLE] = 'f';
 735                  $sql_where[] = 'f.forum_id = p.forum_id';
 736              break;
 737          }
 738  
 739          if ($left_join_topics)
 740          {
 741              $sql_array['LEFT_JOIN'][] = array(
 742                  'FROM'    => array(TOPICS_TABLE => 't'),
 743                  'ON'    => 'p.topic_id = t.topic_id'
 744              );
 745          }
 746  
 747          $sql_array['WHERE'] = implode(' AND ', $sql_where);
 748          $sql_array['GROUP_BY'] = ($group_by) ? (($type == 'posts') ? 'p.post_id' : 'p.topic_id') . ', ' . $sort_by_sql[$sort_key] : '';
 749          $sql_array['ORDER_BY'] = $sql_sort;
 750  
 751          unset($sql_where, $sql_sort, $group_by);
 752  
 753          $sql = $db->sql_build_query('SELECT', $sql_array);
 754          $result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
 755  
 756          while ($row = $db->sql_fetchrow($result))
 757          {
 758              $id_ary[] = (int) $row[(($type == 'posts') ? 'post_id' : 'topic_id')];
 759          }
 760          $db->sql_freeresult($result);
 761  
 762          if (!sizeof($id_ary))
 763          {
 764              return false;
 765          }
 766  
 767          // if we use mysql and the total result count is not cached yet, retrieve it from the db
 768          if (!$total_results && $is_mysql)
 769          {
 770              // Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
 771              $sql_array_copy = $sql_array;
 772              $sql_array_copy['SELECT'] = 'SQL_CALC_FOUND_ROWS p.post_id ';
 773  
 774              $sql = $db->sql_build_query('SELECT', $sql_array_copy);
 775              unset($sql_array_copy);
 776  
 777              $db->sql_query($sql);
 778              $db->sql_freeresult($result);
 779  
 780              $sql = 'SELECT FOUND_ROWS() as total_results';
 781              $result = $db->sql_query($sql);
 782              $total_results = (int) $db->sql_fetchfield('total_results');
 783              $db->sql_freeresult($result);
 784  
 785              if (!$total_results)
 786              {
 787                  return false;
 788              }
 789          }
 790  
 791          // store the ids, from start on then delete anything that isn't on the current page because we only need ids for one page
 792          $this->save_ids($search_key, $this->search_query, $author_ary, $total_results, $id_ary, $start, $sort_dir);
 793          $id_ary = array_slice($id_ary, 0, (int) $per_page);
 794  
 795          return $total_results;
 796      }
 797  
 798      /**
 799      * Performs a search on an author's posts without caring about message contents. Depends on display specific params
 800      *
 801      * @param    string        $type                contains either posts or topics depending on what should be searched for
 802      * @param    boolean        $firstpost_only        if true, only topic starting posts will be considered
 803      * @param    array        $sort_by_sql        contains SQL code for the ORDER BY part of a query
 804      * @param    string        $sort_key            is the key of $sort_by_sql for the selected sorting
 805      * @param    string        $sort_dir            is either a or d representing ASC and DESC
 806      * @param    string        $sort_days            specifies the maximum amount of days a post may be old
 807      * @param    array        $ex_fid_ary            specifies an array of forum ids which should not be searched
 808      * @param    array        $m_approve_fid_ary    specifies an array of forum ids in which the searcher is allowed to view unapproved posts
 809      * @param    int            $topic_id            is set to 0 or a topic id, if it is not 0 then only posts in this topic should be searched
 810      * @param    array        $author_ary            an array of author ids
 811      * @param    string        $author_name        specifies the author match, when ANONYMOUS is also a search-match
 812      * @param    array        &$id_ary            passed by reference, to be filled with ids for the page specified by $start and $per_page, should be ordered
 813      * @param    int            $start                indicates the first index of the page
 814      * @param    int            $per_page            number of ids each page is supposed to contain
 815      * @return    boolean|int                        total number of results
 816      *
 817      * @access    public
 818      */
 819  	function author_search($type, $firstpost_only, $sort_by_sql, $sort_key, $sort_dir, $sort_days, $ex_fid_ary, $m_approve_fid_ary, $topic_id, $author_ary, $author_name, &$id_ary, $start, $per_page)
 820      {
 821          global $config, $db;
 822  
 823          // No author? No posts.
 824          if (!sizeof($author_ary))
 825          {
 826              return 0;
 827          }
 828  
 829          // generate a search_key from all the options to identify the results
 830          $search_key = md5(implode('#', array(
 831              '',
 832              $type,
 833              ($firstpost_only) ? 'firstpost' : '',
 834              '',
 835              '',
 836              $sort_days,
 837              $sort_key,
 838              $topic_id,
 839              implode(',', $ex_fid_ary),
 840              implode(',', $m_approve_fid_ary),
 841              implode(',', $author_ary),
 842              $author_name,
 843          )));
 844  
 845          // try reading the results from cache
 846          $total_results = 0;
 847          if ($this->obtain_ids($search_key, $total_results, $id_ary, $start, $per_page, $sort_dir) == SEARCH_RESULT_IN_CACHE)
 848          {
 849              return $total_results;
 850          }
 851  
 852          $id_ary = array();
 853  
 854          // Create some display specific sql strings
 855          if ($author_name)
 856          {
 857              // first one matches post of registered users, second one guests and deleted users
 858              $sql_author = '(' . $db->sql_in_set('p.poster_id', array_diff($author_ary, array(ANONYMOUS)), false, true) . ' OR p.post_username ' . $author_name . ')';
 859          }
 860          else
 861          {
 862              $sql_author = $db->sql_in_set('p.poster_id', $author_ary);
 863          }
 864          $sql_fora        = (sizeof($ex_fid_ary)) ? ' AND ' . $db->sql_in_set('p.forum_id', $ex_fid_ary, true) : '';
 865          $sql_time        = ($sort_days) ? ' AND p.post_time >= ' . (time() - ($sort_days * 86400)) : '';
 866          $sql_topic_id    = ($topic_id) ? ' AND p.topic_id = ' . (int) $topic_id : '';
 867          $sql_firstpost = ($firstpost_only) ? ' AND p.post_id = t.topic_first_post_id' : '';
 868  
 869          // Build sql strings for sorting
 870          $sql_sort = $sort_by_sql[$sort_key] . (($sort_dir == 'a') ? ' ASC' : ' DESC');
 871          $sql_sort_table = $sql_sort_join = '';
 872          switch ($sql_sort[0])
 873          {
 874              case 'u':
 875                  $sql_sort_table    = USERS_TABLE . ' u, ';
 876                  $sql_sort_join    = ' AND u.user_id = p.poster_id ';
 877              break;
 878  
 879              case 't':
 880                  $sql_sort_table    = ($type == 'posts' && !$firstpost_only) ? TOPICS_TABLE . ' t, ' : '';
 881                  $sql_sort_join    = ($type == 'posts' && !$firstpost_only) ? ' AND t.topic_id = p.topic_id ' : '';
 882              break;
 883  
 884              case 'f':
 885                  $sql_sort_table    = FORUMS_TABLE . ' f, ';
 886                  $sql_sort_join    = ' AND f.forum_id = p.forum_id ';
 887              break;
 888          }
 889  
 890          if (!sizeof($m_approve_fid_ary))
 891          {
 892              $m_approve_fid_sql = ' AND p.post_approved = 1';
 893          }
 894          else if ($m_approve_fid_ary == array(-1))
 895          {
 896              $m_approve_fid_sql = '';
 897          }
 898          else
 899          {
 900              $m_approve_fid_sql = ' AND (p.post_approved = 1 OR ' . $db->sql_in_set('p.forum_id', $m_approve_fid_ary, true) . ')';
 901          }
 902  
 903          $select = ($type == 'posts') ? 'p.post_id' : 't.topic_id';
 904          $is_mysql = false;
 905  
 906          // If the cache was completely empty count the results
 907          if (!$total_results)
 908          {
 909              switch ($db->sql_layer)
 910              {
 911                  case 'mysql4':
 912                  case 'mysqli':
 913  //                    $select = 'SQL_CALC_FOUND_ROWS ' . $select;
 914                      $is_mysql = true;
 915                  break;
 916  
 917                  default:
 918                      if ($type == 'posts')
 919                      {
 920                          $sql = 'SELECT COUNT(p.post_id) as total_results
 921                              FROM ' . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t ' : ' ') . "
 922                              WHERE $sql_author
 923                                  $sql_topic_id
 924                                  $sql_firstpost
 925                                  $m_approve_fid_sql
 926                                  $sql_fora
 927                                  $sql_time";
 928                      }
 929                      else
 930                      {
 931                          if ($db->sql_layer == 'sqlite')
 932                          {
 933                              $sql = 'SELECT COUNT(topic_id) as total_results
 934                                  FROM (SELECT DISTINCT t.topic_id';
 935                          }
 936                          else
 937                          {
 938                              $sql = 'SELECT COUNT(DISTINCT t.topic_id) as total_results';
 939                          }
 940  
 941                          $sql .= ' FROM ' . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
 942                              WHERE $sql_author
 943                                  $sql_topic_id
 944                                  $sql_firstpost
 945                                  $m_approve_fid_sql
 946                                  $sql_fora
 947                                  AND t.topic_id = p.topic_id
 948                                  $sql_time" . (($db->sql_layer == 'sqlite') ? ')' : '');
 949                      }
 950                      $result = $db->sql_query($sql);
 951  
 952                      $total_results = (int) $db->sql_fetchfield('total_results');
 953                      $db->sql_freeresult($result);
 954  
 955                      if (!$total_results)
 956                      {
 957                          return false;
 958                      }
 959                  break;
 960              }
 961          }
 962  
 963          // Build the query for really selecting the post_ids
 964          if ($type == 'posts')
 965          {
 966              $sql = "SELECT $select
 967                  FROM " . $sql_sort_table . POSTS_TABLE . ' p' . (($firstpost_only) ? ', ' . TOPICS_TABLE . ' t' : '') . "
 968                  WHERE $sql_author
 969                      $sql_topic_id
 970                      $sql_firstpost
 971                      $m_approve_fid_sql
 972                      $sql_fora
 973                      $sql_sort_join
 974                      $sql_time
 975                  ORDER BY $sql_sort";
 976              $field = 'post_id';
 977          }
 978          else
 979          {
 980              $sql = "SELECT $select
 981                  FROM " . $sql_sort_table . TOPICS_TABLE . ' t, ' . POSTS_TABLE . " p
 982                  WHERE $sql_author
 983                      $sql_topic_id
 984                      $sql_firstpost
 985                      $m_approve_fid_sql
 986                      $sql_fora
 987                      AND t.topic_id = p.topic_id
 988                      $sql_sort_join
 989                      $sql_time
 990                  GROUP BY t.topic_id, " . $sort_by_sql[$sort_key] . '
 991                  ORDER BY ' . $sql_sort;
 992              $field = 'topic_id';
 993          }
 994  
 995          // Only read one block of posts from the db and then cache it
 996          $result = $db->sql_query_limit($sql, $config['search_block_size'], $start);
 997  
 998          while ($row = $db->sql_fetchrow($result))
 999          {
1000              $id_ary[] = (int) $row[$field];
1001          }
1002          $db->sql_freeresult($result);
1003  
1004          if (!$total_results && $is_mysql)
1005          {
1006              // Count rows for the executed queries. Replace $select within $sql with SQL_CALC_FOUND_ROWS, and run it.
1007              $sql = str_replace('SELECT ' . $select, 'SELECT DISTINCT SQL_CALC_FOUND_ROWS p.post_id', $sql);
1008  
1009              $db->sql_query($sql);
1010              $db->sql_freeresult($result);
1011  
1012              $sql = 'SELECT FOUND_ROWS() as total_results';
1013              $result = $db->sql_query($sql);
1014              $total_results = (int) $db->sql_fetchfield('total_results');
1015              $db->sql_freeresult($result);
1016  
1017              if (!$total_results)
1018              {
1019                  return false;
1020              }
1021          }
1022  
1023          if (sizeof($id_ary))
1024          {
1025              $this->save_ids($search_key, '', $author_ary, $total_results, $id_ary, $start, $sort_dir);
1026              $id_ary = array_slice($id_ary, 0, $per_page);
1027  
1028              return $total_results;
1029          }
1030          return false;
1031      }
1032  
1033      /**
1034      * Split a text into words of a given length
1035      *
1036      * The text is converted to UTF-8, cleaned up, and split. Then, words that
1037      * conform to the defined length range are returned in an array.
1038      *
1039      * NOTE: duplicates are NOT removed from the return array
1040      *
1041      * @param    string    $text    Text to split, encoded in UTF-8
1042      * @return    array            Array of UTF-8 words
1043      *
1044      * @access    private
1045      */
1046  	function split_message($text)
1047      {
1048          global $phpbb_root_path, $phpEx, $user;
1049  
1050          $match = $words = array();
1051  
1052          /**
1053          * Taken from the original code
1054          */
1055          // Do not index code
1056          $match[] = '#\[code(?:=.*?)?(\:?[0-9a-z]{5,})\].*?\[\/code(\:?[0-9a-z]{5,})\]#is';
1057          // BBcode
1058          $match[] = '#\[\/?[a-z0-9\*\+\-]+(?:=.*?)?(?::[a-z])?(\:?[0-9a-z]{5,})\]#';
1059  
1060          $min = $this->word_length['min'];
1061          $max = $this->word_length['max'];
1062  
1063          $isset_min = $min - 1;
1064  
1065          /**
1066          * Clean up the string, remove HTML tags, remove BBCodes
1067          */
1068          $word = strtok($this->cleanup(preg_replace($match, ' ', strip_tags($text)), -1), ' ');
1069  
1070          while (strlen($word))
1071          {
1072              if (strlen($word) > 255 || strlen($word) <= $isset_min)
1073              {
1074                  /**
1075                  * Words longer than 255 bytes are ignored. This will have to be
1076                  * changed whenever we change the length of search_wordlist.word_text
1077                  *
1078                  * Words shorter than $isset_min bytes are ignored, too
1079                  */
1080                  $word = strtok(' ');
1081                  continue;
1082              }
1083  
1084              $len = utf8_strlen($word);
1085  
1086              /**
1087              * Test whether the word is too short to be indexed.
1088              *
1089              * Note that this limit does NOT apply to CJK and Hangul
1090              */
1091              if ($len < $min)
1092              {
1093                  /**
1094                  * Note: this could be optimized. If the codepoint is lower than Hangul's range
1095                  * we know that it will also be lower than CJK ranges
1096                  */
1097                  if ((strncmp($word, UTF8_HANGUL_FIRST, 3) < 0 || strncmp($word, UTF8_HANGUL_LAST, 3) > 0)
1098                   && (strncmp($word, UTF8_CJK_FIRST, 3) < 0 || strncmp($word, UTF8_CJK_LAST, 3) > 0)
1099                   && (strncmp($word, UTF8_CJK_B_FIRST, 4) < 0 || strncmp($word, UTF8_CJK_B_LAST, 4) > 0))
1100                  {
1101                      $word = strtok(' ');
1102                      continue;
1103                  }
1104              }
1105  
1106              $words[] = $word;
1107              $word = strtok(' ');
1108          }
1109  
1110          return $words;
1111      }
1112  
1113      /**
1114      * Updates wordlist and wordmatch tables when a message is posted or changed
1115      *
1116      * @param    string    $mode        Contains the post mode: edit, post, reply, quote
1117      * @param    int        $post_id    The id of the post which is modified/created
1118      * @param    string    &$message    New or updated post content
1119      * @param    string    &$subject    New or updated post subject
1120      * @param    int        $poster_id    Post author's user id
1121      * @param    int        $forum_id    The id of the forum in which the post is located
1122      *
1123      * @access    public
1124      */
1125  	function index($mode, $post_id, &$message, &$subject, $poster_id, $forum_id)
1126      {
1127          global $config, $db, $user;
1128  
1129          if (!$config['fulltext_native_load_upd'])
1130          {
1131              /**
1132              * The search indexer is disabled, return
1133              */
1134              return;
1135          }
1136  
1137          // Split old and new post/subject to obtain array of 'words'
1138          $split_text = $this->split_message($message);
1139          $split_title = $this->split_message($subject);
1140  
1141          $cur_words = array('post' => array(), 'title' => array());
1142  
1143          $words = array();
1144          if ($mode == 'edit')
1145          {
1146              $words['add']['post'] = array();
1147              $words['add']['title'] = array();
1148              $words['del']['post'] = array();
1149              $words['del']['title'] = array();
1150  
1151              $sql = 'SELECT w.word_id, w.word_text, m.title_match
1152                  FROM ' . SEARCH_WORDLIST_TABLE . ' w, ' . SEARCH_WORDMATCH_TABLE . " m
1153                  WHERE m.post_id = $post_id
1154                      AND w.word_id = m.word_id";
1155              $result = $db->sql_query($sql);
1156  
1157              while ($row = $db->sql_fetchrow($result))
1158              {
1159                  $which = ($row['title_match']) ? 'title' : 'post';
1160                  $cur_words[$which][$row['word_text']] = $row['word_id'];
1161              }
1162              $db->sql_freeresult($result);
1163  
1164              $words['add']['post'] = array_diff($split_text, array_keys($cur_words['post']));
1165              $words['add']['title'] = array_diff($split_title, array_keys($cur_words['title']));
1166              $words['del']['post'] = array_diff(array_keys($cur_words['post']), $split_text);
1167              $words['del']['title'] = array_diff(array_keys($cur_words['title']), $split_title);
1168          }
1169          else
1170          {
1171              $words['add']['post'] = $split_text;
1172              $words['add']['title'] = $split_title;
1173              $words['del']['post'] = array();
1174              $words['del']['title'] = array();
1175          }
1176          unset($split_text);
1177          unset($split_title);
1178  
1179          // Get unique words from the above arrays
1180          $unique_add_words = array_unique(array_merge($words['add']['post'], $words['add']['title']));
1181  
1182          // We now have unique arrays of all words to be added and removed and
1183          // individual arrays of added and removed words for text and title. What
1184          // we need to do now is add the new words (if they don't already exist)
1185          // and then add (or remove) matches between the words and this post
1186          if (sizeof($unique_add_words))
1187          {
1188              $sql = 'SELECT word_id, word_text
1189                  FROM ' . SEARCH_WORDLIST_TABLE . '
1190                  WHERE ' . $db->sql_in_set('word_text', $unique_add_words);
1191              $result = $db->sql_query($sql);
1192  
1193              $word_ids = array();
1194              while ($row = $db->sql_fetchrow($result))
1195              {
1196                  $word_ids[$row['word_text']] = $row['word_id'];
1197              }
1198              $db->sql_freeresult($result);
1199              $new_words = array_diff($unique_add_words, array_keys($word_ids));
1200  
1201              $db->sql_transaction('begin');
1202              if (sizeof($new_words))
1203              {
1204                  $sql_ary = array();
1205  
1206                  foreach ($new_words as $word)
1207                  {
1208                      $sql_ary[] = array('word_text' => (string) $word, 'word_count' => 0);
1209                  }
1210                  $db->sql_return_on_error(true);
1211                  $db->sql_multi_insert(SEARCH_WORDLIST_TABLE, $sql_ary);
1212                  $db->sql_return_on_error(false);
1213              }
1214              unset($new_words, $sql_ary);
1215          }
1216          else
1217          {
1218              $db->sql_transaction('begin');
1219          }
1220  
1221          // now update the search match table, remove links to removed words and add links to new words
1222          foreach ($words['del'] as $word_in => $word_ary)
1223          {
1224              $title_match = ($word_in == 'title') ? 1 : 0;
1225  
1226              if (sizeof($word_ary))
1227              {
1228                  $sql_in = array();
1229                  foreach ($word_ary as $word)
1230                  {
1231                      $sql_in[] = $cur_words[$word_in][$word];
1232                  }
1233  
1234                  $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1235                      WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1236                          AND post_id = ' . intval($post_id) . "
1237                          AND title_match = $title_match";
1238                  $db->sql_query($sql);
1239  
1240                  $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1241                      SET word_count = word_count - 1
1242                      WHERE ' . $db->sql_in_set('word_id', $sql_in) . '
1243                          AND word_count > 0';
1244                  $db->sql_query($sql);
1245  
1246                  unset($sql_in);
1247              }
1248          }
1249  
1250          $db->sql_return_on_error(true);
1251          foreach ($words['add'] as $word_in => $word_ary)
1252          {
1253              $title_match = ($word_in == 'title') ? 1 : 0;
1254  
1255              if (sizeof($word_ary))
1256              {
1257                  $sql = 'INSERT INTO ' . SEARCH_WORDMATCH_TABLE . ' (post_id, word_id, title_match)
1258                      SELECT ' . (int) $post_id . ', word_id, ' . (int) $title_match . '
1259                      FROM ' . SEARCH_WORDLIST_TABLE . '
1260                      WHERE ' . $db->sql_in_set('word_text', $word_ary);
1261                  $db->sql_query($sql);
1262  
1263                  $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1264                      SET word_count = word_count + 1
1265                      WHERE ' . $db->sql_in_set('word_text', $word_ary);
1266                  $db->sql_query($sql);
1267              }
1268          }
1269          $db->sql_return_on_error(false);
1270  
1271          $db->sql_transaction('commit');
1272  
1273          // destroy cached search results containing any of the words removed or added
1274          $this->destroy_cache(array_unique(array_merge($words['add']['post'], $words['add']['title'], $words['del']['post'], $words['del']['title'])), array($poster_id));
1275  
1276          unset($unique_add_words);
1277          unset($words);
1278          unset($cur_words);
1279      }
1280  
1281      /**
1282      * Removes entries from the wordmatch table for the specified post_ids
1283      */
1284  	function index_remove($post_ids, $author_ids, $forum_ids)
1285      {
1286          global $db;
1287  
1288          if (sizeof($post_ids))
1289          {
1290              $sql = 'SELECT w.word_id, w.word_text, m.title_match
1291                  FROM ' . SEARCH_WORDMATCH_TABLE . ' m, ' . SEARCH_WORDLIST_TABLE . ' w
1292                  WHERE ' . $db->sql_in_set('m.post_id', $post_ids) . '
1293                      AND w.word_id = m.word_id';
1294              $result = $db->sql_query($sql);
1295  
1296              $message_word_ids = $title_word_ids = $word_texts = array();
1297              while ($row = $db->sql_fetchrow($result))
1298              {
1299                  if ($row['title_match'])
1300                  {
1301                      $title_word_ids[] = $row['word_id'];
1302                  }
1303                  else
1304                  {
1305                      $message_word_ids[] = $row['word_id'];
1306                  }
1307                  $word_texts[] = $row['word_text'];
1308              }
1309              $db->sql_freeresult($result);
1310  
1311              if (sizeof($title_word_ids))
1312              {
1313                  $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1314                      SET word_count = word_count - 1
1315                      WHERE ' . $db->sql_in_set('word_id', $title_word_ids) . '
1316                          AND word_count > 0';
1317                  $db->sql_query($sql);
1318              }
1319  
1320              if (sizeof($message_word_ids))
1321              {
1322                  $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1323                      SET word_count = word_count - 1
1324                      WHERE ' . $db->sql_in_set('word_id', $message_word_ids) . '
1325                          AND word_count > 0';
1326                  $db->sql_query($sql);
1327              }
1328  
1329              unset($title_word_ids);
1330              unset($message_word_ids);
1331  
1332              $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1333                  WHERE ' . $db->sql_in_set('post_id', $post_ids);
1334              $db->sql_query($sql);
1335          }
1336  
1337          $this->destroy_cache(array_unique($word_texts), array_unique($author_ids));
1338      }
1339  
1340      /**
 1341      * Tidy up indexes: tag 'common words' and remove
 1342      * their entries from the match table
1343      */
1344  	function tidy()
1345      {
1346          global $db, $config;
1347  
1348          // Is the fulltext indexer disabled? If yes then we need not
1349          // carry on ... it's okay ... I know when I'm not wanted boo hoo
1350          if (!$config['fulltext_native_load_upd'])
1351          {
1352              set_config('search_last_gc', time(), true);
1353              return;
1354          }
1355  
1356          $destroy_cache_words = array();
1357  
1358          // Remove common words
1359          if ($config['num_posts'] >= 100 && $config['fulltext_native_common_thres'])
1360          {
1361              $common_threshold = ((double) $config['fulltext_native_common_thres']) / 100.0;
1362              // First, get the IDs of common words
1363              $sql = 'SELECT word_id, word_text
1364                  FROM ' . SEARCH_WORDLIST_TABLE . '
1365                  WHERE word_count > ' . floor($config['num_posts'] * $common_threshold) . '
1366                      OR word_common = 1';
1367              $result = $db->sql_query($sql);
1368  
1369              $sql_in = array();
1370              while ($row = $db->sql_fetchrow($result))
1371              {
1372                  $sql_in[] = $row['word_id'];
1373                  $destroy_cache_words[] = $row['word_text'];
1374              }
1375              $db->sql_freeresult($result);
1376  
1377              if (sizeof($sql_in))
1378              {
1379                  // Flag the words
1380                  $sql = 'UPDATE ' . SEARCH_WORDLIST_TABLE . '
1381                      SET word_common = 1
1382                      WHERE ' . $db->sql_in_set('word_id', $sql_in);
1383                  $db->sql_query($sql);
1384  
1385                  // by setting search_last_gc to the new time here we make sure that if a user reloads because the
1386                  // following query takes too long, he won't run into it again
1387                  set_config('search_last_gc', time(), true);
1388  
1389                  // Delete the matches
1390                  $sql = 'DELETE FROM ' . SEARCH_WORDMATCH_TABLE . '
1391                      WHERE ' . $db->sql_in_set('word_id', $sql_in);
1392                  $db->sql_query($sql);
1393              }
1394              unset($sql_in);
1395          }
1396  
1397          if (sizeof($destroy_cache_words))
1398          {
1399              // destroy cached search results containing any of the words that are now common or were removed
1400              $this->destroy_cache(array_unique($destroy_cache_words));
1401          }
1402  
1403          set_config('search_last_gc', time(), true);
1404      }
1405  
1406      /**
1407      * Deletes all words from the index
1408      */
1409  	function delete_index($acp_module, $u_action)
1410      {
1411          global $db;
1412  
1413          switch ($db->sql_layer)
1414          {
1415              case 'sqlite':
1416              case 'firebird':
1417                  $db->sql_query('DELETE FROM ' . SEARCH_WORDLIST_TABLE);
1418                  $db->sql_query('DELETE FROM ' . SEARCH_WORDMATCH_TABLE);
1419                  $db->sql_query('DELETE FROM ' . SEARCH_RESULTS_TABLE);
1420              break;
1421  
1422              default:
1423                  $db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDLIST_TABLE);
1424                  $db->sql_query('TRUNCATE TABLE ' . SEARCH_WORDMATCH_TABLE);
1425                  $db->sql_query('TRUNCATE TABLE ' . SEARCH_RESULTS_TABLE);
1426              break;
1427          }
1428      }
1429  
1430      /**
 1431      * Returns true if both the word list and the word match table contain entries
1432      */
1433  	function index_created()
1434      {
1435          if (!sizeof($this->stats))
1436          {
1437              $this->get_stats();
1438          }
1439  
1440          return ($this->stats['total_words'] && $this->stats['total_matches']) ? true : false;
1441      }
1442  
1443      /**
1444      * Returns an associative array containing information about the indexes
1445      */
1446  	function index_stats()
1447      {
1448          global $user;
1449  
1450          if (!sizeof($this->stats))
1451          {
1452              $this->get_stats();
1453          }
1454  
1455          return array(
1456              $user->lang['TOTAL_WORDS']        => $this->stats['total_words'],
1457              $user->lang['TOTAL_MATCHES']    => $this->stats['total_matches']);
1458      }
1459  
1460  	function get_stats()
1461      {
1462          global $db;
1463  
1464          $this->stats['total_words']        = $db->get_estimated_row_count(SEARCH_WORDLIST_TABLE);
1465          $this->stats['total_matches']    = $db->get_estimated_row_count(SEARCH_WORDMATCH_TABLE);
1466      }
1467  
1468      /**
1469      * Clean up a text to remove non-alphanumeric characters
1470      *
1471      * This method receives a UTF-8 string, normalizes and validates it, replaces all
1472      * non-alphanumeric characters with strings then returns the result.
1473      *
1474      * Any number of "allowed chars" can be passed as a UTF-8 string in NFC.
1475      *
1476      * @param    string    $text            Text to split, in UTF-8 (not normalized or sanitized)
1477      * @param    string    $allowed_chars    String of special chars to allow
1478      * @param    string    $encoding        Text encoding
1479      * @return    string                    Cleaned up text, only alphanumeric chars are left
1480      *
1481      * @todo normalizer::cleanup being able to be used?
1482      */
1483  	function cleanup($text, $allowed_chars = null, $encoding = 'utf-8')
1484      {
1485          global $phpbb_root_path, $phpEx;
1486          static $conv = array(), $conv_loaded = array();
1487          $words = $allow = array();
1488  
1489          // Convert the text to UTF-8
1490          $encoding = strtolower($encoding);
1491          if ($encoding != 'utf-8')
1492          {
1493              $text = utf8_recode($text, $encoding);
1494          }
1495  
1496          $utf_len_mask = array(
1497              "\xC0"    =>    2,
1498              "\xD0"    =>    2,
1499              "\xE0"    =>    3,
1500              "\xF0"    =>    4
1501          );
1502  
1503          /**
1504          * Replace HTML entities and NCRs
1505          */
1506          $text = htmlspecialchars_decode(utf8_decode_ncr($text), ENT_QUOTES);
1507  
1508          /**
1509          * Load the UTF-8 normalizer
1510          *
1511          * If we use it more widely, an instance of that class should be held in a
1512          * a global variable instead
1513          */
1514          utf_normalizer::nfc($text);
1515  
1516          /**
1517          * The first thing we do is:
1518          *
1519          * - convert ASCII-7 letters to lowercase
1520          * - remove the ASCII-7 non-alpha characters
1521          * - remove the bytes that should not appear in a valid UTF-8 string: 0xC0,
1522          *   0xC1 and 0xF5-0xFF
1523          *
1524          * @todo in theory, the third one is already taken care of during normalization and those chars should have been replaced by Unicode replacement chars
1525          */
1526          $sb_match    = "ISTCPAMELRDOJBNHFGVWUQKYXZ\r\n\t!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F\xC0\xC1\xF5\xF6\xF7\xF8\xF9\xFA\xFB\xFC\xFD\xFE\xFF";
1527          $sb_replace    = 'istcpamelrdojbnhfgvwuqkyxz                                                                              ';
1528  
1529          /**
1530          * This is the list of legal ASCII chars, it is automatically extended
1531          * with ASCII chars from $allowed_chars
1532          */
1533          $legal_ascii = ' eaisntroludcpmghbfvq10xy2j9kw354867z';
1534  
1535          /**
1536          * Prepare an array containing the extra chars to allow
1537          */
1538          if (isset($allowed_chars[0]))
1539          {
1540              $pos = 0;
1541              $len = strlen($allowed_chars);
1542              do
1543              {
1544                  $c = $allowed_chars[$pos];
1545  
1546                  if ($c < "\x80")
1547                  {
1548                      /**
1549                      * ASCII char
1550                      */
1551                      $sb_pos = strpos($sb_match, $c);
1552                      if (is_int($sb_pos))
1553                      {
1554                          /**
1555                          * Remove the char from $sb_match and its corresponding
1556                          * replacement in $sb_replace
1557                          */
1558                          $sb_match = substr($sb_match, 0, $sb_pos) . substr($sb_match, $sb_pos + 1);
1559                          $sb_replace = substr($sb_replace, 0, $sb_pos) . substr($sb_replace, $sb_pos + 1);
1560                          $legal_ascii .= $c;
1561                      }
1562  
1563                      ++$pos;
1564                  }
1565                  else
1566                  {
1567                      /**
1568                      * UTF-8 char
1569                      */
1570                      $utf_len = $utf_len_mask[$c & "\xF0"];
1571                      $allow[substr($allowed_chars, $pos, $utf_len)] = 1;
1572                      $pos += $utf_len;
1573                  }
1574              }
1575              while ($pos < $len);
1576          }
1577  
1578          $text = strtr($text, $sb_match, $sb_replace);
1579          $ret = '';
1580  
1581          $pos = 0;
1582          $len = strlen($text);
1583  
1584          do
1585          {
1586              /**
1587              * Do all consecutive ASCII chars at once
1588              */
1589              if ($spn = strspn($text, $legal_ascii, $pos))
1590              {
1591                  $ret .= substr($text, $pos, $spn);
1592                  $pos += $spn;
1593              }
1594  
1595              if ($pos >= $len)
1596              {
1597                  return $ret;
1598              }
1599  
1600              /**
1601              * Capture the UTF char
1602              */
1603              $utf_len = $utf_len_mask[$text[$pos] & "\xF0"];
1604              $utf_char = substr($text, $pos, $utf_len);
1605              $pos += $utf_len;
1606  
1607              if (($utf_char >= UTF8_HANGUL_FIRST && $utf_char <= UTF8_HANGUL_LAST)
1608               || ($utf_char >= UTF8_CJK_FIRST && $utf_char <= UTF8_CJK_LAST)
1609               || ($utf_char >= UTF8_CJK_B_FIRST && $utf_char <= UTF8_CJK_B_LAST))
1610              {
1611                  /**
1612                  * All characters within these ranges are valid
1613                  *
1614                  * We separate them with a space in order to index each character
1615                  * individually
1616                  */
1617                  $ret .= ' ' . $utf_char . ' ';
1618                  continue;
1619              }
1620  
1621              if (isset($allow[$utf_char]))
1622              {
1623                  /**
1624                  * The char is explicitly allowed
1625                  */
1626                  $ret .= $utf_char;
1627                  continue;
1628              }
1629  
1630              if (isset($conv[$utf_char]))
1631              {
1632                  /**
1633                  * The char is mapped to something, maybe to itself actually
1634                  */
1635                  $ret .= $conv[$utf_char];
1636                  continue;
1637              }
1638  
1639              /**
1640              * The char isn't mapped, but did we load its conversion table?
1641              *
1642              * The search indexer table is split into blocks. The block number of
1643              * each char is equal to its codepoint right-shifted for 11 bits. It
1644              * means that out of the 11, 16 or 21 meaningful bits of a 2-, 3- or
1645              * 4- byte sequence we only keep the leftmost 0, 5 or 10 bits. Thus,
1646              * all UTF chars encoded in 2 bytes are in the same first block.
1647              */
1648              if (isset($utf_char[2]))
1649              {
1650                  if (isset($utf_char[3]))
1651                  {
1652                      /**
1653                      * 1111 0nnn 10nn nnnn 10nx xxxx 10xx xxxx
1654                      * 0000 0111 0011 1111 0010 0000
1655                      */
1656                      $idx = ((ord($utf_char[0]) & 0x07) << 7) | ((ord($utf_char[1]) & 0x3F) << 1) | ((ord($utf_char[2]) & 0x20) >> 5);
1657                  }
1658                  else
1659                  {
1660                      /**
1661                      * 1110 nnnn 10nx xxxx 10xx xxxx
1662                      * 0000 0111 0010 0000
1663                      */
1664                      $idx = ((ord($utf_char[0]) & 0x07) << 1) | ((ord($utf_char[1]) & 0x20) >> 5);
1665                  }
1666              }
1667              else
1668              {
1669                  /**
1670                  * 110x xxxx 10xx xxxx
1671                  * 0000 0000 0000 0000
1672                  */
1673                  $idx = 0;
1674              }
1675  
1676              /**
1677              * Check if the required conv table has been loaded already
1678              */
1679              if (!isset($conv_loaded[$idx]))
1680              {
1681                  $conv_loaded[$idx] = 1;
1682                  $file = $phpbb_root_path . 'includes/utf/data/search_indexer_' . $idx . '.' . $phpEx;
1683  
1684                  if (file_exists($file))
1685                  {
1686                      $conv += include($file);
1687                  }
1688              }
1689  
1690              if (isset($conv[$utf_char]))
1691              {
1692                  $ret .= $conv[$utf_char];
1693              }
1694              else
1695              {
1696                  /**
1697                  * We add an entry to the conversion table so that we
1698                  * don't have to convert to codepoint and perform the checks
1699                  * that are above this block
1700                  */
1701                  $conv[$utf_char] = ' ';
1702                  $ret .= ' ';
1703              }
1704          }
1705          while (1);
1706  
1707          return $ret;
1708      }
1709  
1710      /**
1711      * Returns a list of options for the ACP to display
1712      */
1713  	function acp()
1714      {
1715          global $user, $config;
1716  
1717  
1718          /**
1719          * if we need any options, copied from fulltext_native for now, will have to be adjusted or removed
1720          */
1721  
1722          $tpl = '
1723          <dl>
1724              <dt><label for="fulltext_native_load_upd">' . $user->lang['YES_SEARCH_UPDATE'] . ':</label><br /><span>' . $user->lang['YES_SEARCH_UPDATE_EXPLAIN'] . '</span></dt>
1725              <dd><label><input type="radio" id="fulltext_native_load_upd" name="config[fulltext_native_load_upd]" value="1"' . (($config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['YES'] . '</label><label><input type="radio" name="config[fulltext_native_load_upd]" value="0"' . ((!$config['fulltext_native_load_upd']) ? ' checked="checked"' : '') . ' class="radio" /> ' . $user->lang['NO'] . '</label></dd>
1726          </dl>
1727          <dl>
1728              <dt><label for="fulltext_native_min_chars">' . $user->lang['MIN_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MIN_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1729              <dd><input id="fulltext_native_min_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_min_chars]" value="' . (int) $config['fulltext_native_min_chars'] . '" /></dd>
1730          </dl>
1731          <dl>
1732              <dt><label for="fulltext_native_max_chars">' . $user->lang['MAX_SEARCH_CHARS'] . ':</label><br /><span>' . $user->lang['MAX_SEARCH_CHARS_EXPLAIN'] . '</span></dt>
1733              <dd><input id="fulltext_native_max_chars" type="text" size="3" maxlength="3" name="config[fulltext_native_max_chars]" value="' . (int) $config['fulltext_native_max_chars'] . '" /></dd>
1734          </dl>
1735          <dl>
1736              <dt><label for="fulltext_native_common_thres">' . $user->lang['COMMON_WORD_THRESHOLD'] . ':</label><br /><span>' . $user->lang['COMMON_WORD_THRESHOLD_EXPLAIN'] . '</span></dt>
1737              <dd><input id="fulltext_native_common_thres" type="text" size="3" maxlength="3" name="config[fulltext_native_common_thres]" value="' . (double) $config['fulltext_native_common_thres'] . '" /> %</dd>
1738          </dl>
1739          ';
1740  
1741          // These are fields required in the config table
1742          return array(
1743              'tpl'        => $tpl,
1744              'config'    => array('fulltext_native_load_upd' => 'bool', 'fulltext_native_min_chars' => 'integer:0:255', 'fulltext_native_max_chars' => 'integer:0:255', 'fulltext_native_common_thres' => 'double:0:100')
1745          );
1746      }
1747  }
1748  
1749  ?>


Generated: Wed Oct 2 15:03:47 2013 Cross-referenced by PHPXref 0.7.1