[ Index ]

PHP Cross Reference of Unnamed Project

title

Body

[close]

/includes/ -> functions_search.php (source)

   1  <?php
   2  /***************************************************************************
   3  *                              functions_search.php
   4  *                              -------------------
   5  *     begin                : Wed Sep 05 2001
   6  *     copyright            : (C) 2002 The phpBB Group
   7  *     email                : support@phpbb.com
   8  *
   9  *     $Id: functions_search.php 5204 2005-09-14 18:14:30Z acydburn $
  10  *
  11  ****************************************************************************/
  12  
  13  /***************************************************************************
  14   *
  15   *   This program is free software; you can redistribute it and/or modify
  16   *   it under the terms of the GNU General Public License as published by
  17   *   the Free Software Foundation; either version 2 of the License, or
  18   *   (at your option) any later version.
  19   *
  20   ***************************************************************************/
  21  
  22  function clean_words($mode, &$entry, &$stopword_list, &$synonym_list)
  23  {
  24      static $drop_char_match =   array('^', '$', '&', '(', ')', '<', '>', '`', '\'', '"', '|', ',', '@', '_', '?', '%', '-', '~', '+', '.', '[', ']', '{', '}', ':', '\\', '/', '=', '#', '\'', ';', '!');
  25      static $drop_char_replace = array(' ', ' ', ' ', ' ', ' ', ' ', ' ', '',  '',   ' ', ' ', ' ', ' ', '',  ' ', ' ', '',  ' ',  ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ' , ' ', ' ', ' ', ' ',  ' ', ' ');
  26  
  27      $entry = ' ' . strip_tags(strtolower($entry)) . ' ';
  28  
  29      if ( $mode == 'post' )
  30      {
  31          // Replace line endings by a space
  32          $entry = preg_replace('/[\n\r]/is', ' ', $entry); 
  33          // HTML entities like &nbsp;
  34          $entry = preg_replace('/\b&[a-z]+;\b/', ' ', $entry); 
  35          // Remove URL's
  36          $entry = preg_replace('/\b[a-z0-9]+:\/\/[a-z0-9\.\-]+(\/[a-z0-9\?\.%_\-\+=&\/]+)?/', ' ', $entry); 
  37          // Quickly remove BBcode.
  38          $entry = preg_replace('/\[img:[a-z0-9]{10,}\].*?\[\/img:[a-z0-9]{10,}\]/', ' ', $entry); 
  39          $entry = preg_replace('/\[\/?url(=.*?)?\]/', ' ', $entry);
  40          $entry = preg_replace('/\[\/?[a-z\*=\+\-]+(\:?[0-9a-z]+)?:[a-z0-9]{10,}(\:[a-z0-9]+)?=?.*?\]/', ' ', $entry);
  41      }
  42      else if ( $mode == 'search' ) 
  43      {
  44          $entry = str_replace(' +', ' and ', $entry);
  45          $entry = str_replace(' -', ' not ', $entry);
  46      }
  47  
  48      //
  49      // Filter out strange characters like ^, $, &, change "it's" to "its"
  50      //
  51      for($i = 0; $i < count($drop_char_match); $i++)
  52      {
  53          $entry =  str_replace($drop_char_match[$i], $drop_char_replace[$i], $entry);
  54      }
  55  
  56      if ( $mode == 'post' )
  57      {
  58          $entry = str_replace('*', ' ', $entry);
  59  
  60          // 'words' that consist of <3 or >20 characters are removed.
  61          $entry = preg_replace('/[ ]([\S]{1,2}|[\S]{21,})[ ]/',' ', $entry);
  62      }
  63  
  64      if ( !empty($stopword_list) )
  65      {
  66          for ($j = 0; $j < count($stopword_list); $j++)
  67          {
  68              $stopword = trim($stopword_list[$j]);
  69  
  70              if ( $mode == 'post' || ( $stopword != 'not' && $stopword != 'and' && $stopword != 'or' ) )
  71              {
  72                  $entry = str_replace(' ' . trim($stopword) . ' ', ' ', $entry);
  73              }
  74          }
  75      }
  76  
  77      if ( !empty($synonym_list) )
  78      {
  79          for ($j = 0; $j < count($synonym_list); $j++)
  80          {
  81              list($replace_synonym, $match_synonym) = split(' ', trim(strtolower($synonym_list[$j])));
  82              if ( $mode == 'post' || ( $match_synonym != 'not' && $match_synonym != 'and' && $match_synonym != 'or' ) )
  83              {
  84                  $entry =  str_replace(' ' . trim($match_synonym) . ' ', ' ' . trim($replace_synonym) . ' ', $entry);
  85              }
  86          }
  87      }
  88  
  89      return $entry;
  90  }
  91  
  92  function split_words($entry, $mode = 'post')
  93  {
  94      // If you experience problems with the new method, uncomment this block.
  95  /*    
  96      $rex = ( $mode == 'post' ) ? "/\b([\w±µ-ÿ][\w±µ-ÿ']*[\w±µ-ÿ]+|[\w±µ-ÿ]+?)\b/" : '/(\*?[a-z0-9±µ-ÿ]+\*?)|\b([a-z0-9±µ-ÿ]+)\b/';
  97      preg_match_all($rex, $entry, $split_entries);
  98  
  99      return $split_entries[1];
 100  */
 101      // Trim 1+ spaces to one space and split this trimmed string into words.
 102      return explode(' ', trim(preg_replace('#\s+#', ' ', $entry)));
 103  }
 104  
 105  function add_search_words($mode, $post_id, $post_text, $post_title = '')
 106  {
 107      global $db, $phpbb_root_path, $board_config, $lang;
 108  
 109      $stopword_array = @file($phpbb_root_path . 'language/lang_' . $board_config['default_lang'] . "/search_stopwords.txt"); 
 110      $synonym_array = @file($phpbb_root_path . 'language/lang_' . $board_config['default_lang'] . "/search_synonyms.txt"); 
 111  
 112      $search_raw_words = array();
 113      $search_raw_words['text'] = split_words(clean_words('post', $post_text, $stopword_array, $synonym_array));
 114      $search_raw_words['title'] = split_words(clean_words('post', $post_title, $stopword_array, $synonym_array));
 115  
 116      @set_time_limit(0);
 117  
 118      $word = array();
 119      $word_insert_sql = array();
 120      while ( list($word_in, $search_matches) = @each($search_raw_words) )
 121      {
 122          $word_insert_sql[$word_in] = '';
 123          if ( !empty($search_matches) )
 124          {
 125              for ($i = 0; $i < count($search_matches); $i++)
 126              { 
 127                  $search_matches[$i] = trim($search_matches[$i]);
 128  
 129                  if( $search_matches[$i] != '' ) 
 130                  {
 131                      $word[] = $search_matches[$i];
 132                      if ( !strstr($word_insert_sql[$word_in], "'" . $search_matches[$i] . "'") )
 133                      {
 134                          $word_insert_sql[$word_in] .= ( $word_insert_sql[$word_in] != "" ) ? ", '" . $search_matches[$i] . "'" : "'" . $search_matches[$i] . "'";
 135                      }
 136                  } 
 137              }
 138          }
 139      }
 140  
 141      if ( count($word) )
 142      {
 143          sort($word);
 144  
 145          $prev_word = '';
 146          $word_text_sql = '';
 147          $temp_word = array();
 148          for($i = 0; $i < count($word); $i++)
 149          {
 150              if ( $word[$i] != $prev_word )
 151              {
 152                  $temp_word[] = $word[$i];
 153                  $word_text_sql .= ( ( $word_text_sql != '' ) ? ', ' : '' ) . "'" . $word[$i] . "'";
 154              }
 155              $prev_word = $word[$i];
 156          }
 157          $word = $temp_word;
 158  
 159          $check_words = array();
 160          switch( SQL_LAYER )
 161          {
 162              case 'postgresql':
 163              case 'msaccess':
 164              case 'mssql-odbc':
 165              case 'oracle':
 166              case 'db2':
 167                  $sql = "SELECT word_id, word_text     
 168                      FROM " . SEARCH_WORD_TABLE . " 
 169                      WHERE word_text IN ($word_text_sql)";
 170                  if ( !($result = $db->sql_query($sql)) )
 171                  {
 172                      message_die(GENERAL_ERROR, 'Could not select words', '', __LINE__, __FILE__, $sql);
 173                  }
 174  
 175                  while ( $row = $db->sql_fetchrow($result) )
 176                  {
 177                      $check_words[$row['word_text']] = $row['word_id'];
 178                  }
 179                  break;
 180          }
 181  
 182          $value_sql = '';
 183          $match_word = array();
 184          for ($i = 0; $i < count($word); $i++)
 185          { 
 186              $new_match = true;
 187              if ( isset($check_words[$word[$i]]) )
 188              {
 189                  $new_match = false;
 190              }
 191  
 192              if ( $new_match )
 193              {
 194                  switch( SQL_LAYER )
 195                  {
 196                      case 'mysql':
 197                      case 'mysql4':
 198                          $value_sql .= ( ( $value_sql != '' ) ? ', ' : '' ) . '(\'' . $word[$i] . '\', 0)';
 199                          break;
 200                      case 'mssql':
 201                      case 'mssql-odbc':
 202                          $value_sql .= ( ( $value_sql != '' ) ? ' UNION ALL ' : '' ) . "SELECT '" . $word[$i] . "', 0";
 203                          break;
 204                      default:
 205                          $sql = "INSERT INTO " . SEARCH_WORD_TABLE . " (word_text, word_common) 
 206                              VALUES ('" . $word[$i] . "', 0)"; 
 207                          if( !$db->sql_query($sql) )
 208                          {
 209                              message_die(GENERAL_ERROR, 'Could not insert new word', '', __LINE__, __FILE__, $sql);
 210                          }
 211                          break;
 212                  }
 213              }
 214          }
 215  
 216          if ( $value_sql != '' )
 217          {
 218              switch ( SQL_LAYER )
 219              {
 220                  case 'mysql':
 221                  case 'mysql4':
 222                      $sql = "INSERT IGNORE INTO " . SEARCH_WORD_TABLE . " (word_text, word_common) 
 223                          VALUES $value_sql"; 
 224                      break;
 225                  case 'mssql':
 226                  case 'mssql-odbc':
 227                      $sql = "INSERT INTO " . SEARCH_WORD_TABLE . " (word_text, word_common) 
 228                          $value_sql"; 
 229                      break;
 230              }
 231  
 232              if ( !$db->sql_query($sql) )
 233              {
 234                  message_die(GENERAL_ERROR, 'Could not insert new word', '', __LINE__, __FILE__, $sql);
 235              }
 236          }
 237      }
 238  
 239      while( list($word_in, $match_sql) = @each($word_insert_sql) )
 240      {
 241          $title_match = ( $word_in == 'title' ) ? 1 : 0;
 242  
 243          if ( $match_sql != '' )
 244          {
 245              $sql = "INSERT INTO " . SEARCH_MATCH_TABLE . " (post_id, word_id, title_match) 
 246                  SELECT $post_id, word_id, $title_match  
 247                      FROM " . SEARCH_WORD_TABLE . " 
 248                      WHERE word_text IN ($match_sql)"; 
 249              if ( !$db->sql_query($sql) )
 250              {
 251                  message_die(GENERAL_ERROR, 'Could not insert new word matches', '', __LINE__, __FILE__, $sql);
 252              }
 253          }
 254      }
 255  
 256      if ($mode == 'single')
 257      {
 258          remove_common('single', 4/10, $word);
 259      }
 260  
 261      return;
 262  }
 263  
 264  //
 265  // Check if specified words are too common now
 266  //
 267  function remove_common($mode, $fraction, $word_id_list = array())
 268  {
 269      global $db;
 270  
 271      $sql = "SELECT COUNT(post_id) AS total_posts 
 272          FROM " . POSTS_TABLE;
 273      if ( !($result = $db->sql_query($sql)) )
 274      {
 275          message_die(GENERAL_ERROR, 'Could not obtain post count', '', __LINE__, __FILE__, $sql);
 276      }
 277  
 278      $row = $db->sql_fetchrow($result);
 279  
 280      if ( $row['total_posts'] >= 100 )
 281      {
 282          $common_threshold = floor($row['total_posts'] * $fraction);
 283  
 284          if ( $mode == 'single' && count($word_id_list) )
 285          {
 286              $word_id_sql = '';
 287              for($i = 0; $i < count($word_id_list); $i++)
 288              {
 289                  $word_id_sql .= ( ( $word_id_sql != '' ) ? ', ' : '' ) . "'" . $word_id_list[$i] . "'";
 290              }
 291  
 292              $sql = "SELECT m.word_id 
 293                  FROM " . SEARCH_MATCH_TABLE . " m, " . SEARCH_WORD_TABLE . " w 
 294                  WHERE w.word_text IN ($word_id_sql)  
 295                      AND m.word_id = w.word_id 
 296                  GROUP BY m.word_id 
 297                  HAVING COUNT(m.word_id) > $common_threshold";
 298          }
 299          else 
 300          {
 301              $sql = "SELECT word_id 
 302                  FROM " . SEARCH_MATCH_TABLE . " 
 303                  GROUP BY word_id 
 304                  HAVING COUNT(word_id) > $common_threshold";
 305          }
 306  
 307          if ( !($result = $db->sql_query($sql)) )
 308          {
 309              message_die(GENERAL_ERROR, 'Could not obtain common word list', '', __LINE__, __FILE__, $sql);
 310          }
 311  
 312          $common_word_id = '';
 313          while ( $row = $db->sql_fetchrow($result) )
 314          {
 315              $common_word_id .= ( ( $common_word_id != '' ) ? ', ' : '' ) . $row['word_id'];
 316          }
 317          $db->sql_freeresult($result);
 318  
 319          if ( $common_word_id != '' )
 320          {
 321              $sql = "UPDATE " . SEARCH_WORD_TABLE . "
 322                  SET word_common = " . TRUE . " 
 323                  WHERE word_id IN ($common_word_id)";
 324              if ( !$db->sql_query($sql) )
 325              {
 326                  message_die(GENERAL_ERROR, 'Could not delete word list entry', '', __LINE__, __FILE__, $sql);
 327              }
 328  
 329              $sql = "DELETE FROM " . SEARCH_MATCH_TABLE . "  
 330                  WHERE word_id IN ($common_word_id)";
 331              if ( !$db->sql_query($sql) )
 332              {
 333                  message_die(GENERAL_ERROR, 'Could not delete word match entry', '', __LINE__, __FILE__, $sql);
 334              }
 335          }
 336      }
 337  
 338      return;
 339  }
 340  
 341  function remove_search_post($post_id_sql)
 342  {
 343      global $db;
 344  
 345      $words_removed = false;
 346  
 347      switch ( SQL_LAYER )
 348      {
 349          case 'mysql':
 350          case 'mysql4':
 351              $sql = "SELECT word_id 
 352                  FROM " . SEARCH_MATCH_TABLE . " 
 353                  WHERE post_id IN ($post_id_sql) 
 354                  GROUP BY word_id";
 355              if ( $result = $db->sql_query($sql) )
 356              {
 357                  $word_id_sql = '';
 358                  while ( $row = $db->sql_fetchrow($result) )
 359                  {
 360                      $word_id_sql .= ( $word_id_sql != '' ) ? ', ' . $row['word_id'] : $row['word_id']; 
 361                  }
 362  
 363                  $sql = "SELECT word_id 
 364                      FROM " . SEARCH_MATCH_TABLE . " 
 365                      WHERE word_id IN ($word_id_sql) 
 366                      GROUP BY word_id 
 367                      HAVING COUNT(word_id) = 1";
 368                  if ( $result = $db->sql_query($sql) )
 369                  {
 370                      $word_id_sql = '';
 371                      while ( $row = $db->sql_fetchrow($result) )
 372                      {
 373                          $word_id_sql .= ( $word_id_sql != '' ) ? ', ' . $row['word_id'] : $row['word_id']; 
 374                      }
 375  
 376                      if ( $word_id_sql != '' )
 377                      {
 378                          $sql = "DELETE FROM " . SEARCH_WORD_TABLE . " 
 379                              WHERE word_id IN ($word_id_sql)";
 380                          if ( !$db->sql_query($sql) )
 381                          {
 382                              message_die(GENERAL_ERROR, 'Could not delete word list entry', '', __LINE__, __FILE__, $sql);
 383                          }
 384  
 385                          $words_removed = $db->sql_affectedrows();
 386                      }
 387                  }
 388              }
 389              break;
 390  
 391          default:
 392              $sql = "DELETE FROM " . SEARCH_WORD_TABLE . " 
 393                  WHERE word_id IN ( 
 394                      SELECT word_id 
 395                      FROM " . SEARCH_MATCH_TABLE . " 
 396                      WHERE word_id IN ( 
 397                          SELECT word_id 
 398                          FROM " . SEARCH_MATCH_TABLE . " 
 399                          WHERE post_id IN ($post_id_sql) 
 400                          GROUP BY word_id 
 401                      ) 
 402                      GROUP BY word_id 
 403                      HAVING COUNT(word_id) = 1
 404                  )"; 
 405              if ( !$db->sql_query($sql) )
 406              {
 407                  message_die(GENERAL_ERROR, 'Could not delete old words from word table', '', __LINE__, __FILE__, $sql);
 408              }
 409  
 410              $words_removed = $db->sql_affectedrows();
 411  
 412              break;
 413      }
 414  
 415      $sql = "DELETE FROM " . SEARCH_MATCH_TABLE . "  
 416          WHERE post_id IN ($post_id_sql)";
 417      if ( !$db->sql_query($sql) )
 418      {
 419          message_die(GENERAL_ERROR, 'Error in deleting post', '', __LINE__, __FILE__, $sql);
 420      }
 421  
 422      return $words_removed;
 423  }
 424  
 425  //
 426  // Username search
 427  //
 428  function username_search($search_match)
 429  {
 430      global $db, $board_config, $template, $lang, $images, $theme, $phpEx, $phpbb_root_path;
 431      global $starttime, $gen_simple_header;
 432      
 433      $gen_simple_header = TRUE;
 434  
 435      $username_list = '';
 436      if ( !empty($search_match) )
 437      {
 438          $username_search = preg_replace('/\*/', '%', phpbb_clean_username($search_match));
 439  
 440          $sql = "SELECT username 
 441              FROM " . USERS_TABLE . " 
 442              WHERE username LIKE '" . str_replace("\'", "''", $username_search) . "' AND user_id <> " . ANONYMOUS . "
 443              ORDER BY username";
 444          if ( !($result = $db->sql_query($sql)) )
 445          {
 446              message_die(GENERAL_ERROR, 'Could not obtain search results', '', __LINE__, __FILE__, $sql);
 447          }
 448  
 449          if ( $row = $db->sql_fetchrow($result) )
 450          {
 451              do
 452              {
 453                  $username_list .= '<option value="' . $row['username'] . '">' . $row['username'] . '</option>';
 454              }
 455              while ( $row = $db->sql_fetchrow($result) );
 456          }
 457          else
 458          {
 459              $username_list .= '<option>' . $lang['No_match']. '</option>';
 460          }
 461          $db->sql_freeresult($result);
 462      }
 463  
 464      $page_title = $lang['Search'];
 465      include($phpbb_root_path . 'includes/page_header.'.$phpEx);
 466  
 467      $template->set_filenames(array(
 468          'search_user_body' => 'search_username.tpl')
 469      );
 470  
 471      $template->assign_vars(array(
 472          'USERNAME' => (!empty($search_match)) ? phpbb_clean_username($search_match) : '', 
 473  
 474          'L_CLOSE_WINDOW' => $lang['Close_window'], 
 475          'L_SEARCH_USERNAME' => $lang['Find_username'], 
 476          'L_UPDATE_USERNAME' => $lang['Select_username'], 
 477          'L_SELECT' => $lang['Select'], 
 478          'L_SEARCH' => $lang['Search'], 
 479          'L_SEARCH_EXPLAIN' => $lang['Search_author_explain'], 
 480          'L_CLOSE_WINDOW' => $lang['Close_window'], 
 481  
 482          'S_USERNAME_OPTIONS' => $username_list, 
 483          'S_SEARCH_ACTION' => append_sid("search.$phpEx?mode=searchuser"))
 484      );
 485  
 486      if ( $username_list != '' )
 487      {
 488          $template->assign_block_vars('switch_select_name', array());
 489      }
 490  
 491      $template->pparse('search_user_body');
 492  
 493      include($phpbb_root_path . 'includes/page_tail.'.$phpEx);
 494  
 495      return;
 496  }
 497  
 498  ?>


Generated: Mon Jan 14 19:21:40 2013 Cross-referenced by PHPXref 0.7.1