Project

General

Profile

Bug #17054 » indexed_search_fulltext.diff

Administrator Admin, 2008-11-08 20:09

View differences:

typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php (Revision 0)
<?php
/***************************************************************
* Copyright notice
*
* (c) 2008 Michael Stucki (michael@typo3.org)
* All rights reserved
*
* This script is part of the TYPO3 project. The TYPO3 project is
* free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* The GNU General Public License can be found at
* http://www.gnu.org/copyleft/gpl.html.
* A copy is found in the textfile GPL.txt and important notices to the license
* from the author is found in LICENSE.txt distributed with these scripts.
*
*
* This script is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
* Database handler class
*
* @author Michael Stucki <michael@typo3.org>
*/
/**
* [CLASS/FUNCTION INDEX of SCRIPT]
*
*
*
* TOTAL FUNCTIONS: 0
* (This index is automatically created/updated by the extension "extdeveval")
*
*/
/**
* Class that hooks into Indexed Search and replaces standard SQL queries with MySQL fulltext index queries.
*
* @author Michael Stucki <michael@typo3.org>
* @package TYPO3
* @subpackage tx_indexedsearch_mysql
*/
class tx_indexedsearch_mysql {

    /**
     * Parent object whose state and helpers this class reads: piVars,
     * storeMetaphoneInfoAsWords, join_pages, wholeSiteIdList, cObj,
     * hookRequest(), freeIndexUidWhere(), mediaTypeWhere(), languageWhere().
     * NOTE(review): presumably assigned by the indexed_search plugin when it
     * instantiates this hook object - confirm against the caller.
     *
     * @var object
     */
    var $pObj;

    /**
     * Gets a SQL result pointer to traverse for the search records.
     *
     * Builds the MySQL fulltext search string from the search words and runs
     * the final query. Returns false without touching the database when the
     * search words produce an empty search string.
     *
     * @param array Search words (each entry holds the keys 'sword' and 'oper')
     * @param integer Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
     * @return pointer Query result pointer, or false if there is nothing to search for
     */
    function getResultRows_SQLpointer($sWArr,$freeIndexUid=-1) {
            // Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not
        $searchData = $this->getSearchString($sWArr);

            // getSearchString() always returns a (non-empty) array, so the array itself
            // can never be used as the "is there something to search for" flag.
        if ($searchData['searchString'] === '') {
            return false;
        }

            // Do the search:
        $GLOBALS['TT']->push('execFinalQuery');
        $res = $this->execFinalQuery_fulltext($searchData,$freeIndexUid);
        $GLOBALS['TT']->pull();

        return $res;
    }

    /**
     * Returns the search data for use with a MySQL FULLTEXT query.
     *
     * The search type is read from $this->pObj->piVars['type'] for every word;
     * a word containing a space always switches to sentence search (type 20).
     *
     * @param array Search word array (each entry holds the keys 'sword' and 'oper')
     * @return array Keys: 'searchBoolean' (boolean, true if BOOLEAN MODE is needed), 'searchString' (string), 'fulltextIndex' (string, "table.column" to match against)
     */
    function getSearchString($sWArr) {
            // Initialize variables:
        $searchBoolean = false;    // Change this to true to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
        $fulltextIndex = 'index_fulltext.fulltextdata';
        $naturalSearchString = '';    // This holds the result if the search is natural (doesn't contain any boolean operators)
        $booleanSearchString = '';    // This holds the result if the search is boolean (words prefixed with "+" or "-", or no prefix for OR)
        $theType = '';    // Must exist after the loop even if there are no search words at all

        if (!is_array($sWArr)) {
            $sWArr = array();
        }

            // Traverse searchwords and prefix them with corresponding operator
        foreach ($sWArr as $v) {
                // Making the query for a single search word based on the search-type
            $sWord = $v['sword'];
            $trail_boolean = '';
            $theType = isset($this->pObj->piVars['type']) ? (string)$this->pObj->piVars['type'] : '';

            if (strpos($sWord, ' ') !== false) {
                $theType = '20';    // If there are spaces in the search-word, make a full text search instead.
            }

            switch ($theType) {
                case '1':    // Part of word
                case '3':    // Last part of word
                        // These options are both not possible with fulltext indexing! Therefore, fallback to first-part-of-word search
                case '2':    // First part of word
                    $trail_boolean = '*';
                        // Part-of-word search requires boolean mode!
                    $searchBoolean = true;
                    break;
                case '10':    // Sounds like
                    /**
                     * Indexer object
                     *
                     * @var tx_indexedsearch_indexer
                     */
                    $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');    // Initialize the indexer-class
                        // Perform metaphone search; the second argument is forwarded from pObj->storeMetaphoneInfoAsWords
                    $sWord = $indexerObj->metaphone($sWord,$this->pObj->storeMetaphoneInfoAsWords);
                    unset($indexerObj);
                    $fulltextIndex = 'index_fulltext.metaphonedata';
                    break;
                case '20':    // Sentence
                    $searchBoolean = true;
                    $sWord = preg_replace('/^"(.*)"$/','$1',$sWord);    // Remove existing quotes as they will be added later anyway...
                    break;
                default:    // Distinct word
            }

                // Append the word to the search string(s), depending on its operator:
            switch ($v['oper']) {
                case 'AND NOT':
                    $booleanSearchString.= ' -' . $sWord . $trail_boolean;
                    $searchBoolean = true;
                    break;
                case 'OR':
                    $booleanSearchString.= ' ' . $sWord . $trail_boolean;
                    $searchBoolean = true;
                    break;
                default:
                    $booleanSearchString.= ' +' . $sWord . $trail_boolean;
                    $naturalSearchString.= ' ' . $sWord;
            }
        }

            // The type of the last processed word decides whether the result is a quoted sentence
        if ($theType=='20') {    // Sentence
            $searchString = '"'.trim($naturalSearchString).'"';
        } elseif ($searchBoolean) {
            $searchString = trim($booleanSearchString);
        } else {
            $searchString = trim($naturalSearchString);
        }

        return array(
            'searchBoolean' => $searchBoolean,
            'searchString' => $searchString,
            'fulltextIndex' => $fulltextIndex
        );
    }

    /**
     * Executes the final fulltext query.
     *
     * Joins index_fulltext, index_section and index_phash (and optionally pages),
     * restricts the result with MATCH ... AGAINST plus the media type, language,
     * indexing configuration and page clauses, and groups the rows. No ORDER BY
     * is passed to the query.
     *
     * @param array Array with search string, boolean indicator, and fulltext index reference (as returned by getSearchString())
     * @param integer Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
     * @return pointer Query result pointer
     */
    function execFinalQuery_fulltext($searchData,$freeIndexUid=-1) {
            // Setting up methods of filtering results based on page types, access, etc.
        $page_join = '';
        $page_where = '';

            // Indexing configuration clause:
        $freeIndexUidClause = $this->pObj->freeIndexUidWhere($freeIndexUid);

            // Calling hook for alternative creation of page ID list
        $hookObj = $this->pObj->hookRequest('execFinalQuery_idList');
        if ($hookObj) {
                // Originally this hook expects a list of page IDs, so since we don't know them yet, just send an empty string. Users of this hook need to adjust their hook to this!
            $page_where = $hookObj->execFinalQuery_idList('');
        } elseif ($this->pObj->join_pages) {    // Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
            $page_join = ', pages';
            $page_where = 'pages.uid = ISEC.page_id '.
                $this->pObj->cObj->enableFields('pages').
                ' AND pages.no_search=0'.
                ' AND pages.doktype<200 ';
        } elseif ($this->pObj->wholeSiteIdList>=0) {    // Collecting all pages IDs in which to search; filtering out ALL pages that are not accessible due to enableFields. Does NOT look for "no_search" field!
            $siteIdNumbers = t3lib_div::intExplode(',',$this->pObj->wholeSiteIdList);
            $id_list = array();
                // foreach instead of each(): each() is deprecated since PHP 7.2 and removed in PHP 8.0
            foreach ($siteIdNumbers as $rootId) {
                $id_list[] = $this->pObj->cObj->getTreeList($rootId,9999,0,0,'','').$rootId;
            }
            $page_where = ' ISEC.page_id IN ('.implode(',',$id_list).')';
        } else {    // Disable everything... (select all)
            $page_where = ' 1=1';
        }

        $searchBoolean = '';
        if ($searchData['searchBoolean']) {
            $searchBoolean = ' IN BOOLEAN MODE';
        }

            // The search string is the only user-supplied part and is quoted by fullQuoteStr();
            // 'fulltextIndex' is one of the fixed column references chosen in getSearchString().
        $whereClause = 'MATCH ('.$searchData['fulltextIndex'].') AGAINST ('.$GLOBALS['TYPO3_DB']->fullQuoteStr($searchData['searchString'],'index_fulltext').$searchBoolean.') '.
            $this->pObj->mediaTypeWhere().' '.
            $this->pObj->languageWhere().
            $freeIndexUidClause.
            ' AND index_fulltext.phash = IP.phash'.
            ' AND ISEC.phash = IP.phash'.
            ' AND '.$page_where;

        $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
            'index_fulltext.*, ISEC.*, IP.*',
            'index_fulltext, index_section ISEC, index_phash IP' . $page_join,
            $whereClause,
            'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2 ,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId'
        );

        return $res;
    }
}
?>
typo3/sysext/indexed_search_mysql/ext_localconf.php (Revision 0)
<?php
// Refuse to run unless the TYPO3_MODE constant is defined.
if (!defined('TYPO3_MODE')) {
    die('Access denied.');
}

// Register tx_indexedsearch_mysql as the 'getResultRows_SQLpointer' hook of the
// indexed_search plugin, so that the fulltext index is queried.
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['pi1_hooks']['getResultRows_SQLpointer'] = 'EXT:indexed_search_mysql/class.tx_indexedsearch_mysql.php:&tx_indexedsearch_mysql';

// Enable every index_* table except "index_rel" and "index_words".
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['use_tables'] = 'index_phash,index_fulltext,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config';
?>
typo3/sysext/indexed_search_mysql/ChangeLog (Revision 0)
2008-03-18 Michael Stucki <michael@typo3.org>
* Initial version
typo3/sysext/indexed_search_mysql/ext_tables.sql (Revision 0)
#
# Table structure for table 'index_fulltext'
#
# Differences compared to original definition in EXT:indexed_search are as follows:
# - Add new mediumtext field "metaphonedata"
# - Add new FULLTEXT index "fulltextdata"
# - Add new FULLTEXT index "metaphonedata"
# - Change table engine from InnoDB to MyISAM (required for FULLTEXT indexing)
#
# Every column/index definition except the last one must end with a comma.
CREATE TABLE index_fulltext (
  phash int(11) DEFAULT '0' NOT NULL,
  fulltextdata mediumtext,
  metaphonedata mediumtext,
  PRIMARY KEY (phash),
  FULLTEXT fulltextdata (fulltextdata),
  FULLTEXT metaphonedata (metaphonedata)
) ENGINE=MyISAM;
typo3/sysext/indexed_search_mysql/ext_emconf.php (Revision 0)
<?php
########################################################################
# Extension Manager/Repository config file for ext: "indexed_search_mysql"
#
# Auto generated 03-11-2008 23:18
#
# Manual updates:
# Only the data in the array - anything else is removed by next write.
# "version" and "dependencies" must not be touched!
########################################################################
// Extension metadata for "indexed_search_mysql", stored under the key $_EXTKEY.
// NOTE(review): $_EXTKEY is not defined in this file; presumably it is provided by the code that includes it - confirm.
// NOTE(review): the file header states that everything outside the array is removed on the next automatic write, so these comments may not survive regeneration.
$EM_CONF[$_EXTKEY] = array(
'title' => 'MySQL driver for Indexed Search Engine',
'description' => 'MySQL specific driver for Indexed Search Engine. Allows usage of MySQL-only features like FULLTEXT indexes.',
'category' => 'misc',
'shy' => 0,
'dependencies' => 'cms,indexed_search',
'conflicts' => '',
'priority' => '',
'loadOrder' => '',
'module' => '',
'state' => 'alpha',
'internal' => 1,
'uploadfolder' => 0,
'createDirs' => '',
'modify_tables' => '',
'clearCacheOnLoad' => 1,
'lockType' => '',
'author' => 'Michael Stucki',
'author_email' => 'michael@typo3.org',
'author_company' => '',
'CGLcompliance' => '',
'CGLcompliance_note' => '',
'version' => '2.10.0',
// NOTE(review): this serialized list names "ext_tables.php" - verify that this file exists in the extension, or let the list be regenerated.
'_md5_values_when_last_written' => 'a:5:{s:9:"ChangeLog";s:4:"1bb1";s:32:"class.tx_indexedsearch_mysql.php";s:4:"3a48";s:17:"ext_localconf.php";s:4:"31c9";s:14:"ext_tables.php";s:4:"c4b7";s:14:"ext_tables.sql";s:4:"7f93";}',
// Version constraints: requires cms, PHP >= 5.2.0, TYPO3 >= 4.2.0 and indexed_search >= 2.10.0.
'constraints' => array(
'depends' => array(
'cms' => '',
'php' => '5.2.0-0.0.0',
'typo3' => '4.2.0-0.0.0',
'indexed_search' => '2.10.0-',
),
'conflicts' => array(
),
'suggests' => array(
'doc_indexed_search' => '',
),
),
// Top-level 'suggests' is empty; the actual suggestion is listed inside 'constraints' above.
'suggests' => array(
),
);
?>
typo3/sysext/indexed_search/ChangeLog (Arbeitskopie)
2008-11-03 Michael Stucki <michael@typo3.org>
* Check if files in search matches are still existing before displaying them
2008-04-01 Michael Stucki <michael@typo3.org>
* Fixed bug #7980: Fix wrong TypoScript code in plugin template
typo3/sysext/indexed_search/class.indexer.php (Arbeitskopie)
var $freqRange = 32000;
var $freqMax = 0.1;
var $enableMetaphoneSearch = false;
var $storeMetaphoneInfoAsWords;
var $metaphoneContent = '';
// Objects:
/**
* Charset class object
......
$this->maxExternalFiles = t3lib_div::intInRange($this->indexerConfig['maxExternalFiles'],0,1000,5);
$this->flagBitMask = t3lib_div::intInRange($this->indexerConfig['flagBitMask'],0,255);
// Workaround: If the extension configuration was not updated yet, the value is not existing
$this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0) : 1;
$this->storeMetaphoneInfoAsWords = $this->isTableUsed('index_words') ? false : ($this->enableMetaphoneSearch ? true : false);
// Initialize external document parsers:
// Example configuration, see ext_localconf.php of this file!
if ($this->conf['index_externals']) {
......
// Initialize metaphone hook:
// Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
if ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) {
// Make sure that the hook is loaded _after_ indexed_search as this may overwrite the hook depending on the configuration.
if ($this->enableMetaphoneSearch && $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) {
$this->metaphoneObj = &t3lib_div::getUserObj($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']);
$this->metaphoneObj->pObj = &$this;
}
......
$this->log_pull();
// Calculating a hash over what is to be the actual page content. Maybe this hash should not include title,description and keywords? The bodytext is the primary concern. (on the other hand a changed page-title would make no difference then, so dont!)
$this->content_md5h = $this->md5inthash(implode($this->contentParts,''));
$this->content_md5h = $this->md5inthash(implode('', $this->contentParts));
// This function checks if there is already a page (with gr_list = 0,-1) indexed and if that page has the very same contentHash.
// If the contentHash is the same, then we can rest assured that this page is already indexed and regardless of mtime and origContent we don't need to do anything more.
......
// Check words and submit to word list if not there
$this->log_push('Check word list and submit words','');
$this->checkWordList($indexArr);
$this->submitWords($indexArr,$this->hash['phash']);
if ($this->isTableUsed('index_words')) {
$this->checkWordList($indexArr);
$this->submitWords($indexArr,$this->hash['phash']);
}
$this->log_pull();
// Set parsetime
......
// Check words and submit to word list if not there
$this->log_push('Check word list and submit words','');
$this->checkWordList($indexArr);
$this->submitWords($indexArr,$phash_arr['phash']);
if ($this->isTableUsed('index_words')) {
$this->checkWordList($indexArr);
$this->submitWords($indexArr,$phash_arr['phash']);
}
$this->log_pull();
// Set parsetime
......
$this->analyzeHeaderinfo($indexArr,$content,'description',5);
$this->analyzeBody($indexArr,$content);
return ($indexArr);
return $indexArr;
}
/**
......
* @return void
*/
function analyzeHeaderinfo(&$retArr,$content,$key,$offset) {
reset($content[$key]);
while(list(,$val)=each($content[$key])) {
$val = substr($val,0,60); // Max 60 - because the baseword varchar IS 60. This MUST be the same.
foreach ($content[$key] as $val) {
$val = substr($val,0,60); // Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same.
if (!isset($retArr[$val])) {
// Word ID (wid)
$retArr[$val]['hash'] = $this->md5inthash($val);
// Metaphone value is also 60 only chars long
$metaphone = $this->enableMetaphoneSearch
? substr($this->metaphone($val,$this->storeMetaphoneInfoAsWords),0,60)
: '';
$retArr[$val]['metaphone'] = $metaphone;
}
// Build metaphone fulltext string (can be used for fulltext indexing)
if ($this->storeMetaphoneInfoAsWords) {
$this->metaphoneContent.= ' '.$retArr[$val]['metaphone'];
}
// Priority used for flagBitMask feature (see extension configuration)
$retArr[$val]['cmp'] = $retArr[$val]['cmp']|pow(2,$offset);
$retArr[$val]['count'] = $retArr[$val]['count']+1;
$retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
$retArr[$val]['metaphone'] = $this->metaphone($val);
$retArr[$val]['count']++; // Increase number of occurences
$this->wordcount++;
}
}
......
* @return void
*/
function analyzeBody(&$retArr,$content) {
foreach($content['body'] as $key => $val) {
$val = substr($val,0,60); // Max 60 - because the baseword varchar IS 60. This MUST be the same.
if(!isset($retArr[$val])) {
foreach ($content['body'] as $key => $val) {
$val = substr($val,0,60); // Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same.
if (!isset($retArr[$val])) {
// First occurrence (used for ranking results)
$retArr[$val]['first'] = $key;
$retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
$retArr[$val]['metaphone'] = $this->metaphone($val);
// Word ID (wid)
$retArr[$val]['hash'] = $this->md5inthash($val);
// Metaphone value is also only 60 chars long
$metaphone = $this->enableMetaphoneSearch
? substr($this->metaphone($val,$this->storeMetaphoneInfoAsWords),0,60)
: '';
$retArr[$val]['metaphone'] = $metaphone;
}
$retArr[$val]['count'] = $retArr[$val]['count']+1;
// Build metaphone fulltext string (can be used for fulltext indexing)
if ($this->storeMetaphoneInfoAsWords) {
$this->metaphoneContent.= ' '.$retArr[$val]['metaphone'];
}
$retArr[$val]['count']++; // Increase number of occurences
$this->wordcount++;
}
}
......
if (is_object($this->metaphoneObj)) {
$tmp = $this->metaphoneObj->metaphone($word, $this->conf['sys_language_uid']);
} else {
// Use native PHP function instead of advanced doubleMetaphone class
$tmp = metaphone($word);
}
// Return raw value?
if ($retRaw) return $tmp;
if ($retRaw) { // Return raw value?
$ret = $tmp;
} elseif (strlen($tmp)) { // Otherwise create hash and return integer
$ret = $this->md5inthash($tmp);
} else {
$ret = 0;
}
// Otherwise create hash and return integer
if($tmp=='') $ret=0; else $ret=hexdec(substr(md5($tmp),0,7));
return $ret;
}
......
'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
);
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
if ($this->isTableUsed('index_phash')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
}
// PROCESSING index_section
$this->submit_section($this->hash['phash'],$this->hash['phash']);
......
// PROCESSING index_fulltext
$fields = array(
'phash' => $this->hash['phash'],
'fulltextdata' => implode(' ', $this->contentParts)
'fulltextdata' => implode(' ', $this->contentParts),
'metaphonedata' => $this->metaphoneContent
);
if ($this->indexerConfig['fullTextDataLength']>0) {
$fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']);
}
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
if ($this->isTableUsed('index_fulltext')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
}
// PROCESSING index_debug
if ($this->indexerConfig['debugMode']) {
......
'lexer' => $this->lexerObj->debugString,
))
);
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
if ($this->isTableUsed('index_debug')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
}
}
}
......
'hash_gr_list' => $this->md5inthash($this->conf['gr_list']),
'gr_list' => $this->conf['gr_list']
);
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields);
if ($this->isTableUsed('index_grlist')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields);
}
}
/**
......
$this->getRootLineFields($fields);
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields);
if ($this->isTableUsed('index_section')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields);
}
}
/**
......
function removeOldIndexedPages($phash) {
// Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here.
$tableArr = explode(',','index_phash,index_section,index_grlist,index_fulltext,index_debug');
foreach($tableArr as $table) {
$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
foreach ($tableArr as $table) {
if ($this->isTableUsed($table)) {
$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
}
}
// Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file).
$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash));
if ($this->isTableUsed('index_section')) {
$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash));
}
}
......
'tstamp' => time(),
'crdate' => time(),
'gr_list' => $this->conf['gr_list'],
'externalUrl' => $fileParts['scheme'] ? 1 : 0,
'recordUid' => intval($this->conf['recordUid']),
'freeIndexUid' => intval($this->conf['freeIndexUid']),
'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
'externalUrl' => $fileParts['scheme'] ? 1 : 0,
'recordUid' => intval($this->conf['recordUid']),
'freeIndexUid' => intval($this->conf['freeIndexUid']),
'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
);
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
if ($this->isTableUsed('index_phash')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
}
// PROCESSING index_fulltext
$fields = array(
'phash' => $hash['phash'],
'fulltextdata' => implode(' ', $contentParts)
'fulltextdata' => implode(' ', $contentParts),
'metaphonedata' => $this->metaphoneContent
);
if ($this->indexerConfig['fullTextDataLength']>0) {
$fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']);
}
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
if ($this->isTableUsed('index_fulltext')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
}
// PROCESSING index_debug
if ($this->indexerConfig['debugMode']) {
......
'lexer' => $this->lexerObj->debugString,
))
);
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
if ($this->isTableUsed('index_debug')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
}
}
}
......
*/
function submitFile_grlist($hash) {
// Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another one.
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')');
if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
if ($this->isTableUsed('index_grlist')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')');
} else {
$res = false;
}
if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
$this->submit_grlist($hash,$hash);
}
}
......
* @return void
*/
function submitFile_section($hash) {
// Testing if there is a section
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id']));
if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
// Testing if there is already a section
if ($this->isTableUsed('index_section')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id']));
} else {
$res = false;
}
if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
$this->submit_section($hash,$this->hash['phash']);
}
}
......
* @return void
*/
function removeOldIndexedFiles($phash) {
// Removing old registrations for tables.
$tableArr = explode(',','index_phash,index_grlist,index_fulltext,index_debug');
foreach($tableArr as $table) {
$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
foreach ($tableArr as $table) {
if ($this->isTableUsed($table)) {
$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
}
}
}
......
* @return integer Result integer: Generally: <0 = No indexing, >0 = Do indexing (see $this->reasons): -2) Min age was NOT exceeded and so indexing cannot occur. -1) mtime matched so no need to reindex page. 0) N/A 1) Max age exceeded, page must be indexed again. 2) mtime of indexed page doesn't match mtime given for current content and we must index page. 3) No mtime was set, so we will index... 4) No indexed page found, so of course we will index.
*/
function checkMtimeTstamp($mtime,$phash) {
$out = 0;
// Select indexed page:
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
$out = 0;
if ($this->isTableUsed('index_phash')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
} else {
$res = false;
}
// If there was an indexing of the page...:
if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
if ($this->tstamp_maxAge && ($row['tstamp']+$this->tstamp_maxAge) < time()) { // If max age is exceeded, index the page
$out = 1; // The configured max-age was exceeded for the document and thus it's indexed.
} else {
......
*/
function checkContentHash() {
// With this query the page will only be indexed if its content is different from the same "phash_grouping" -page.
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash A', 'A.phash_grouping='.intval($this->hash['phash_grouping']).' AND A.contentHash='.intval($this->content_md5h));
if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
if ($this->isTableUsed('index_phash')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash', 'phash_grouping='.intval($this->hash['phash_grouping']).' AND contentHash='.intval($this->content_md5h));
} else {
$res = false;
}
if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
return $row;
}
return 1;
......
* @return boolean Returns true if the document needs to be indexed (that is, there was no result)
*/
function checkExternalDocContentHash($hashGr,$content_md5h) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A', 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h));
if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
if ($this->isTableUsed('index_phash')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash', 'phash_grouping='.intval($hashGr).' AND contentHash='.intval($content_md5h));
} else {
$res = false;
}
if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
return 0;
}
return 1;
......
* @return void
*/
function is_grlist_set($phash_x) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x));
return $GLOBALS['TYPO3_DB']->sql_num_rows($res);
if ($this->isTableUsed('index_grlist')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x));
} else {
$res = false;
}
return $res ? $GLOBALS['TYPO3_DB']->sql_num_rows($res) : false;
}
/**
......
* @see submit_grlist()
*/
function update_grlist($phash,$phash_x) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list']));
if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
if ($this->isTableUsed('index_grlist')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list']));
} else {
$res = false;
}
if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
$this->submit_grlist($phash,$phash_x);
$this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1);
}
......
);
if ($mtime) { $updateFields['item_mtime'] = intval($mtime); }
$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
if ($this->isTableUsed('index_phash')) {
$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
}
}
/**
......
'freeIndexSetId' => intval($this->conf['freeIndexSetId'])
);
$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
if ($this->isTableUsed('index_phash')) {
$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
}
}
/**
......
'parsetime' => intval($parsetime)
);
$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
if ($this->isTableUsed('index_phash')) {
$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
}
}
/**
......
$updateFields = array();
$this->getRootLineFields($updateFields);
$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields);
if ($this->isTableUsed('index_section')) {
$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields);
}
}
/**
......
* @return void
*/
function removeLoginpagesWithContentHash() {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', '
if ($this->isTableUsed('index_phash,index_grlist')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', '
A.phash=B.phash
AND A.phash_grouping='.intval($this->hash['phash_grouping']).'
AND B.hash_gr_list!='.$this->md5inthash($this->defaultGrList).'
AND A.contentHash='.intval($this->content_md5h));
while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
$this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1);
$this->removeOldIndexedPages($row['phash']);
} else {
$res = false;
}
if ($res) {
while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
$this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1);
$this->removeOldIndexedPages($row['phash']);
}
}
}
/**
......
require_once(t3lib_extMgm::extPath('crawler').'class.tx_crawler_lib.php');
}
/**
* Check if the tables provided are configured for usage.
* This becomes necessary for extensions that provide additional database functionality like indexed_search_mysql.
*
* @param string Comma-separated list of tables
* @return boolean True if given tables are enabled
*/
function isTableUsed($table_list) {
        // Split the requested tables and fetch the configured list of enabled tables
    $requestedTables = t3lib_div::trimExplode(',', $table_list);
    $enabledTableList = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['use_tables'];

        // A single table missing from the enabled list makes the whole request fail
    foreach ($requestedTables as $tableName) {
        if (!t3lib_div::inList($enabledTableList, $tableName)) {
            return false;
        }
    }

    return true;
}
......
/********************************
*
* SQL; Submitting words
......
}
if (count($phashArr)) {
$cwl = implode(',',$phashArr);
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')');
if ($this->isTableUsed('index_words')) {
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')');
} else {
$res = false;
}
if($GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) {
if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) {
$this->log_setTSlogMessage('Inserting words: '.(count($wl)-$GLOBALS['TYPO3_DB']->sql_num_rows($res)),1);
while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
unset($wl[$row['baseword']]);
}
reset($wl);
while(list($key,$val)=each($wl)) {
while (list($key,$val)=each($wl)) {
$insertFields = array(
'wid' => $val['hash'],
'baseword' => $key,
'metaphone' => $val['metaphone']
);
// A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer as 60 chars (the baseword varchar is 60 characters...) this is not a problem.
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields);
if ($this->isTableUsed('index_words')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields);
}
}
}
}
......
* @return void
*/
function submitWords($wl,$phash) {
$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash));
if ($this->isTableUsed('index_rel')) {
$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash));
}
foreach($wl as $val) {
foreach ($wl as $val) {
$insertFields = array(
'phash' => $phash,
'wid' => $val['hash'],
......
'flags' => ($val['cmp'] & $this->flagBitMask)
);
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields);
if ($this->isTableUsed('index_rel')) {
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields);
}
}
}
typo3/sysext/indexed_search/ext_tables.sql (Arbeitskopie)
CREATE TABLE index_fulltext (
phash int(11) DEFAULT '0' NOT NULL,
fulltextdata mediumtext,
metaphonedata mediumtext,
PRIMARY KEY (phash)
) ENGINE=InnoDB;
......
PRIMARY KEY (uid)
) ENGINE=InnoDB;
#
# Table structure for table 'index_stat_word'
#
......
word varchar(30) DEFAULT '' NOT NULL,
index_stat_search_id int(11) DEFAULT '0' NOT NULL,
tstamp int(11) DEFAULT '0' NOT NULL,
pageid int(11) DEFAULT '0' NOT NULL,
PRIMARY KEY (uid),
KEY tstamp (tstamp,word)
) ENGINE=InnoDB;
......
PRIMARY KEY (uid),
KEY parent (pid)
);
#
# Table structure for table 'index_stat_word'
#
CREATE TABLE index_stat_word (
uid int(11) NOT NULL auto_increment,
word varchar(30) DEFAULT '' NOT NULL,
index_stat_search_id int(11) DEFAULT '0' NOT NULL,
tstamp int(11) DEFAULT '0' NOT NULL,
pageid int(11) DEFAULT '0' NOT NULL,
PRIMARY KEY (uid),
KEY tstamp (tstamp,word)
) ENGINE=InnoDB;
typo3/sysext/indexed_search/doc/README.txt (Arbeitskopie)
A full documentation manual for the indexed search extension can be found in the extension "doc_indexed_search" in the TER.
See http://typo3.org/documentation/document-library/extension-manuals/doc_indexed_search/current/view/
This is a list of all tables which are used by this extension:
index_phash
- Page information
index_fulltext
- Fulltext data
index_rel
- Relations between index_phash and index_words
index_words
- baseword table
index_section
- section index (= first 3 levels of the rootline for this document)
index_grlist
- group list information
- indicates which gr_list has access to which phash
typo3/sysext/indexed_search/class.doublemetaphone.php (Arbeitskopie)
// TYPO3: Had to change name to "user_DoubleMetaPhone" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so:
// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you simply configure TYPO3 so by setting the line below in your localconf.php file:
// TYPO3: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example.
// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you can enable it in the extension configuration
// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example (also see ext_localconf.php)
class user_DoubleMetaPhone
{
typo3/sysext/indexed_search/ext_localconf.php (Arbeitskopie)
'tif' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
);
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['use_tables'] = 'index_phash,index_fulltext,index_rel,index_words,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config';
$_EXTCONF = unserialize($_EXTCONF); // unserializing the configuration so we can use it here:
// Use the advanced doubleMetaphone parser instead of the internal one (usage of metaphone parsers is generally disabled by default)
if (isset($_EXTCONF['enableMetaphoneSearch']) && intval($_EXTCONF['enableMetaphoneSearch'])==2) {
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
}
// EXAMPLE configuration of hooks:
/*
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['pi1_hooks'] = array (
typo3/sysext/indexed_search/pi/considerations.txt (Arbeitskopie)
- Search is always case insensitive. If you need a case sensitive search, use a binary collation for the index_fulltext and index_words tables.
MAILS about:
......
If you are an SQL wizard, you may be able to help me here.
In the (coming) index searching thing, I have three main tables.
In the (coming) index searching thing, I have three main tables.
- index_words which contains all the words indexed
- index_pages which represents a link to a page id or external url
......
So searching an OR search for "content" and "management" could be done like this:
SELECT STRAIGHT_JOIN [some fields here...] FROM
index_words AS IW,
index_rel AS IR,
SELECT STRAIGHT_JOIN [some fields here...] FROM
index_words AS IW,
index_rel AS IR,
index_phash AS IP
WHERE
IR.phash = IP.phash AND
IW.wid=IR.wid AND
WHERE
IR.phash = IP.phash AND
IW.wid=IR.wid AND
(IW.baseword = 'content' OR IW.baseword = 'management')
[... and here comes some GROUP BY, ORDER BY and LIMIT]
......
Therefore I thought of a little trick to do it:
SELECT STRAIGHT_JOIN [some fields here...] FROM
index_words AS IW,
index_rel AS IR,
index_words AS IW2,
index_rel AS IR2,
SELECT STRAIGHT_JOIN [some fields here...] FROM
index_words AS IW,
index_rel AS IR,
index_words AS IW2,
index_rel AS IR2,
index_phash AS IP
WHERE
IW.wid=IR.wid AND
IW2.wid=IR2.wid AND
IR.phash = IP.phash AND
IR2.phash = IP.phash AND
WHERE
IW.wid=IR.wid AND
IW2.wid=IR2.wid AND
IR.phash = IP.phash AND
IR2.phash = IP.phash AND
(IW.baseword = 'content' and IW2.baseword = 'management')
[... and here comes some GROUP BY, ORDER BY and LIMIT]
... and actually I think this works, but it's very slow, probably because the internal result in MySQL becomes extremely large due to the joins.
... and actually I think this works, but it's very slow, probably because the internal result in MySQL becomes extremely large due to the joins.
Can anyone help me?
I checked out kwIndex from hotscripts and he does it like this:
I checked out kwIndex from hotscripts and he does it like this:
1) Select the word-ids (SQL-query 1)
2) If both words were found, make another query for all linking-table entries matching the words and group by the word-id. The count(*) statement shows the number equal to the number of searchwords if they were both found. So select only records which deliver this. Then you have the document ids.... (SQL 2)
......
OK, I am not an sql-wizard. Just some hints and wishes, which I hope are
helpful.
>
>
> 1) To the AND question:
>
>
> Maybe it's best to make a search for each word; After getting the total list
> of page-ids from first search, this is included as a condition in the next
> search, which generates a new list which is included in the next search,
......
I have to say a really elaborate, fast and multiformat (pdf's !) search
engine is really one of the single most important things for every 100+
website.
>
>
> 2) Search query syntax
> Any suggestions to a search query syntax.
> - Search for "content management" is by default AND search
......
*****************************************************************************************************************
OK there were some fancy calculations promoted by Graeme Merrall:
"However, regarding relevance you probably want to look at something like
Salton's formula which is a good easy way to measure relevance.
Oracle Intermedia uses this and it's pretty simple:
Score can be between 0 and 100, but the top-scoring document in the query
will not necessarily have a score of 100 -- scoring is relative, not
absolute. This means that scores are not comparable across indexes, or even
across different queries on the same index. Score for each document is
computed using the standard Salton formula:
3f(1+log(N/n))
Where f is the frequency of the search term in the document, N is the total
number of rows in the table, and n is the number of rows which contain the
search term. This is converted into an integer in the range 0 - 100.
There's a good doc on it at
http://ls6-www.informatik.uni-dortmund.de/bib/fulltext/ir/Pfeifer:97/
although it may be a little complex for what you require so just pick the
relevant parts out.
"
However I chose not to go with this for several reasons.
I do not claim that my way of calculating importance here is the best.
ANY (better) suggestion for ranking calculation is accepted! (as long as they are shipped with tested code in exchange for this.)
*****************************************************************************************************************
*****************************************************************************************************************
typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php (Arbeitskopie)
var $cache_rl = array(); // Caching of root line data
var $fe_groups_required = array(); // Required fe_groups memberships for display of a result.
var $domain_records = array(); // Domain records (?)
var $wSelClauses = array(); // Select clauses for individual words
var $resultSections = array(); // Page tree sections for search result.
var $external_parsers = array(); // External parser objects
var $iconFileNameCache = array(); // Storage of icons....
var $templateCode; // Will hold the content of $conf['templateFile']
var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results';
var $indexerConfig = array(); // Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']
var $enableMetaphoneSearch = false;
var $storeMetaphoneInfoAsWords;
/**
* Lexer object
*
......
*/
var $lexerObj;
/**
* Indexer object
*
* @var tx_indexedsearch_indexer
*/
var $indexerObj;
var $templateCode; // Will hold the content of $conf['templateFile']
var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results';
const WILDCARD_LEFT = 1;
const WILDCARD_RIGHT = 2;
/**
......
$this->pi_loadLL();
$this->pi_setPiVarDefaults();
// Initialize the indexer-class - just to use a few function (for making hashes)
$this->indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
// Initialize:
$this->initialize();
......
$this->printRules().
$content;
return $this->pi_wrapInBaseClass($content);
}
return $this->pi_wrapInBaseClass($content);
}
/**
* Initialize internal variables, especially selector box values for the search form and search words
......
function initialize() {
global $TYPO3_CONF_VARS;
// Indexer configuration from Extension Manager interface:
$this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
$this->enableMetaphoneSearch = $this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0;
$this->storeMetaphoneInfoAsWords = $this->isTableUsed('index_words') ? false : true;
// Initialize external document parsers for icon display and other soft operations
if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'])) {
foreach ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) {
......
)
);
if (!$this->enableMetaphoneSearch) {
unset ($this->optValues['type']['10']); // Remove this option if metaphone search is disabled)
}
// Free Index Uid:
if ($this->conf['search.']['defaultFreeIndexUidList']) {
$uidList = t3lib_div::intExplode(',', $this->conf['search.']['defaultFreeIndexUidList']);
......
// Add search languages:
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'sys_language', '1=1'.$this->cObj->enableFields('sys_language'));
while($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
$this->optValues['lang'][$lR['uid']] = $lR['title'];
if ($res) {
while ($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
$this->optValues['lang'][$lR['uid']] = $lR['title'];
}
}
// Calling hook for modification of initialized content
......
}
/**
* Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holds the SQL operator (eg. AND, OR)
* Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holding the SQL operator (eg. AND, OR)
*
* Only words with 2 or more characters are accepted
* Max 200 chars total
* Space is used to split words, "" can be used search for a whole string (not indexed search then)
* Space is used to split words, "" can be used to search for a whole string
* AND, OR and NOT are prefix words, overruling the default operator
* +/|/- equals AND, OR and NOT as operators.
* All search words are converted to lowercase.
......
$inSW = $GLOBALS['TSFE']->csConvObj->utf8_encode($inSW, $GLOBALS['TSFE']->metaCharset);
$inSW = $GLOBALS['TSFE']->csConvObj->entities_to_utf8($inSW,TRUE);
$sWordArray = false;
if ($hookObj = &$this->hookRequest('getSearchWords')) {
return $hookObj->getSearchWords_splitSWords($inSW, $defOp);
$sWordArray = $hookObj->getSearchWords_splitSWords($inSW, $defOp);
} else {
if ($this->piVars['type']==20) {
return array(array('sword'=>trim($inSW), 'oper'=>'AND'));
if ($this->piVars['type']==20) { // Sentence
$sWordArray = array(
array(
'sword' => trim($inSW),
'oper' => 'AND'
)
);
} else {
$search = t3lib_div::makeInstance('tslib_search');
$search->default_operator = $defOp==1 ? 'OR' : 'AND';
......
$search->register_and_explode_search_string($inSW);
if (is_array($search->sword_array)) {
return $this->procSearchWordsByLexer($search->sword_array);
$sWordArray = $this->procSearchWordsByLexer($search->sword_array);
}
}
}
return $sWordArray;
}
/**
......
// Getting SQL result pointer:
$GLOBALS['TT']->push('Searching result');
$res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid);
if ($hookObj = &$this->hookRequest('getResultRows_SQLpointer')) {
$res = $hookObj->getResultRows_SQLpointer($sWArr,$freeIndexUid);
} else {
$res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid);
}
$GLOBALS['TT']->pull();
// Organize and process result:
......
// Each row should contain the fields from 'ISEC.*, IP.*' combined + artificial fields "show_resume" (boolean) and "result_number" (counter)
while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
if (!$this->checkExistance($row)) {
// Check if the record is still available or if it has been deleted meanwhile.
// Currently this works for files only, since extending it to content elements would cause a lot of overhead...
// Otherwise, skip the row.
$count--;
continue;
}
// Set first row:
if (!$c) {
$firstRow = $row;
......
$c++; // Increase the result pointer
// All rows for display is put into resultRows[]
if ($c > $pointer * $this->piVars['results']) {
if ($c > $pointer * $this->piVars['results'] && $c <= ($pointer+1) * $this->piVars['results']) {
$row['result_number'] = $c;
$resultRows[] = $row;
// This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above).
if (!$exactCount && (($c+1) > ($pointer+1)*$this->piVars['results'])) { break; }
}
// This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above).
if (!$exactCount && (($c+1) > ($pointer+1) * $this->piVars['results'])) {
break;
}
} else {
$count--; // Skip this row if the user cannot view it (missing permission)
}
......
}
return array(
'resultRows' => $resultRows,
'firstRow' => $firstRow,
'count' => $count
);
'resultRows' => $resultRows,
'firstRow' => $firstRow,
'count' => $count
);
} else { // No results found:
return FALSE;
}
......
* @return pointer
*/
function getResultRows_SQLpointer($sWArr,$freeIndexUid=-1) {
// This SEARCHES for the searchwords in $sWArr AND returns a COMPLETE list of phash-integers of the matches.
// This SEARCHES for the searchwords in $sWArr AND returns a COMPLETE list of phash-integers of the matches.
$list = $this->getPhashList($sWArr);
// Perform SQL Search / collection of result rows array:
......
// Initialize variables:
$c=0;
$totalHashList = array(); // This array accumulates the phash-values
$this->wSelClauses = array();
// Traverse searchwords; for each, select all phash integers and merge/diff/intersect them with previous word (based on operator)
foreach ($sWArr as $k => $v) {
// Making the query for a single search word based on the search-type
$sWord = $v['sword']; // $GLOBALS['TSFE']->csConvObj->conv_case('utf-8',$v['sword'],'toLower'); // lower-case all of them...
$theType = (string)$this->piVars['type'];
if (strstr($sWord,' ')) $theType = 20; // If there are spaces in the search-word, make a full text search instead.
if (strstr($sWord,' ')) {
$theType = 20; // If there are spaces in the search-word, make a full text search instead.
}
$GLOBALS['TT']->push('SearchWord "'.$sWord.'" - $theType='.$theType);
$res = '';
$wSel='';
// Perform search for word:
switch($theType) {
switch ($theType) {
case '1': // Part of word
$wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'";
$res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
$res = $this->searchWord($sWord, self::WILDCARD_LEFT | self::WILDCARD_RIGHT);
break;
case '2': // First part of word
$wSel = "IW.baseword LIKE '".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'";
$res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
$res = $this->searchWord($sWord, self::WILDCARD_RIGHT);
break;
case '3': // Last part of word
$wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."'";
$res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
$res = $this->searchWord($sWord, self::WILDCARD_LEFT);
break;
case '10': // Sounds like
$wSel = 'IW.metaphone = '.$this->indexerObj->metaphone($sWord);
$res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
/**
* Indexer object
*
* @var tx_indexedsearch_indexer
*/
$indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer'); // Initialize the indexer-class
// Perform metaphone search
$res = $this->searchMetaphone($indexerObj->metaphone($sWord,$this->storeMetaphoneInfoAsWords));
unset($indexerObj);
break;
case '20': // Sentence
$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
'ISEC.phash',
'index_section ISEC, index_fulltext IFT',
'IFT.fulltextdata LIKE \'%'.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_fulltext').'%\' AND
ISEC.phash = IFT.phash
'.$this->sectionTableWhere(),
'ISEC.phash'
);
$wSel = '1=1';
if ($this->piVars['type']==20) $this->piVars['order'] = 'mtime'; // If there is a fulltext search for a sentence there is a likeliness that sorting cannot be done by the rankings from the rel-table (because no relations will exist for the sentence in the word-table). So therefore mtime is used instaed. It is not required, but otherwise some hits may be left out.
$res = $this->searchSentence($sWord);
... This diff was truncated because it exceeds the maximum size that can be displayed.
(2-2/3)