Index: typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php =================================================================== --- typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php (Revision 0) +++ typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php (Revision 0) @@ -0,0 +1,239 @@ + + */ +/** + * [CLASS/FUNCTION INDEX of SCRIPT] + * + * + * + * TOTAL FUNCTIONS: 0 + * (This index is automatically created/updated by the extension "extdeveval") + * + */ + + + + + + + + + + +/** + * Class that hooks into Indexed Search and replaces standard SQL queries with MySQL fulltext index queries. + * + * @author Michael Stucki + * @package TYPO3 + * @subpackage tx_indexedsearch_mysql + */ +class tx_indexedsearch_mysql { + var $pObj; + + /** + * Gets a SQL result pointer to traverse for the search records. + * + * @param array Search words + * @param integer Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content. 
+ * @return pointer + */ + function getResultRows_SQLpointer($sWArr,$freeIndexUid=-1) { + // Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not + $searchData = $this->getSearchString($sWArr); + + // Perform SQL Search / collection of result rows array: + if ($searchData) { + // Do the search: + $GLOBALS['TT']->push('execFinalQuery'); + $res = $this->execFinalQuery_fulltext($searchData,$freeIndexUid); + $GLOBALS['TT']->pull(); + return $res; + } else { + return false; + } + } + + /** + * Returns a search string for use with MySQL FULLTEXT query + * + * @param array Search word array + * @return string Search string + */ + function getSearchString($sWArr) { + + // Initialize variables: + $count = 0; + + $searchBoolean = false; // Change this to true to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty) + $fulltextIndex = 'index_fulltext.fulltextdata'; + + $naturalSearchString = ''; // This holds the result if the search is natural (doesn't contain any boolean operators) + $booleanSearchString = ''; // This holds the result if the search is boolen (contains +/-/| operators) + + // Traverse searchwords and prefix them with corresponding operator + foreach ($sWArr as $k => $v) { + // Making the query for a single search word based on the search-type + $sWord = $v['sword']; // $GLOBALS['TSFE']->csConvObj->conv_case('utf-8',$v['sword'],'toLower'); // lower-case all of them... + $trail_natural = ''; + $trail_boolean = ''; + + $theType = (string)$this->pObj->piVars['type']; + if (strstr($sWord,' ')) { + $theType = 20; // If there are spaces in the search-word, make a full text search instead. + } + + switch ($theType) { + case '1': // Part of word + case '3': // Last part of word + // These options are both not possible with fulltext indexing! Therefore, fallback to first-part-of-word search + case '2': // First part of word + $trail_boolean = '*'; + // Part-of-word search requires boolean mode! 
+ $searchBoolean = true; + break; + case '10': // Sounds like + + /** + * Indexer object + * + * @var tx_indexedsearch_indexer + */ + $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer'); // Initialize the indexer-class + + // Perform metaphone search + $sWord = $indexerObj->metaphone($sWord,$this->pObj->storeMetaphoneInfoAsWords); + + unset($indexerObj); + $fulltextIndex = 'index_fulltext.metaphonedata'; + break; + case '20': // Sentence + $searchBoolean = true; + $sWord = preg_replace('/^"(.*)"$/','$1',$sWord); // Remove existing quotes as they will be added later anyway... + break; + default: // Distinct word + } + + // Perform search for word: + switch ($v['oper']) { + case 'AND NOT': + $booleanSearchString.= ' -' . $sWord . $trail_boolean; + $searchBoolean = true; + break; + case 'OR': + $booleanSearchString.= ' ' . $sWord . $trail_boolean; + $searchBoolean = true; + break; + default: + $booleanSearchString.= ' +' . $sWord . $trail_boolean; + $naturalSearchString.= ' ' . $sWord . $trail_natural; + } + + $count++; + } + + if ($theType=='20') { // Sentence + $searchString = '"'.trim($naturalSearchString).'"'; + + } elseif ($searchBoolean) { + $searchString = trim($booleanSearchString); + + } else { + $searchString = trim($naturalSearchString); + } + + return array( + 'searchBoolean' => $searchBoolean, + 'searchString' => $searchString, + 'fulltextIndex' => $fulltextIndex + ); + } + + /** + * Execute final query, based on phash integer list. The main point is sorting the result in the right order. + * + * @param array Array with search string, boolean indicator, and fulltext index reference + * @param integer Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content. + * @return pointer Query result pointer + */ + function execFinalQuery_fulltext($searchData,$freeIndexUid=-1) { + + // Setting up methods of filtering results based on page types, access, etc. 
+ $page_join = ''; + $page_where = ''; + + // Indexing configuration clause: + $freeIndexUidClause = $this->pObj->freeIndexUidWhere($freeIndexUid); + + // Calling hook for alternative creation of page ID list + if ($hookObj = &$this->pObj->hookRequest('execFinalQuery_idList')) { + $page_where = $hookObj->execFinalQuery_idList(''); // Originally this hook expects a list of page IDs, so since we don't know them yet, just send an empty string. Users of this hook need to adjust their hook to this! + } elseif ($this->pObj->join_pages) { // Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected. + $page_join = ', + pages'; + $page_where = 'pages.uid = ISEC.page_id + '.$this->pObj->cObj->enableFields('pages').' + AND pages.no_search=0 + AND pages.doktype<200 + '; + } elseif ($this->pObj->wholeSiteIdList>=0) { // Collecting all pages IDs in which to search; filtering out ALL pages that are not accessible due to enableFields. Does NOT look for "no_search" field! + $siteIdNumbers = t3lib_div::intExplode(',',$this->pObj->wholeSiteIdList); + $id_list = array(); + foreach ($siteIdNumbers as $rootId) { // foreach instead of each(), which is deprecated since PHP 7.2 and removed in PHP 8.0 + $id_list[] = $this->pObj->cObj->getTreeList($rootId,9999,0,0,'','').$rootId; + } + $page_where = ' ISEC.page_id IN ('.implode(',',$id_list).')'; + } else { // Disable everything... (select all) + $page_where = ' 1=1'; + } + + $searchBoolean = ''; + if ($searchData['searchBoolean']) { + $searchBoolean = ' IN BOOLEAN MODE'; + } + + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( + 'index_fulltext.*, ISEC.*, IP.*', + 'index_fulltext, index_section ISEC, index_phash IP' . $page_join, + 'MATCH ('.$searchData['fulltextIndex'].') AGAINST ('.$GLOBALS['TYPO3_DB']->fullQuoteStr($searchData['searchString'],'index_fulltext').$searchBoolean.') '. + $this->pObj->mediaTypeWhere().' '. + $this->pObj->languageWhere(). + $freeIndexUidClause.' 
+ AND index_fulltext.phash = IP.phash + AND ISEC.phash = IP.phash + AND '.$page_where, + 'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2 ,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId' + ); + + return $res; + } +} + +?> Index: typo3/sysext/indexed_search_mysql/ext_localconf.php =================================================================== --- typo3/sysext/indexed_search_mysql/ext_localconf.php (Revision 0) +++ typo3/sysext/indexed_search_mysql/ext_localconf.php (Revision 0) @@ -0,0 +1,10 @@ + Index: typo3/sysext/indexed_search_mysql/ChangeLog =================================================================== --- typo3/sysext/indexed_search_mysql/ChangeLog (Revision 0) +++ typo3/sysext/indexed_search_mysql/ChangeLog (Revision 0) @@ -0,0 +1,3 @@ +2008-03-18 Michael Stucki + + * Initial version Index: typo3/sysext/indexed_search_mysql/ext_tables.sql =================================================================== --- typo3/sysext/indexed_search_mysql/ext_tables.sql (Revision 0) +++ typo3/sysext/indexed_search_mysql/ext_tables.sql (Revision 0) @@ -0,0 +1,16 @@ +# +# Table structure for table 'index_fulltext' +# +# Differences compared to original definition in EXT:indexed_search are as follows: +# - Add new mediumtext field "metaphonedata" +# - Add new FULLTEXT index "fulltextdata" +# - Add new FULLTEXT index "metaphonedata" +# - Change table engine from InnoDB to MyISAM (required for FULLTEXT indexing) +CREATE TABLE index_fulltext ( + phash int(11) DEFAULT '0' NOT NULL, + fulltextdata mediumtext, + metaphonedata mediumtext, + PRIMARY KEY (phash), + FULLTEXT fulltextdata (fulltextdata), + FULLTEXT metaphonedata 
(metaphonedata) +) ENGINE=MyISAM; Index: typo3/sysext/indexed_search_mysql/ext_emconf.php =================================================================== --- typo3/sysext/indexed_search_mysql/ext_emconf.php (Revision 0) +++ typo3/sysext/indexed_search_mysql/ext_emconf.php (Revision 0) @@ -0,0 +1,54 @@ + 'MySQL driver for Indexed Search Engine', + 'description' => 'MySQL specific driver for Indexed Search Engine. Allows usage of MySQL-only features like FULLTEXT indexes.', + 'category' => 'misc', + 'shy' => 0, + 'dependencies' => 'cms,indexed_search', + 'conflicts' => '', + 'priority' => '', + 'loadOrder' => '', + 'module' => '', + 'state' => 'alpha', + 'internal' => 1, + 'uploadfolder' => 0, + 'createDirs' => '', + 'modify_tables' => '', + 'clearCacheOnLoad' => 1, + 'lockType' => '', + 'author' => 'Michael Stucki', + 'author_email' => 'michael@typo3.org', + 'author_company' => '', + 'CGLcompliance' => '', + 'CGLcompliance_note' => '', + 'version' => '2.10.0', + '_md5_values_when_last_written' => 'a:5:{s:9:"ChangeLog";s:4:"1bb1";s:32:"class.tx_indexedsearch_mysql.php";s:4:"3a48";s:17:"ext_localconf.php";s:4:"31c9";s:14:"ext_tables.php";s:4:"c4b7";s:14:"ext_tables.sql";s:4:"7f93";}', + 'constraints' => array( + 'depends' => array( + 'cms' => '', + 'php' => '5.2.0-0.0.0', + 'typo3' => '4.2.0-0.0.0', + 'indexed_search' => '2.10.0-', + ), + 'conflicts' => array( + ), + 'suggests' => array( + 'doc_indexed_search' => '', + ), + ), + 'suggests' => array( + ), +); + +?> \ No newline at end of file Index: typo3/sysext/indexed_search/ChangeLog =================================================================== --- typo3/sysext/indexed_search/ChangeLog (Revision 4388) +++ typo3/sysext/indexed_search/ChangeLog (Arbeitskopie) @@ -1,3 +1,7 @@ +2008-11-03 Michael Stucki + + * Check if files in search matches are still existing before displaying them + 2008-04-01 Michael Stucki * Fixed bug #7980: Fix wrong TypoScript code in plugin template Index: 
typo3/sysext/indexed_search/class.indexer.php =================================================================== --- typo3/sysext/indexed_search/class.indexer.php (Revision 4388) +++ typo3/sysext/indexed_search/class.indexer.php (Arbeitskopie) @@ -191,6 +191,10 @@ var $freqRange = 32000; var $freqMax = 0.1; + var $enableMetaphoneSearch = false; + var $storeMetaphoneInfoAsWords; + var $metaphoneContent = ''; + // Objects: /** * Charset class object @@ -452,6 +456,11 @@ $this->maxExternalFiles = t3lib_div::intInRange($this->indexerConfig['maxExternalFiles'],0,1000,5); $this->flagBitMask = t3lib_div::intInRange($this->indexerConfig['flagBitMask'],0,255); + // Workaround: If the extension configuration was not updated yet, the value is not existing + $this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0) : 1; + + $this->storeMetaphoneInfoAsWords = $this->isTableUsed('index_words') ? false : ($this->enableMetaphoneSearch ? true : false); + // Initialize external document parsers: // Example configuration, see ext_localconf.php of this file! if ($this->conf['index_externals']) { @@ -468,7 +477,8 @@ // Initialize metaphone hook: // Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone'; - if ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) { + // Make sure that the hook is loaded _after_ indexed_search as this may overwrite the hook depending on the configuration. + if ($this->enableMetaphoneSearch && $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) { $this->metaphoneObj = &t3lib_div::getUserObj($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']); $this->metaphoneObj->pObj = &$this; } @@ -550,7 +560,7 @@ $this->log_pull(); // Calculating a hash over what is to be the actual page content. 
Maybe this hash should not include title,description and keywords? The bodytext is the primary concern. (on the other hand a changed page-title would make no difference then, so dont!) - $this->content_md5h = $this->md5inthash(implode($this->contentParts,'')); + $this->content_md5h = $this->md5inthash(implode('', $this->contentParts)); // This function checks if there is already a page (with gr_list = 0,-1) indexed and if that page has the very same contentHash. // If the contentHash is the same, then we can rest assured that this page is already indexed and regardless of mtime and origContent we don't need to do anything more. @@ -580,8 +590,10 @@ // Check words and submit to word list if not there $this->log_push('Check word list and submit words',''); - $this->checkWordList($indexArr); - $this->submitWords($indexArr,$this->hash['phash']); + if ($this->isTableUsed('index_words')) { + $this->checkWordList($indexArr); + $this->submitWords($indexArr,$this->hash['phash']); + } $this->log_pull(); // Set parsetime @@ -1055,8 +1067,10 @@ // Check words and submit to word list if not there $this->log_push('Check word list and submit words',''); - $this->checkWordList($indexArr); - $this->submitWords($indexArr,$phash_arr['phash']); + if ($this->isTableUsed('index_words')) { + $this->checkWordList($indexArr); + $this->submitWords($indexArr,$phash_arr['phash']); + } $this->log_pull(); // Set parsetime @@ -1244,7 +1258,7 @@ $this->analyzeHeaderinfo($indexArr,$content,'description',5); $this->analyzeBody($indexArr,$content); - return ($indexArr); + return $indexArr; } /** @@ -1257,13 +1271,29 @@ * @return void */ function analyzeHeaderinfo(&$retArr,$content,$key,$offset) { - reset($content[$key]); - while(list(,$val)=each($content[$key])) { - $val = substr($val,0,60); // Max 60 - because the baseword varchar IS 60. This MUST be the same. 
+ foreach ($content[$key] as $val) { + $val = substr($val,0,60); // Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same. + + if (!isset($retArr[$val])) { + // Word ID (wid) + $retArr[$val]['hash'] = $this->md5inthash($val); + + // Metaphone value is also 60 only chars long + $metaphone = $this->enableMetaphoneSearch + ? substr($this->metaphone($val,$this->storeMetaphoneInfoAsWords),0,60) + : ''; + $retArr[$val]['metaphone'] = $metaphone; + } + + // Build metaphone fulltext string (can be used for fulltext indexing) + if ($this->storeMetaphoneInfoAsWords) { + $this->metaphoneContent.= ' '.$retArr[$val]['metaphone']; + } + + // Priority used for flagBitMask feature (see extension configuration) $retArr[$val]['cmp'] = $retArr[$val]['cmp']|pow(2,$offset); - $retArr[$val]['count'] = $retArr[$val]['count']+1; - $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7)); - $retArr[$val]['metaphone'] = $this->metaphone($val); + + $retArr[$val]['count']++; // Increase number of occurences $this->wordcount++; } } @@ -1276,14 +1306,29 @@ * @return void */ function analyzeBody(&$retArr,$content) { - foreach($content['body'] as $key => $val) { - $val = substr($val,0,60); // Max 60 - because the baseword varchar IS 60. This MUST be the same. - if(!isset($retArr[$val])) { + foreach ($content['body'] as $key => $val) { + $val = substr($val,0,60); // Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same. + + if (!isset($retArr[$val])) { + // First occurence (used for ranking results) $retArr[$val]['first'] = $key; - $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7)); - $retArr[$val]['metaphone'] = $this->metaphone($val); + + // Word ID (wid) + $retArr[$val]['hash'] = $this->md5inthash($val); + + // Metaphone value is also only 60 chars long + $metaphone = $this->enableMetaphoneSearch + ? 
substr($this->metaphone($val,$this->storeMetaphoneInfoAsWords),0,60) + : ''; + $retArr[$val]['metaphone'] = $metaphone; } - $retArr[$val]['count'] = $retArr[$val]['count']+1; + + // Build metaphone fulltext string (can be used for fulltext indexing) + if ($this->storeMetaphoneInfoAsWords) { + $this->metaphoneContent.= ' '.$retArr[$val]['metaphone']; + } + + $retArr[$val]['count']++; // Increase number of occurences $this->wordcount++; } } @@ -1300,14 +1345,18 @@ if (is_object($this->metaphoneObj)) { $tmp = $this->metaphoneObj->metaphone($word, $this->conf['sys_language_uid']); } else { + // Use native PHP function instead of advanced doubleMetaphone class $tmp = metaphone($word); } - // Return raw value? - if ($retRaw) return $tmp; + if ($retRaw) { // Return raw value? + $ret = $tmp; + } elseif (strlen($tmp)) { // Otherwise create hash and return integer + $ret = $this->md5inthash($tmp); + } else { + $ret = 0; + } - // Otherwise create hash and return integer - if($tmp=='') $ret=0; else $ret=hexdec(substr(md5($tmp),0,7)); return $ret; } @@ -1368,7 +1417,9 @@ 'freeIndexSetId' => intval($this->conf['freeIndexSetId']), ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); + } // PROCESSING index_section $this->submit_section($this->hash['phash'],$this->hash['phash']); @@ -1379,12 +1430,15 @@ // PROCESSING index_fulltext $fields = array( 'phash' => $this->hash['phash'], - 'fulltextdata' => implode(' ', $this->contentParts) + 'fulltextdata' => implode(' ', $this->contentParts), + 'metaphonedata' => $this->metaphoneContent ); if ($this->indexerConfig['fullTextDataLength']>0) { $fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']); } - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); + if ($this->isTableUsed('index_fulltext')) { + 
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); + } // PROCESSING index_debug if ($this->indexerConfig['debugMode']) { @@ -1399,7 +1453,9 @@ 'lexer' => $this->lexerObj->debugString, )) ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields); + if ($this->isTableUsed('index_debug')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields); + } } } @@ -1420,7 +1476,9 @@ 'hash_gr_list' => $this->md5inthash($this->conf['gr_list']), 'gr_list' => $this->conf['gr_list'] ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields); + if ($this->isTableUsed('index_grlist')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields); + } } /** @@ -1440,7 +1498,9 @@ $this->getRootLineFields($fields); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields); + if ($this->isTableUsed('index_section')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields); + } } /** @@ -1452,11 +1512,15 @@ function removeOldIndexedPages($phash) { // Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here. $tableArr = explode(',','index_phash,index_section,index_grlist,index_fulltext,index_debug'); - foreach($tableArr as $table) { - $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); + foreach ($tableArr as $table) { + if ($this->isTableUsed($table)) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); + } } // Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file). 
- $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash)); + if ($this->isTableUsed('index_section')) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash)); + } } @@ -1520,22 +1584,27 @@ 'tstamp' => time(), 'crdate' => time(), 'gr_list' => $this->conf['gr_list'], - 'externalUrl' => $fileParts['scheme'] ? 1 : 0, - 'recordUid' => intval($this->conf['recordUid']), - 'freeIndexUid' => intval($this->conf['freeIndexUid']), - 'freeIndexSetId' => intval($this->conf['freeIndexSetId']), + 'externalUrl' => $fileParts['scheme'] ? 1 : 0, + 'recordUid' => intval($this->conf['recordUid']), + 'freeIndexUid' => intval($this->conf['freeIndexUid']), + 'freeIndexSetId' => intval($this->conf['freeIndexSetId']), ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); + } // PROCESSING index_fulltext $fields = array( 'phash' => $hash['phash'], - 'fulltextdata' => implode(' ', $contentParts) + 'fulltextdata' => implode(' ', $contentParts), + 'metaphonedata' => $this->metaphoneContent ); if ($this->indexerConfig['fullTextDataLength']>0) { $fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']); } - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); + if ($this->isTableUsed('index_fulltext')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); + } // PROCESSING index_debug if ($this->indexerConfig['debugMode']) { @@ -1548,7 +1617,9 @@ 'lexer' => $this->lexerObj->debugString, )) ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields); + if ($this->isTableUsed('index_debug')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields); + } } } @@ -1560,8 +1631,13 @@ */ function submitFile_grlist($hash) { // Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another 
one. - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')'); - if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')'); + } else { + $res = false; + } + + if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { $this->submit_grlist($hash,$hash); } } @@ -1573,9 +1649,14 @@ * @return void */ function submitFile_section($hash) { - // Testing if there is a section - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id'])); - if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + // Testing if there is already a section + if ($this->isTableUsed('index_section')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id'])); + } else { + $res = false; + } + + if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { $this->submit_section($hash,$this->hash['phash']); } } @@ -1587,11 +1668,12 @@ * @return void */ function removeOldIndexedFiles($phash) { - // Removing old registrations for tables. 
$tableArr = explode(',','index_phash,index_grlist,index_fulltext,index_debug'); - foreach($tableArr as $table) { - $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); + foreach ($tableArr as $table) { + if ($this->isTableUsed($table)) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); + } } } @@ -1623,13 +1705,17 @@ * @return integer Result integer: Generally: <0 = No indexing, >0 = Do indexing (see $this->reasons): -2) Min age was NOT exceeded and so indexing cannot occur. -1) mtime matched so no need to reindex page. 0) N/A 1) Max age exceeded, page must be indexed again. 2) mtime of indexed page doesn't match mtime given for current content and we must index page. 3) No mtime was set, so we will index... 4) No indexed page found, so of course we will index. */ function checkMtimeTstamp($mtime,$phash) { + $out = 0; // Select indexed page: - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash)); - $out = 0; + if ($this->isTableUsed('index_phash')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash)); + } else { + $res = false; + } // If there was an indexing of the page...: - if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { if ($this->tstamp_maxAge && ($row['tstamp']+$this->tstamp_maxAge) < time()) { // If max age is exceeded, index the page $out = 1; // The configured max-age was exceeded for the document and thus it's indexed. } else { @@ -1660,8 +1746,13 @@ */ function checkContentHash() { // With this query the page will only be indexed if it's content is different from the same "phash_grouping" -page. - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash A', 'A.phash_grouping='.intval($this->hash['phash_grouping']).' 
AND A.contentHash='.intval($this->content_md5h)); - if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + if ($this->isTableUsed('index_phash')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash', 'phash_grouping='.intval($this->hash['phash_grouping']).' AND contentHash='.intval($this->content_md5h)); + } else { + $res = false; + } + + if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { return $row; } return 1; @@ -1676,8 +1767,13 @@ * @return boolean Returns true if the document needs to be indexed (that is, there was no result) */ function checkExternalDocContentHash($hashGr,$content_md5h) { - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A', 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h)); - if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + if ($this->isTableUsed('index_phash')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash', 'phash_grouping='.intval($hashGr).' AND contentHash='.intval($content_md5h)); + } else { + $res = false; + } + + if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { return 0; } return 1; @@ -1690,8 +1786,12 @@ * @return void */ function is_grlist_set($phash_x) { - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x)); - return $GLOBALS['TYPO3_DB']->sql_num_rows($res); + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x)); + } else { + $res = false; + } + return $res ? $GLOBALS['TYPO3_DB']->sql_num_rows($res) : false; } /** @@ -1703,8 +1803,13 @@ * @see submit_grlist() */ function update_grlist($phash,$phash_x) { - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' 
AND hash_gr_list='.$this->md5inthash($this->conf['gr_list'])); - if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list'])); + } else { + $res = false; + } + + if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { $this->submit_grlist($phash,$phash_x); $this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1); } @@ -1723,7 +1828,9 @@ ); if ($mtime) { $updateFields['item_mtime'] = intval($mtime); } - $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + } } /** @@ -1737,7 +1844,9 @@ 'freeIndexSetId' => intval($this->conf['freeIndexSetId']) ); - $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + } } /** @@ -1752,7 +1861,9 @@ 'parsetime' => intval($parsetime) ); - $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + } } /** @@ -1765,7 +1876,9 @@ $updateFields = array(); $this->getRootLineFields($updateFields); - $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields); + if ($this->isTableUsed('index_section')) { + $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields); + } } /** @@ -1795,15 +1908,22 @@ * @return void */ function removeLoginpagesWithContentHash() { - $res = 
$GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', ' + if ($this->isTableUsed('index_phash,index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', ' A.phash=B.phash AND A.phash_grouping='.intval($this->hash['phash_grouping']).' AND B.hash_gr_list!='.$this->md5inthash($this->defaultGrList).' AND A.contentHash='.intval($this->content_md5h)); - while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { - $this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1); - $this->removeOldIndexedPages($row['phash']); + } else { + $res = false; } + + if ($res) { + while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + $this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1); + $this->removeOldIndexedPages($row['phash']); + } + } } /** @@ -1817,8 +1937,26 @@ require_once(t3lib_extMgm::extPath('crawler').'class.tx_crawler_lib.php'); } + /** + * Check if the tables provided are configured for usage. + * This becomes neccessary for extensions that provide additional database functionality like indexed_search_mysql. 
+ * + * @param string Comma-separated list of tables + * @return boolean True if given tables are enabled + */ + function isTableUsed($table_list) { + $OK = true; + $tableArr = t3lib_div::trimExplode(',', $table_list); + $enabledTableList = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['use_tables']; + foreach ($tableArr as $table) { + if (!t3lib_div::inList($enabledTableList, $table)) { + $OK = false; + } + } + return $OK; + } @@ -1826,6 +1964,9 @@ + + + /******************************** * * SQL; Submitting words @@ -1846,23 +1987,29 @@ } if (count($phashArr)) { $cwl = implode(',',$phashArr); - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')'); + if ($this->isTableUsed('index_words')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')'); + } else { + $res = false; + } - if($GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) { + if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) { $this->log_setTSlogMessage('Inserting words: '.(count($wl)-$GLOBALS['TYPO3_DB']->sql_num_rows($res)),1); - while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { unset($wl[$row['baseword']]); } reset($wl); - while(list($key,$val)=each($wl)) { + foreach ($wl as $key => $val) { // foreach instead of each(), which is deprecated since PHP 7.2 and removed in PHP 8.0 $insertFields = array( 'wid' => $val['hash'], 'baseword' => $key, 'metaphone' => $val['metaphone'] ); // A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer as 60 chars (the baseword varchar is 60 characters...) this is not a problem. 
- $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields); + if ($this->isTableUsed('index_words')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields); + } } } } @@ -1876,9 +2023,11 @@ * @return void */ function submitWords($wl,$phash) { - $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash)); + if ($this->isTableUsed('index_rel')) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash)); + } - foreach($wl as $val) { + foreach ($wl as $val) { $insertFields = array( 'phash' => $phash, 'wid' => $val['hash'], @@ -1888,7 +2037,9 @@ 'flags' => ($val['cmp'] & $this->flagBitMask) ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields); + if ($this->isTableUsed('index_rel')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields); + } } } Index: typo3/sysext/indexed_search/ext_tables.sql =================================================================== --- typo3/sysext/indexed_search/ext_tables.sql (Revision 4388) +++ typo3/sysext/indexed_search/ext_tables.sql (Arbeitskopie) @@ -38,6 +38,7 @@ CREATE TABLE index_fulltext ( phash int(11) DEFAULT '0' NOT NULL, fulltextdata mediumtext, + metaphonedata mediumtext, PRIMARY KEY (phash) ) ENGINE=InnoDB; @@ -116,7 +117,6 @@ PRIMARY KEY (uid) ) ENGINE=InnoDB; - # # Table structure for table 'index_stat_word' # @@ -125,6 +125,7 @@ word varchar(30) DEFAULT '' NOT NULL, index_stat_search_id int(11) DEFAULT '0' NOT NULL, tstamp int(11) DEFAULT '0' NOT NULL, + pageid int(11) DEFAULT '0' NOT NULL, PRIMARY KEY (uid), KEY tstamp (tstamp,word) ) ENGINE=InnoDB; @@ -177,17 +178,3 @@ PRIMARY KEY (uid), KEY parent (pid) ); - - -# -# Table structure for table 'index_stat_word' -# -CREATE TABLE index_stat_word ( - uid int(11) NOT NULL auto_increment, - word varchar(30) DEFAULT '' NOT NULL, - index_stat_search_id int(11) DEFAULT '0' NOT NULL, - tstamp int(11) DEFAULT '0' NOT NULL, - pageid int(11) DEFAULT '0' NOT NULL, - PRIMARY KEY 
(uid), - KEY tstamp (tstamp,word) -) ENGINE=InnoDB; Index: typo3/sysext/indexed_search/doc/README.txt =================================================================== --- typo3/sysext/indexed_search/doc/README.txt (Revision 4388) +++ typo3/sysext/indexed_search/doc/README.txt (Arbeitskopie) @@ -1,2 +1,24 @@ A full documentation manual for the indexed search extension can be found in the extension "doc_indexed_search" in the TER. See http://typo3.org/documentation/document-library/extension-manuals/doc_indexed_search/current/view/ + + +This is a list of all tables which are used by this extension: + +index_phash +- Page information + +index_fulltext +- Fulltext data + +index_rel +- Relations between index_phash and index_words + +index_words +- baseword table + +index_section +- section index (= first 3 levels of the rootline for this document) + +index_grlist +- group list information +- indicates which gr_list has access to which phash Index: typo3/sysext/indexed_search/class.doublemetaphone.php =================================================================== --- typo3/sysext/indexed_search/class.doublemetaphone.php (Revision 4388) +++ typo3/sysext/indexed_search/class.doublemetaphone.php (Arbeitskopie) @@ -36,9 +36,8 @@ // TYPO3: Had to change name to "user_DoubleMetaPhone" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so: -// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you simply configure TYPO3 so by setting the line below in your localconf.php file: -// TYPO3: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone'; -// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example. 
+// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you can enable it in the extension configuration +// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example (also see ext_localconf.php) class user_DoubleMetaPhone { Index: typo3/sysext/indexed_search/ext_localconf.php =================================================================== --- typo3/sysext/indexed_search/ext_localconf.php (Revision 4388) +++ typo3/sysext/indexed_search/ext_localconf.php (Arbeitskopie) @@ -43,7 +43,15 @@ 'tif' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse', ); +$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['use_tables'] = 'index_phash,index_fulltext,index_rel,index_words,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config'; +$_EXTCONF = unserialize($_EXTCONF); // unserializing the configuration so we can use it here: + + // Use the advanced doubleMetaphone parser instead of the internal one (usage of metaphone parsers is generally disabled by default) +if (isset($_EXTCONF['enableMetaphoneSearch']) && intval($_EXTCONF['enableMetaphoneSearch'])==2) { + $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone'; +} + // EXAMPLE configuration of hooks: /* $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['pi1_hooks'] = array ( Index: typo3/sysext/indexed_search/pi/considerations.txt =================================================================== --- typo3/sysext/indexed_search/pi/considerations.txt (Revision 4388) +++ typo3/sysext/indexed_search/pi/considerations.txt (Arbeitskopie) @@ -1,3 +1,6 @@ +- Search is always case insensitive. If you need a case sensitive search, use a binary collation for the index_fulltext and index_words tables. + + MAILS about: @@ -17,7 +20,7 @@ If you are an SQL wizard, you may be able to help me here. 
-In the (coming) index searching thing, I have three main tables. +In the (coming) index searching thing, I have three main tables. - index_words which contains all the words indexed - index_pages which represents a link to a page id or external url @@ -27,13 +30,13 @@ So searching an OR search for "content" and "management" could be done like this: -SELECT STRAIGHT_JOIN [some fields here...] FROM -index_words AS IW, -index_rel AS IR, +SELECT STRAIGHT_JOIN [some fields here...] FROM +index_words AS IW, +index_rel AS IR, index_phash AS IP -WHERE -IR.phash = IP.phash AND -IW.wid=IR.wid AND +WHERE +IR.phash = IP.phash AND +IW.wid=IR.wid AND (IW.baseword = 'content' OR IW.baseword = 'management') [... and here comes some GROUP BY, ORDER BY and LIMIT] @@ -45,30 +48,30 @@ Therefore I tought of a little trick to do it: -SELECT STRAIGHT_JOIN [some fields here...] FROM -index_words AS IW, -index_rel AS IR, -index_words AS IW2, -index_rel AS IR2, +SELECT STRAIGHT_JOIN [some fields here...] FROM +index_words AS IW, +index_rel AS IR, +index_words AS IW2, +index_rel AS IR2, index_phash AS IP -WHERE -IW.wid=IR.wid AND -IW2.wid=IR2.wid AND -IR.phash = IP.phash AND -IR2.phash = IP.phash AND +WHERE +IW.wid=IR.wid AND +IW2.wid=IR2.wid AND +IR.phash = IP.phash AND +IR2.phash = IP.phash AND (IW.baseword = 'content' and IW2.baseword = 'management') [... and here comes some GROUP BY, ORDER BY and LIMIT] -... and actually I think this works, but it's very slow, probably because the internal result in MySQL becomes extremely large due to the joins. +... and actually I think this works, but it's very slow, probably because the internal result in MySQL becomes extremely large due to the joins. Can anyone help me? 
-I checked out kwIndex from hotscripts and he does it like this: +I checked out kwIndex from hotscripts and he does it like this: 1) Select the word-ids (SQL-query 1) 2) If both words were found, make another query for all linking-table entries matching the words and group by the word-id. The count(*) statement shows the number equal to the number of searchwords if they were both found. So select only records which delivers this. Then you have the document ids.... (SQL 2) @@ -159,9 +162,9 @@ OK, I am not an sql-wizard. Just some hints and wishes, which I hope are helpful. -> +> > 1) To the AND question: -> +> > Maybe it's best to make a search for each word; After getting the total list > of page-ids from first search, this is included as a condition in the next > search, which generates a new list which is included in the next search, @@ -196,7 +199,7 @@ I have to say a really elaborate, fast and multiformat (pdf's !) search engine is really one the single most important things for every 100+ website. -> +> > 2) Search query syntax > Any suggestions to a search query syntax. > - Search for "content management" is by default AND search @@ -317,3 +320,35 @@ ***************************************************************************************************************** +OK there were some fancy calculations promoted by Graeme Merrall: + +"However, regarding relevance you probably want to look at something like +Salton's formula which is a good easy way to measure relevance. +Oracle Intermedia uses this and it's pretty simple: +Score can be between 0 and 100, but the top-scoring document in the query +will not necessarily have a score of 100 -- scoring is relative, not +absolute. This means that scores are not comparable across indexes, or even +across different queries on the same index. 
Score for each document is +computed using the standard Salton formula: + + 3f(1+log(N/n)) + +Where f is the frequency of the search term in the document, N is the total +number of rows in the table, and n is the number of rows which contain the +search term. This is converted into an integer in the range 0 - 100. + +There's a good doc on it at +http://ls6-www.informatik.uni-dortmund.de/bib/fulltext/ir/Pfeifer:97/ +although it may be a little complex for what you require so just pick the +relevant parts out. +" + +However I chose not to go with this for several reasons. +I do not claim that my ways of calculating importance here is the best. +ANY (better) suggestion for ranking calculation is accepted! (as long as they are shipped with tested code in exchange for this.) + + + + +***************************************************************************************************************** +***************************************************************************************************************** Index: typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php =================================================================== --- typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php (Revision 4388) +++ typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php (Arbeitskopie) @@ -150,11 +150,17 @@ var $cache_rl = array(); // Caching of root line data var $fe_groups_required = array(); // Required fe_groups memberships for display of a result. var $domain_records = array(); // Domain records (?) - var $wSelClauses = array(); // Select clauses for individual words var $resultSections = array(); // Page tree sections for search result. var $external_parsers = array(); // External parser objects var $iconFileNameCache = array(); // Storage of icons.... 
+ var $templateCode; // Will hold the content of $conf['templateFile'] + var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results'; + var $indexerConfig = array(); // Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search'] + var $enableMetaphoneSearch = false; + var $storeMetaphoneInfoAsWords; + + /** * Lexer object * @@ -162,14 +168,8 @@ */ var $lexerObj; - /** - * Indexer object - * - * @var tx_indexedsearch_indexer - */ - var $indexerObj; - var $templateCode; // Will hold the content of $conf['templateFile'] - var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results'; + const WILDCARD_LEFT = 1; + const WILDCARD_RIGHT = 2; /** @@ -186,9 +186,6 @@ $this->pi_loadLL(); $this->pi_setPiVarDefaults(); - // Initialize the indexer-class - just to use a few function (for making hashes) - $this->indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer'); - // Initialize: $this->initialize(); @@ -203,8 +200,8 @@ $this->printRules(). $content; - return $this->pi_wrapInBaseClass($content); - } + return $this->pi_wrapInBaseClass($content); + } /** * Initialize internal variables, especially selector box values for the search form and search words @@ -214,6 +211,11 @@ function initialize() { global $TYPO3_CONF_VARS; + // Indexer configuration from Extension Manager interface: + $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']); + $this->enableMetaphoneSearch = $this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0; + $this->storeMetaphoneInfoAsWords = $this->isTableUsed('index_words') ? 
false : true; + // Initialize external document parsers for icon display and other soft operations if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'])) { foreach ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) { @@ -305,6 +307,10 @@ ) ); + if (!$this->enableMetaphoneSearch) { + unset ($this->optValues['type']['10']); // Remove this option if metaphone search is disabled) + } + // Free Index Uid: if ($this->conf['search.']['defaultFreeIndexUidList']) { $uidList = t3lib_div::intExplode(',', $this->conf['search.']['defaultFreeIndexUidList']); @@ -378,8 +384,10 @@ // Add search languages: $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'sys_language', '1=1'.$this->cObj->enableFields('sys_language')); - while($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { - $this->optValues['lang'][$lR['uid']] = $lR['title']; + if ($res) { + while ($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + $this->optValues['lang'][$lR['uid']] = $lR['title']; + } } // Calling hook for modification of initialized content @@ -416,11 +424,11 @@ } /** - * Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holds the SQL operator (eg. AND, OR) + * Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holding the SQL operator (eg. AND, OR) * * Only words with 2 or more characters are accepted * Max 200 chars total - * Space is used to split words, "" can be used search for a whole string (not indexed search then) + * Space is used to split words, "" can be used search for a whole string * AND, OR and NOT are prefix words, overruling the default operator * +/|/- equals AND, OR and NOT as operators. * All search words are converted to lowercase. 
@@ -438,12 +446,18 @@ $inSW = $GLOBALS['TSFE']->csConvObj->utf8_encode($inSW, $GLOBALS['TSFE']->metaCharset); $inSW = $GLOBALS['TSFE']->csConvObj->entities_to_utf8($inSW,TRUE); + $sWordArray = false; if ($hookObj = &$this->hookRequest('getSearchWords')) { - return $hookObj->getSearchWords_splitSWords($inSW, $defOp); + $sWordArray = $hookObj->getSearchWords_splitSWords($inSW, $defOp); } else { - if ($this->piVars['type']==20) { - return array(array('sword'=>trim($inSW), 'oper'=>'AND')); + if ($this->piVars['type']==20) { // Sentence + $sWordArray = array( + array( + 'sword' => trim($inSW), + 'oper' => 'AND' + ) + ); } else { $search = t3lib_div::makeInstance('tslib_search'); $search->default_operator = $defOp==1 ? 'OR' : 'AND'; @@ -451,10 +465,12 @@ $search->register_and_explode_search_string($inSW); if (is_array($search->sword_array)) { - return $this->procSearchWordsByLexer($search->sword_array); + $sWordArray = $this->procSearchWordsByLexer($search->sword_array); } } } + + return $sWordArray; } /** @@ -570,7 +586,11 @@ // Getting SQL result pointer: $GLOBALS['TT']->push('Searching result'); - $res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid); + if ($hookObj = &$this->hookRequest('getResultRows_SQLpointer')) { + $res = $hookObj->getResultRows_SQLpointer($sWArr,$freeIndexUid); + } else { + $res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid); + } $GLOBALS['TT']->pull(); // Organize and process result: @@ -592,6 +612,14 @@ // Each row should contain the fields from 'ISEC.*, IP.*' combined + artificial fields "show_resume" (boolean) and "result_number" (counter) while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + if (!$this->checkExistance($row)) { + // Check if the record is still available or if it has been deleted meanwhile. + // Currently this works for files only, since extending it to content elements would cause a lot of overhead... + // Otherwise, skip the row. 
+ $count--; + continue; + } + // Set first row: if (!$c) { $firstRow = $row; @@ -611,12 +639,14 @@ $c++; // Increase the result pointer // All rows for display is put into resultRows[] - if ($c > $pointer * $this->piVars['results']) { + if ($c > $pointer * $this->piVars['results'] && $c <= ($pointer+1) * $this->piVars['results']) { $row['result_number'] = $c; $resultRows[] = $row; - // This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above). - if (!$exactCount && (($c+1) > ($pointer+1)*$this->piVars['results'])) { break; } } + // This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above). + if (!$exactCount && (($c+1) > ($pointer+1) * $this->piVars['results'])) { + break; + } } else { $count--; // Skip this row if the user cannot view it (missing permission) } @@ -626,10 +656,10 @@ } return array( - 'resultRows' => $resultRows, - 'firstRow' => $firstRow, - 'count' => $count - ); + 'resultRows' => $resultRows, + 'firstRow' => $firstRow, + 'count' => $count + ); } else { // No results found: return FALSE; } @@ -643,7 +673,7 @@ * @return pointer */ function getResultRows_SQLpointer($sWArr,$freeIndexUid=-1) { - // This SEARCHES for the searchwords in $sWArr AND returns a COMPLETE list of phash-integers of the matches. + // This SEARCHES for the searchwords in $sWArr AND returns a COMPLETE list of phash-integers of the matches. 
$list = $this->getPhashList($sWArr); // Perform SQL Search / collection of result rows array: @@ -825,60 +855,52 @@ // Initialize variables: $c=0; $totalHashList = array(); // This array accumulates the phash-values - $this->wSelClauses = array(); // Traverse searchwords; for each, select all phash integers and merge/diff/intersect them with previous word (based on operator) foreach ($sWArr as $k => $v) { // Making the query for a single search word based on the search-type $sWord = $v['sword']; // $GLOBALS['TSFE']->csConvObj->conv_case('utf-8',$v['sword'],'toLower'); // lower-case all of them... $theType = (string)$this->piVars['type']; - if (strstr($sWord,' ')) $theType = 20; // If there are spaces in the search-word, make a full text search instead. + if (strstr($sWord,' ')) { + $theType = 20; // If there are spaces in the search-word, make a full text search instead. + } $GLOBALS['TT']->push('SearchWord "'.$sWord.'" - $theType='.$theType); - $res = ''; - $wSel=''; - // Perform search for word: - switch($theType) { + switch ($theType) { case '1': // Part of word - $wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'"; - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + $res = $this->searchWord($sWord, self::WILDCARD_LEFT | self::WILDCARD_RIGHT); break; case '2': // First part of word - $wSel = "IW.baseword LIKE '".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'"; - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + $res = $this->searchWord($sWord, self::WILDCARD_RIGHT); break; case '3': // Last part of word - $wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."'"; - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + $res = $this->searchWord($sWord, self::WILDCARD_LEFT); break; case '10': // Sounds like - $wSel = 'IW.metaphone = '.$this->indexerObj->metaphone($sWord); - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + + /** + * 
Indexer object + * + * @var tx_indexedsearch_indexer + */ + $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer'); // Initialize the indexer-class + + // Perform metaphone search + $res = $this->searchMetaphone($indexerObj->metaphone($sWord,$this->storeMetaphoneInfoAsWords)); + + unset($indexerObj); break; case '20': // Sentence - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( - 'ISEC.phash', - 'index_section ISEC, index_fulltext IFT', - 'IFT.fulltextdata LIKE \'%'.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_fulltext').'%\' AND - ISEC.phash = IFT.phash - '.$this->sectionTableWhere(), - 'ISEC.phash' - ); - $wSel = '1=1'; - - if ($this->piVars['type']==20) $this->piVars['order'] = 'mtime'; // If there is a fulltext search for a sentence there is a likeliness that sorting cannot be done by the rankings from the rel-table (because no relations will exist for the sentence in the word-table). So therefore mtime is used instaed. It is not required, but otherwise some hits may be left out. + $res = $this->searchSentence($sWord); + $this->piVars['order'] = 'mtime'; // If there is a fulltext search for a sentence there is a likeliness that sorting cannot be done by the rankings from the rel-table (because no relations will exist for the sentence in the word-table). So therefore mtime is used instead. It is not required, but otherwise some hits may be left out. break; default: // Distinct word - $wSel = 'IW.wid = '.$hash = $this->indexerObj->md5inthash($sWord); - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + $res = $this->searchDistinct($sWord); break; } - // Accumulate the word-select clauses - $this->wSelClauses[] = $wSel; - // If there was a query to do, then select all phash-integers which resulted from this. 
if ($res) { @@ -922,6 +944,7 @@ * @return pointer SQL result pointer */ function execPHashListQuery($wordSel,$plusQ='') { + return $GLOBALS['TYPO3_DB']->exec_SELECTquery( 'IR.phash', 'index_words IW, @@ -937,28 +960,82 @@ } /** + * Search for a word + * + * @param string Search word + * @param integer Wildcard mode: bitmask of self::WILDCARD_LEFT and self::WILDCARD_RIGHT + * @return pointer SQL result pointer + */ + function searchWord($sWord, $mode) { + $wildcard_left = ($mode & self::WILDCARD_LEFT) ? '%' : ''; + $wildcard_right = ($mode & self::WILDCARD_RIGHT) ? '%' : ''; + + $wSel = 'IW.baseword LIKE \''.$wildcard_left.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words').$wildcard_right.'\''; + $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + return $res; + } + + /** + * Search for one distinct word + * + * @return pointer SQL result pointer + */ + function searchDistinct($sWord) { + $wSel = 'IW.wid='.$this->md5inthash($sWord); + $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + return $res; + } + + /** + * Search for a sentence + * + * @return pointer SQL result pointer + */ + function searchSentence($sWord) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( + 'ISEC.phash', + 'index_section ISEC, index_fulltext IFT', + 'IFT.fulltextdata LIKE \'%'.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_fulltext').'%\' AND + ISEC.phash = IFT.phash + '.$this->sectionTableWhere(), + 'ISEC.phash' + ); + return $res; + } + + /** + * Search for a metaphone word + * + * @return pointer SQL result pointer + */ + function searchMetaphone($sWord) { + $wSel = 'IW.metaphone='.$sWord; + return $this->execPHashListQuery($wSel,' AND is_stopword=0'); + } + + /** * Returns AND statement for selection of section in database. (rootlevel 0-2 + page_id) * * @return string AND clause for selection of section in database. */ function sectionTableWhere() { - $out = $this->wholeSiteIdList<0 ? '' : 'AND ISEC.rl0 IN ('.$this->wholeSiteIdList.')'; + $out = $this->wholeSiteIdList<0 ?
'' : ' AND ISEC.rl0 IN ('.$this->wholeSiteIdList.')'; $match = ''; if (substr($this->piVars['sections'],0,4)=='rl1_') { $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],4))); - $out.= 'AND ISEC.rl1 IN ('.$list.')'; + $out.= ' AND ISEC.rl1 IN ('.$list.')'; $match = TRUE; } elseif (substr($this->piVars['sections'],0,4)=='rl2_') { $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],4))); - $out.= 'AND ISEC.rl2 IN ('.$list.')'; + $out.= ' AND ISEC.rl2 IN ('.$list.')'; $match = TRUE; } elseif (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) { // Traversing user configured fields to see if any of those are used to limit search to a section: foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel) { if (substr($this->piVars['sections'],0,strlen($fieldName)+1)==$fieldName.'_') { $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],strlen($fieldName)+1))); - $out.= 'AND ISEC.'.$fieldName.' IN ('.$list.')'; + $out.= ' AND ISEC.'.$fieldName.' 
IN ('.$list.')'; $match = TRUE; break; } @@ -990,18 +1067,18 @@ */ function mediaTypeWhere() { - switch((string)$this->piVars['media']) { + switch ((string)$this->piVars['media']) { case '0': // '0' => 'Kun TYPO3 sider', - $out = 'AND IP.item_type='.$GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');; + $out = ' AND IP.item_type='.$GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');; break; case '-2': // All external documents - $out = 'AND IP.item_type!='.$GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');; + $out = ' AND IP.item_type!='.$GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');; break; case '-1': // All content - $out=''; + $out = ''; break; default: - $out = 'AND IP.item_type='.$GLOBALS['TYPO3_DB']->fullQuoteStr($this->piVars['media'], 'index_phash'); + $out = ' AND IP.item_type='.$GLOBALS['TYPO3_DB']->fullQuoteStr($this->piVars['media'], 'index_phash'); break; } @@ -1092,44 +1169,14 @@ while(list(,$rootId)=each($siteIdNumbers)) { $id_list[] = $this->cObj->getTreeList($rootId,9999,0,0,'','').$rootId; } - $page_where = 'ISEC.page_id IN ('.implode(',',$id_list).')'; + $page_where = ' ISEC.page_id IN ('.implode(',',$id_list).')'; } else { // Disable everything... (select all) - $page_where = ' 1=1 '; + $page_where = ' 1=1'; } - // If any of the ranking sortings are selected, we must make a join with the word/rel-table again, because we need to calculate ranking based on all search-words found. - if (substr($this->piVars['order'],0,5)=='rank_') { - /* - OK there were some fancy calculations promoted by Graeme Merrall: - - "However, regarding relevance you probably want to look at something like - Salton's formula which is a good easy way to measure relevance. - Oracle Intermedia uses this and it's pretty simple: - Score can be between 0 and 100, but the top-scoring document in the query - will not necessarily have a score of 100 -- scoring is relative, not - absolute. 
This means that scores are not comparable across indexes, or even - across different queries on the same index. Score for each document is - computed using the standard Salton formula: - - 3f(1+log(N/n)) - - Where f is the frequency of the search term in the document, N is the total - number of rows in the table, and n is the number of rows which contain the - search term. This is converted into an integer in the range 0 - 100. - - There's a good doc on it at - http://ls6-www.informatik.uni-dortmund.de/bib/fulltext/ir/Pfeifer:97/ - although it may be a little complex for what you require so just pick the - relevant parts out. - " - - However I chose not to go with this for several reasons. - I do not claim that my ways of calculating importance here is the best. - ANY (better) suggestion for ranking calculation is accepted! (as long as they are shipped with tested code in exchange for this.) - */ - - switch($this->piVars['order']) { + if (substr($this->piVars['order'],0,5)=='rank_') { + switch ($this->piVars['order']) { case 'rank_flag': // This gives priority to word-position (max-value) so that words in title, keywords, description counts more than in content. // The ordering is refined with the frequency sum as well. $grsel = 'MAX(IR.flags) AS order_val1, SUM(IR.freq) AS order_val2'; @@ -1149,10 +1196,7 @@ break; } - // So, words are imploded into an OR statement (no "sentence search" should be done here - may deselect results) - $wordSel='('.implode(' OR ',$this->wSelClauses).') AND '; - - return $GLOBALS['TYPO3_DB']->exec_SELECTquery( + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( 'ISEC.*, IP.*, ' .$grsel, 'index_words IW, @@ -1160,18 +1204,18 @@ index_section ISEC, index_phash IP'. $page_join, - $wordSel.' - IP.phash IN ('.$list.') '. + 'IP.phash IN ('.$list.') '. $this->mediaTypeWhere().' '. $this->languageWhere(). $freeIndexUidClause.' 
AND IW.wid=IR.wid AND ISEC.phash = IR.phash AND IP.phash = IR.phash - AND '.$page_where, + AND '.$page_where, 'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2 ,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId', $orderBy ); + } else { // Otherwise, if sorting are done with the pages table or other fields, there is no need for joining with the rel/word tables: $orderBy = ''; @@ -1187,7 +1231,7 @@ break; } - return $GLOBALS['TYPO3_DB']->exec_SELECTquery( + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( 'ISEC.*, IP.*', 'index_phash IP,index_section ISEC'.$page_join, 'IP.phash IN ('.$list.') '. @@ -1200,6 +1244,8 @@ $orderBy ); } + + return $res; } /** @@ -1224,8 +1270,13 @@ // "phash_t3" is the phash of the parent TYPO3 page row which initiated the indexing of the documents in this section. // So, selecting for the grlist records belonging to the parent phash-row where the current users gr_list exists will help us to know. // If this is NOT found, there is still a theoretical possibility that another user accessible page would display a link, so maybe the resume of such a document here may be unjustified hidden. But better safe than sorry. - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash_t3']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist')); - if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash_t3']).' 
AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist')); + } else { + $res = false; + } + + if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)) { #debug("Look up for external media '".$row['data_filename']."': phash:".$row['phash_t3'].' YES - ('.$GLOBALS['TSFE']->gr_list.")!",1); return TRUE; } else { @@ -1235,8 +1286,13 @@ } else { // Ordinary TYPO3 pages: if (strcmp($row['gr_list'],$GLOBALS['TSFE']->gr_list)) { // Selecting for the grlist records belonging to the phash-row where the current users gr_list exists. If it is found it is proof that this user has direct access to the phash-rows content although he did not himself initiate the indexing... - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist')); - if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist')); + } else { + $res = false; + } + + if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)) { #debug('Checking on it ...'.$row['item_title'].'/'.$row['phash'].' - YES ('.$GLOBALS['TSFE']->gr_list.")",1); return TRUE; } else { @@ -1251,6 +1307,25 @@ } /** + * Check if the record is still available or if it has been deleted meanwhile. + * Currently this works for files only, since extending it to page content would cause a lot of overhead... 
+ * + * @param array Result row array + * @return boolean Returns true if record is still available + */ + function checkExistance($row) { + $recordExists = true; // Always expect that page content exists + + if ($row['item_type']) { // External media: + if (!is_file($row['data_filename']) || !file_exists($row['data_filename'])) { + $recordExists = false; + } + } + + return $recordExists; + } + + /** * Returns "DESC" or "" depending on the settings of the incoming highest/lowest result order (piVars['desc'] * * @param boolean If true, inverse the order which is defined by piVars['desc'] @@ -1298,7 +1373,27 @@ } } + /** + * md5 integer hash + * Using 7 instead of 8 just because that makes the integers lower than 32 bit (28 bit) and so they do not interfere with UNSIGNED integers or PHP-versions which have varying output from the hexdec function. + * + * @param string String to hash + * @return integer Integer interpretation of the md5 hash of input string. + */ + function md5inthash($str) { + return tx_indexedsearch_indexer::md5inthash($str); + } + /** + * Check if the tables provided are configured for usage. + * This becomes necessary for extensions that provide additional database functionality like indexed_search_mysql.
+ * + * @param string Comma-separated list of tables + * @return boolean True if given tables are enabled + */ + function isTableUsed($table_list) { + return tx_indexedsearch_indexer::isTableUsed($table_list); + } @@ -1310,6 +1405,8 @@ + + /*********************************** * * HTML output functions @@ -1328,7 +1425,12 @@ // Multilangual text $substituteArray = array('searchFor', 'extResume', 'atATime', 'orderBy', 'fromSection', 'searchIn', 'match', 'style', 'freeIndexUid'); foreach ($substituteArray as $marker) { - $markerArray['###FORM_'.t3lib_div::strtoupper($marker).'###'] = $this->pi_getLL('form_'.$marker,'',1); + if (t3lib_div::int_from_ver(TYPO3_version) >= t3lib_div::int_from_ver('4.2')) { + $markerArray['###FORM_'.t3lib_div::strtoupper($marker).'###'] = $this->pi_getLL('form_'.$marker,'',1); + } else { + // TODO: This is a workaround for TYPO3 4.1 which lacks t3lib_div::strtoupper() - can be removed once 4.1 is no longer in use + $markerArray['###FORM_'.strtoupper($marker).'###'] = $this->pi_getLL('form_'.$marker,'',1); + } } $markerArray['###FORM_SUBMIT###'] = $this->pi_getLL('submit_button_label','',1); @@ -1596,7 +1698,12 @@ if (is_array($tmplContent)) { foreach ($tmplContent AS $k => $v) { - $markerArray['###'.t3lib_div::strtoupper($k).'###'] = $v; + if (t3lib_div::int_from_ver(TYPO3_version) >= t3lib_div::int_from_ver('4.2')) { + $markerArray['###'.t3lib_div::strtoupper($k).'###'] = $v; + } else { + // TODO: This is a workaround for TYPO3 4.1 which lacks t3lib_div::strtoupper() - can be removed once 4.1 is no longer in use + $markerArray['###'.strtoupper($k).'###'] = $v; + } } } @@ -1955,13 +2062,20 @@ if ($row['show_resume']) { if (!$noMarkup) { $markedSW = ''; - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_fulltext', 'phash='.intval($row['phash'])); - if ($ftdrow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { - // Cut HTTP references after some length - $content = preg_replace('/(http:\/\/[^ ]{60})([^ ]+)/i', '$1...',
$ftdrow['fulltextdata']); - $markedSW = $this->markupSWpartsOfString($content); + if ($this->isTableUsed('index_fulltext')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_fulltext', 'phash='.intval($row['phash'])); + } else { + $res = false; } - $GLOBALS['TYPO3_DB']->sql_free_result($res); + + if ($res) { + if ($ftdrow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + // Cut HTTP references after some length + $content = preg_replace('/(http:\/\/[^ ]{60})([^ ]+)/i', '$1...', $ftdrow['fulltextdata']); + $markedSW = $this->markupSWpartsOfString($content); + } + $GLOBALS['TYPO3_DB']->sql_free_result($res); + } } if (!trim($markedSW)) { Index: typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php =================================================================== --- typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php (Revision 4388) +++ typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php (Arbeitskopie) @@ -109,6 +109,8 @@ var $allPhashListed = array(); // phash values accumulations for link to clear all var $external_parsers = array(); // External content parsers - objects set here with file extensions as keys. var $iconFileNameCache = array(); // File extensions - icon map/cache. + var $indexerConfig = array(); // Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search'] + var $enableMetaphoneSearch = false; /** * Indexer object @@ -154,6 +156,12 @@ // Return if no page id: if ($this->pObj->id<=0) return; + // Indexer configuration from Extension Manager interface: + $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']); + + // Workaround: If the extension configuration has not been updated yet, the value does not exist + $this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ?
1 : 0) : 1; + // Initialize max-list items $this->maxListPerPage = t3lib_div::_GP('listALL') ? 100000 : 100; @@ -206,7 +214,7 @@ $theOutput.=$this->pObj->doc->spacer(5); $theOutput.=$this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1); - } elseif (t3lib_div::_GET('metaphone')) { + } elseif ($this->enableMetaphoneSearch && t3lib_div::_GET('metaphone')) { // Show title / function menu: $theOutput.=$this->pObj->doc->spacer(5); $theOutput.=$this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1); @@ -677,12 +685,14 @@ $showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin(); $content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec); - // Group metaphone hash: - $metaphone = array(); - foreach($ftrows as $row) { - $metaphone[$row['metaphone']][] = $row['baseword']; + if ($this->enableMetaphoneSearch) { + // Group metaphone hash: + $metaphone = array(); + foreach ($ftrows as $row) { + $metaphone[$row['metaphone']][] = $row['baseword']; + } + $content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:'); } - $content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:'); // Finding top-20 on frequency for this phash: $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( Index: typo3/sysext/indexed_search/ext_conf_template.txt =================================================================== --- typo3/sysext/indexed_search/ext_conf_template.txt (Revision 4388) +++ typo3/sysext/indexed_search/ext_conf_template.txt (Arbeitskopie) @@ -28,6 +28,9 @@ # cat=basic; type=boolean; label=Disable Indexing in Frontend: By default pages are indexed during viewing of pages in the frontend. You can disable this features so indexing of pages is only initiated through the backend page crawler. disableFrontendIndexing = 0 + # cat=basic; type=int; label=Enable metaphone search (sounds like). 
0=disabled, 1=use internal metaphone parser, 2=use advanced doubleMetaphone parser. +enableMetaphoneSearch = 1 + # cat=basic; type=int; label=Min TTL (hours) for indexed page: The time in hours that must pass before an indexed page can be indexed again regardless of changes on the page. minAge = 24 @@ -40,7 +43,7 @@ # cat=basic; type=boolean; label=Use "crawler" extension to index external files: When external files are found on a page they are added to the "crawler" extensions queue and indexed via the cronscript running the crawler. This eliminates problems with for example many PDF files on a page. Requires a proper configuration of the "crawler" extension. useCrawlerForExternalFiles = 0 - # cat=basic; type=int; label=Bitmask for Flags (Advanced): By this value (0-255) you can filter the importance of <title> (128), <keywords> (64) and <description> (32) content from HTML documents. By default none of these will have any importance over the other. Setting the value to eg. 192 means that title-tag content and meta-keywords will be flagged (and rate higher in search results) + # cat=basic; type=int; label=Bitmask for Flags (Advanced): By this value (0-255) you can filter the importance of <title> (128), <keywords> (64) and <description> (32) content from HTML documents. By setting this to 0, none of these fields will have any importance over the other. The default value 192 means that title-tag content and meta-keywords will be flagged (and rated higher in search results) flagBitMask = 192 # cat=basic; type=string; label=Ignore Extensions: List of file extensions that the external parser will ignore (despite having support for them). Comma list.