diff -rNu typo3_src-4.2.2/t3lib/class.t3lib_tcemain.php typo3_src-4.2.2_mysql_fulltext_index/t3lib/class.t3lib_tcemain.php --- typo3_src-4.2.2/t3lib/class.t3lib_tcemain.php 2008-10-06 12:12:10.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/t3lib/class.t3lib_tcemain.php 2008-11-04 00:59:01.000000000 +0100 @@ -7022,7 +7022,8 @@ if ($this->admin || $this->BE_USER->getTSConfigVal('options.clearCache.all')) { if (t3lib_extMgm::isLoaded('cms')) { $this->internal_clearPageCache(); - $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pagesection',''); + $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_pagesection', ''); + $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_treelist', ''); } $GLOBALS['TYPO3_DB']->exec_DELETEquery('cache_hash',''); diff -rNu typo3_src-4.2.2/typo3/sysext/cms/ext_localconf.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/ext_localconf.php --- typo3_src-4.2.2/typo3/sysext/cms/ext_localconf.php 2008-10-06 12:12:26.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/ext_localconf.php 2008-11-04 00:59:01.000000000 +0100 @@ -21,4 +21,10 @@ ); + // registering hooks for the treelist cache +$TYPO3_CONF_VARS['SC_OPTIONS']['t3lib/class.t3lib_tcemain.php']['processDatamapClass'][] = 'EXT:cms/tslib/hooks/class.tx_cms_treelistcacheupdate.php:&tx_cms_treelistCacheUpdate'; +$TYPO3_CONF_VARS['SC_OPTIONS']['t3lib/class.t3lib_tcemain.php']['processCmdmapClass'][] = 'EXT:cms/tslib/hooks/class.tx_cms_treelistcacheupdate.php:&tx_cms_treelistCacheUpdate'; +$TYPO3_CONF_VARS['SC_OPTIONS']['t3lib/class.t3lib_tcemain.php']['moveRecordClass'][] = 'EXT:cms/tslib/hooks/class.tx_cms_treelistcacheupdate.php:&tx_cms_treelistCacheUpdate'; + + ?> \ Kein Zeilenumbruch am Dateiende. 
diff -rNu typo3_src-4.2.2/typo3/sysext/cms/ext_tables.sql typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/ext_tables.sql --- typo3_src-4.2.2/typo3/sysext/cms/ext_tables.sql 2008-10-06 12:12:26.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/ext_tables.sql 2008-11-04 00:59:01.000000000 +0100 @@ -73,6 +73,19 @@ # +# Table structure for table 'cache_treelist' +# +CREATE TABLE cache_treelist ( + md5hash varchar(32) DEFAULT '' NOT NULL, + pid int(11) DEFAULT '0' NOT NULL, + treelist text NOT NULL, + tstamp int(11) DEFAULT '0' NOT NULL, + expires int(11) unsigned DEFAULT '0' NOT NULL, + PRIMARY KEY (md5hash) +) ENGINE=InnoDB; + + +# # Table structure for table 'fe_groups' # CREATE TABLE fe_groups ( diff -rNu typo3_src-4.2.2/typo3/sysext/cms/tslib/class.tslib_content.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/tslib/class.tslib_content.php --- typo3_src-4.2.2/typo3/sysext/cms/tslib/class.tslib_content.php 2008-10-06 12:12:26.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/tslib/class.tslib_content.php 2008-11-04 00:59:01.000000000 +0100 @@ -6484,13 +6484,19 @@ } /** - * Generates a list of Page-uid's from $id. List does not include $id itself (unless the id specified is negative in which case it does!) + * Generates a list of Page-uid's from $id. List does not include $id itself + * (unless the id specified is negative in which case it does!) * The only pages WHICH PREVENTS DECENDING in a branch are * - deleted pages, - * - pages in a recycler or of the Backend User Section type - * - pages that has the extendToSubpages set, WHERE start/endtime, hidden and fe_users would hide the records. - * Apart from that, pages with enable-fields excluding them, will also be removed. HOWEVER $dontCheckEnableFields set will allow enableFields-excluded pages to be included anyway - including extendToSubpages sections! 
- * Mount Pages are also descended but notice that these ID numbers are not useful for links unless the correct MPvar is set. + * - pages in a recycler (doktype = 255) or of the Backend User Section (doktype = 6) type + * - pages that have the extendToSubpages set, WHERE start/endtime, hidden + * and fe_users would hide the records. + * Apart from that, pages with enable-fields excluding them, will also be + * removed. HOWEVER $dontCheckEnableFields set will allow + * enableFields-excluded pages to be included anyway - including + * extendToSubpages sections! + * Mount Pages are also descended but notice that these ID numbers are not + * useful for links unless the correct MPvar is set. * * @param integer The id of the start page from which point in the page tree to decend. IF NEGATIVE the id itself is included in the end of the list (only if $begin is 0) AND the output does NOT contain a last comma. Recommended since it will resolve the input ID for mount pages correctly and also check if the start ID actually exists! * @param integer The number of levels to decend. If you want to decend infinitely, just set this to 100 or so. Should be at least "1" since zero will just make the function return (no decend...) @@ -6503,84 +6509,143 @@ * @return string Returns the list with a comma in the end (if any pages selected and not if $id is negative and $id is added itself) - which means the input page id can comfortably be appended to the output string if you need it to. 
* @see tslib_fe::checkEnableFields(), tslib_fe::checkPagerecordForIncludeSection() */ - function getTreeList($id,$depth,$begin=0,$dontCheckEnableFields=FALSE,$addSelectFields='',$moreWhereClauses='', $prevId_array=array(), $recursionLevel=0) { + public function getTreeList($id, $depth, $begin = 0, $dontCheckEnableFields = false, $addSelectFields = '', $moreWhereClauses = '', array $prevId_array = array(), $recursionLevel = 0) { // Init vars: - $allFields = 'uid,hidden,starttime,endtime,fe_group,extendToSubpages,doktype,php_tree_stop,mount_pid,mount_pid_ol,t3ver_state'.$addSelectFields; - $depth = intval($depth); - $begin = intval($begin); - $id = intval($id); - $theList = ''; - $addId = 0; + $allFields = 'uid,hidden,starttime,endtime,fe_group,extendToSubpages,doktype,php_tree_stop,mount_pid,mount_pid_ol,t3ver_state'.$addSelectFields; + $depth = intval($depth); + $begin = intval($begin); + $id = intval($id); + $theList = ''; + $addId = 0; + $requestHash = ''; - if ($id) { + if ($id) { // First level, check id (second level, this is done BEFORE the recursive call) - if (!$recursionLevel) { + if (!$recursionLevel) { + + // check cache + + // first, create the hash for this request - not sure yet whether we need all these parameters though + $parameters = array( + $id, + $depth, + $begin, + $dontCheckEnableFields, + $addSelectFields, + $moreWhereClauses, + $prevId_array + ); + $requestHash = md5(serialize($parameters)); + + $cacheEntry = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( + 'treelist', + 'cache_treelist', + 'md5hash = \'' . $requestHash . '\' AND ( expires > ' . time() . 
' OR expires = 0 )' + ); + + if (isset($cacheEntry[0]['treelist'])) { + // cache hit - an empty ('' or '0') treelist is a valid cached result, too + t3lib_div::devLog('Cache Treelist: HIT', 'tslib_cObj'); + return $cacheEntry[0]['treelist']; + } else { + // cache miss - the list is built below and stored at the end + t3lib_div::devLog('Cache Treelist: MISS', 'tslib_cObj'); + } + // If Id less than zero it means we should add the real id to list: - if ($id < 0) { + if ($id < 0) { $addId = $id = abs($id); } // Check start page: - if ($GLOBALS['TSFE']->sys_page->getRawRecord('pages',$id,'uid')) { + if ($GLOBALS['TSFE']->sys_page->getRawRecord('pages', $id, 'uid')) { // Find mount point if any: $mount_info = $GLOBALS['TSFE']->sys_page->getMountPointInfo($id); - if (is_array($mount_info)) { + if (is_array($mount_info)) { $id = $mount_info['mount_pid']; // In Overlay mode, use the mounted page uid as added ID!: - if ($addId && $mount_info['overlay']) { + if ($addId && $mount_info['overlay']) { $addId = $id; } } - } else return ''; // Return blank if the start page was NOT found at all! + } else { + return ''; // Return blank if the start page was NOT found at all! + } } // Add this ID to the array of IDs - if ($begin<=0) { + if ($begin <= 0) { $prevId_array[] = $id; } // Select sublevel: - if ($depth>0) { - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery($allFields, 'pages', 'pid='.intval($id).' AND deleted=0 '.$moreWhereClauses, '' ,'sorting'); - while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { - $GLOBALS['TSFE']->sys_page->versionOL('pages',$row); - - if ($row['doktype']==255 || $row['doktype']==6 || $row['t3ver_state']>0) { unset($row); } // Doing this after the overlay to make sure changes in the overlay are respected. 
- - if (is_array($row)) { - // Find mount point if any: - $next_id = $row['uid']; - $mount_info = $GLOBALS['TSFE']->sys_page->getMountPointInfo($next_id, $row); - // Overlay mode: - if (is_array($mount_info) && $mount_info['overlay']) { - $next_id = $mount_info['mount_pid']; - $res2 = $GLOBALS['TYPO3_DB']->exec_SELECTquery($allFields, 'pages', 'uid='.intval($next_id).' AND deleted=0 '.$moreWhereClauses, '' ,'sorting'); - $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res2); - $GLOBALS['TYPO3_DB']->sql_free_result($res2); - $GLOBALS['TSFE']->sys_page->versionOL('pages',$row); + if ($depth > 0) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( + $allFields, + 'pages', + 'pid = '.intval($id).' AND deleted = 0 '.$moreWhereClauses, + '', + 'sorting' + ); + + while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + $GLOBALS['TSFE']->sys_page->versionOL('pages', $row); + + if (!is_array($row) || $row['doktype'] == 255 || $row['doktype'] == 6 || $row['t3ver_state'] > 0) { + // Doing this after the overlay to make sure changes + // in the overlay are respected. Skip rows that are no + // longer an array after the overlay, and do not process + // pages of type recycler / BE user section or below them + continue; + } + + // Find mount point if any: + $next_id = $row['uid']; + $mount_info = $GLOBALS['TSFE']->sys_page->getMountPointInfo($next_id, $row); - if ($row['doktype']==255 || $row['doktype']==6 || $row['t3ver_state']>0) { unset($row); } // Doing this after the overlay to make sure changes in the overlay are respected. + // Overlay mode: + if (is_array($mount_info) && $mount_info['overlay']) { + $next_id = $mount_info['mount_pid']; + + $res2 = $GLOBALS['TYPO3_DB']->exec_SELECTquery( + $allFields, + 'pages', + 'uid = '.intval($next_id).' 
AND deleted = 0 '.$moreWhereClauses, + '' , + 'sorting' + ); + $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res2); + $GLOBALS['TYPO3_DB']->sql_free_result($res2); + + $GLOBALS['TSFE']->sys_page->versionOL('pages', $row); + + if (!is_array($row) || $row['doktype'] == 255 || $row['doktype'] == 6 || $row['t3ver_state'] > 0) { + // Doing this after the overlay to make sure + // changes in the overlay are respected. + // Also skip if the mounted page record was not found. + // see above + continue; } - // Add record: - if (is_array($row) && ($dontCheckEnableFields || $GLOBALS['TSFE']->checkPagerecordForIncludeSection($row))) { - // Add ID to list: - if ($begin<=0) { - if ($dontCheckEnableFields || $GLOBALS['TSFE']->checkEnableFields($row)) { - $theList.= $next_id.','; - } + } + // Add record: + if ($dontCheckEnableFields || $GLOBALS['TSFE']->checkPagerecordForIncludeSection($row)) { + // Add ID to list: + if ($begin <= 0) { + if ($dontCheckEnableFields || $GLOBALS['TSFE']->checkEnableFields($row)) { + $theList.= $next_id.','; } - // Next level: - if ($depth>1 && !$row['php_tree_stop']) { - // Normal mode: - if (is_array($mount_info) && !$mount_info['overlay']) { - $next_id = $mount_info['mount_pid']; - } - // Call recursively, if the id is not in prevID_array: - if (!in_array($next_id,$prevId_array)) { - $theList.= tslib_cObj::getTreeList($next_id, $depth-1, $begin-1, $dontCheckEnableFields, $addSelectFields, $moreWhereClauses, $prevId_array, $recursionLevel+1); - } + } + // Next level: + if ($depth > 1 && !$row['php_tree_stop']) { + // Normal mode: + if (is_array($mount_info) && !$mount_info['overlay']) { + $next_id = $mount_info['mount_pid']; + } + // Call recursively, if the id is not in prevID_array: + if (!in_array($next_id, $prevId_array)) { + $theList.= tslib_cObj::getTreeList($next_id, $depth-1, $begin-1, $dontCheckEnableFields, $addSelectFields, $moreWhereClauses, $prevId_array, $recursionLevel+1); } } } @@ -6589,14 +6654,24 @@ } } // If first run, check if the ID should be returned: - if (!$recursionLevel) { - if ($addId) { - if 
($begin>0) { + if (!$recursionLevel) { + if ($addId) { + if ($begin > 0) { $theList.= 0; } else { $theList.= $addId; } } + + $GLOBALS['TYPO3_DB']->exec_INSERTquery( + 'cache_treelist', + array( + 'md5hash' => $requestHash, + 'pid' => $id, + 'treelist' => $theList, + 'tstamp' => time() + ) + ); } // Return list: return $theList; diff -rNu typo3_src-4.2.2/typo3/sysext/cms/tslib/class.tslib_fe.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/tslib/class.tslib_fe.php --- typo3_src-4.2.2/typo3/sysext/cms/tslib/class.tslib_fe.php 2008-10-06 12:12:26.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/tslib/class.tslib_fe.php 2008-11-04 00:59:01.000000000 +0100 @@ -3031,7 +3031,7 @@ * @see INTincScript() */ protected function INTincScript_includeLibs($INTiS_config) { - global $TYPO3_CONF_VARS; + global $TYPO3_CONF_VARS, $TCA; $GLOBALS['TT']->push('Include libraries'); foreach($INTiS_config as $INTiS_cPart) { @@ -4593,5 +4593,4 @@ if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['tslib/class.tslib_fe.php']) { include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['tslib/class.tslib_fe.php']); } - ?> diff -rNu typo3_src-4.2.2/typo3/sysext/cms/tslib/hooks/class.tx_cms_treelistcacheupdate.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/tslib/hooks/class.tx_cms_treelistcacheupdate.php --- typo3_src-4.2.2/typo3/sysext/cms/tslib/hooks/class.tx_cms_treelistcacheupdate.php 1970-01-01 01:00:00.000000000 +0100 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/cms/tslib/hooks/class.tx_cms_treelistcacheupdate.php 2008-11-04 00:59:01.000000000 +0100 @@ -0,0 +1,453 @@ + + * @package TYPO3 + * @subpackage tslib + */ +class tx_cms_treelistCacheUpdate { + + // should not be manipulated from others except through the + // configuration provided @see __construct() + private $updateRequiringFields = array( + 'pid', + 'php_tree_stop', + 'extendToSubpages' + ); + + /** + * constructor, adds update requiring fields to the default ones 
+ * + */ + public function __construct() { + + // as enableFields can be set dynamically we add them here + $pagesEnableFields = $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']; + foreach($pagesEnableFields as $pagesEnableField) { + $this->updateRequiringFields[] = $pagesEnableField; + } + $this->updateRequiringFields[] = $GLOBALS['TCA']['pages']['ctrl']['delete']; + + // extension can add fields to the pages table that require an + // update of the treelist cache, too; so we also add those + // example: $TYPO3_CONF_VARS['BE']['additionalTreelistUpdateFields'] .= ',my_field'; + if (!empty($GLOBALS['TYPO3_CONF_VARS']['BE']['additionalTreelistUpdateFields'])) { + $additionalTreelistUpdateFields = t3lib_div::trimExplode( + ',', + $GLOBALS['TYPO3_CONF_VARS']['BE']['additionalTreelistUpdateFields'], + TRUE + ); + + foreach($additionalTreelistUpdateFields as $additionalTreelistUpdateField) { + $this->updateRequiringFields[] = $additionalTreelistUpdateField; + } + } + + } + + /** + * waits for TCEmain commands and looks for changed pages, if found further + * changes take place to determine whether the cache needs to be updated + * + * @param string TCEmain operation status, either 'new' or 'update' + * @param string the DB table the operation was carried out on + * @param mixed the record's uid for update records, a string to look the record's uid up after it has been created + * @param array array of changed fields and their new values + * @param t3lib_TCEmain TCEmain parent object + */ + public function processDatamap_afterDatabaseOperations($status, $table, $recordId, array $updatedFields, t3lib_TCEmain $tceMain) { + + if($table == 'pages' && $this->requiresUpdate($updatedFields)) { + $affectedPagePid = 0; + $affectedPageUid = 0; + + if ($status == 'new') { + // detect new pages + + // resolve the uid + $affectedPageUid = $tceMain->substNEWwithIDs[$recordId]; + $affectedPagePid = $updatedFields['pid']; + } elseif ($status == 'update') { + // detect updated pages + + 
$affectedPageUid = $recordId; + + /* + when updating a page the pid is not directly available so we + need to retrieve it ourselves. + */ + $fullPageRecord = t3lib_BEfunc::getRecord($table, $recordId); + $affectedPagePid = $fullPageRecord['pid']; + } + + $clearCacheActions = $this->determineClearCacheActions( + $status, + $updatedFields + ); + + $this->processClearCacheActions( + $affectedPageUid, + $affectedPagePid, + $updatedFields, + $clearCacheActions + ); + } + } + + /** + * waits for TCEmain commands and looks for deleted pages, if found further + * changes take place to determine whether the cache needs to be updated + * + * @param string the TCE command + * @param string the record's table + * @param integer the record's uid + * @param array the commands value, typically an array with more detailed command information + * @param t3lib_TCEmain the TCEmain parent object + */ + public function processCmdmap_postProcess($command, $table, $recordId, $commandValue, t3lib_TCEmain $tceMain) { + + if ($table == 'pages' && $command == 'delete') { + + $deletedRecord = t3lib_BEfunc::getRecord( + $table, + $recordId, + '*', + '', + FALSE + ); + + $affectedPageUid = $deletedRecord['uid']; + $affectedPagePid = $deletedRecord['pid']; + // faking the updated fields + $updatedFields = array('deleted' => 1); + + $clearCacheActions = $this->determineClearCacheActions( + 'update', + $updatedFields + ); + + $this->processClearCacheActions( + $affectedPageUid, + $affectedPagePid, + $updatedFields, + $clearCacheActions + ); + } + } + + /** + * waits for TCEmain commands and looks for moved pages, if found further + * changes take place to determine whether the cache needs to be updated + * + * @param string table name of the moved record + * @param integer the record's uid + * @param integer the record's destination page id + * @param array the record that moved + * @param array array of changed fields + * @param t3lib_TCEmain TCEmain parent object + */ + public function 
moveRecord_firstElementPostProcess($table, $recordId, $destinationPid, array $movedRecord, array $updatedFields, t3lib_TCEmain $tceMain) { + + if($table == 'pages' && $this->requiresUpdate($updatedFields)) { + + $affectedPageUid = $recordId; + $affectedPageOldPid = $movedRecord['pid']; + $affectedPageNewPid = $updatedFields['pid']; + + $clearCacheActions = $this->determineClearCacheActions( + 'update', + $updatedFields + ); + + // clear treelist entries for old parent page + $this->processClearCacheActions( + $affectedPageUid, + $affectedPageOldPid, + $updatedFields, + $clearCacheActions + ); + // clear treelist entries for new parent page + $this->processClearCacheActions( + $affectedPageUid, + $affectedPageNewPid, + $updatedFields, + $clearCacheActions + ); + } + } + + /** + * waits for TCEmain commands and looks for moved pages, if found further + * changes take place to determine whether the cache needs to be updated + * + * @param string table name of the moved record + * @param integer the record's uid + * @param integer the record's destination page id + * @param integer (negative) page id the page has been moved after + * @param array the record that moved + * @param array array of changed fields + * @param t3lib_TCEmain TCEmain parent object + */ + public function moveRecord_afterAnotherElementPostProcess($table, $recordId, $destinationPid, $originalDestinationPid, array $movedRecord, array $updatedFields, t3lib_TCEmain $tceMain) { + + if($table == 'pages' && $this->requiresUpdate($updatedFields)) { + + $affectedPageUid = $recordId; + $affectedPageOldPid = $movedRecord['pid']; + $affectedPageNewPid = $updatedFields['pid']; + + $clearCacheActions = $this->determineClearCacheActions( + 'update', + $updatedFields + ); + + // clear treelist entries for old parent page + $this->processClearCacheActions( + $affectedPageUid, + $affectedPageOldPid, + $updatedFields, + $clearCacheActions + ); + // clear treelist entries for new parent page + 
$this->processClearCacheActions( + $affectedPageUid, + $affectedPageNewPid, + $updatedFields, + $clearCacheActions + ); + } + } + + /** + * checks whether the change requires an update of the treelist cache + * + * @param array array of changed fields + * @return boolean true if the treelist cache needs to be updated, false if no update to the cache is required + */ + protected function requiresUpdate(array $updatedFields) { + $requiresUpdate = FALSE; + + $updatedFieldNames = array_keys($updatedFields); + foreach ($updatedFieldNames as $updatedFieldName) { + if(in_array($updatedFieldName, $this->updateRequiringFields)) { + $requiresUpdate = TRUE; + break; + } + } + + return $requiresUpdate; + } + + /** + * calls the cache maintenance functions according to the determined actions + * + * @param integer uid of the affected page + * @param integer parent uid of the affected page + * @param array array of updated fields and their new values + * @param array array of actions to carry out + */ + protected function processClearCacheActions($affectedPage, $affectedParentPage, $updatedFields, array $actions) { + $actionNames = array_keys($actions); + foreach ($actionNames as $actionName) { + switch ($actionName) { + case 'allParents': + $this->clearCacheForAllParents($affectedParentPage); + break; + case 'setExpiration': + // only used when setting an end time for a page + $expirationTime = $updatedFields['endtime']; + $this->setCacheExpiration($affectedPage, $expirationTime); + break; + case 'uidInTreelist': + $this->clearCacheWhereUidInTreelist($affectedPage); + break; + } + } + + // from time to time clean the cache from expired entries + // (theoretically every 1000 calls) + $randomNumber = rand(1, 1000); + if($randomNumber == 500) { + $this->removeExpiredCacheEntries(); + } + } + + /** + * clears the treelist cache for all parents of a changed page. 
+ * gets called after creating a new page and after moving a page + * + * @param integer parent page id of the changed page, the page to start clearing from + */ + protected function clearCacheForAllParents($affectedParentPage) { + + $rootline = t3lib_BEfunc::BEgetRootLine($affectedParentPage); + + $rootlineIds = array(); + foreach($rootline as $page) { + if($page['uid'] != 0) { + $rootlineIds[] = $page['uid']; + } + } + + foreach($rootlineIds as $rootlineId) { + + // delete the rootline, must contain + $GLOBALS['TYPO3_DB']->exec_DELETEquery( + 'cache_treelist', + 'pid = ' . $rootlineId + ); + } + } + + /** + * clears the treelist cache for all pages where the affected page is found + * in the treelist + * + * @param integer Id of the changed page + */ + protected function clearCacheWhereUidInTreelist($affectedPage) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery( + 'cache_treelist', + $GLOBALS['TYPO3_DB']->listQuery( + 'treelist', + $affectedPage, + 'cache_treelist' + ) + ); + } + + /** + * sets an expiration time for all cache entries having the changed page in + * the treelist. + * + * @param integer uid of the changed page + */ + protected function setCacheExpiration($affectedPage, $expirationTime) { + + $GLOBALS['TYPO3_DB']->exec_UPDATEquery( + 'cache_treelist', + $GLOBALS['TYPO3_DB']->listQuery( + 'treelist', + $affectedPage, + 'cache_treelist' + ), + array( + 'expires' => $expirationTime + ) + ); + } + + /** + * removes all expired treelist cache entries + * (entries with expires = 0 never expire and are kept) + */ + protected function removeExpiredCacheEntries() { + $GLOBALS['TYPO3_DB']->exec_DELETEquery( + 'cache_treelist', + 'expires > 0 AND expires <= ' . 
time() + ); + } + + /** + * determines what happened to the page record, this is necessary to clear + * as few cache entries as possible later + * + * @param string TCEmain operation status, either 'new' or 'update' + * @param array array of updated fields + * @return array list of actions that happened to the page record (action name => TRUE) + */ + protected function determineClearCacheActions($status, $updatedFields) { + $actions = array(); + + if ($status == 'new') { + // new page + $actions['allParents'] = TRUE; + } elseif ($status == 'update') { + $updatedFieldNames = array_keys($updatedFields); + + foreach ($updatedFieldNames as $updatedFieldName) { + switch ($updatedFieldName) { + case 'pid': + // page moved + $actions['allParents'] = TRUE; + $actions['uidInTreelist'] = TRUE; + break; + case $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['disabled']: + case $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['fe_group']: + case $GLOBALS['TCA']['pages']['ctrl']['delete']: + case 'extendToSubpages': + case 'php_tree_stop': + // page hidden / unhidden / deleted / extendToSubpages set + // php_tree_stop and/or FE groups set + $actions['uidInTreelist'] = TRUE; + break; + case $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['starttime']: + /* + start time set/unset + Doesn't matter whether it was set or unset, in both + cases the cache needs to be cleared. When setting a + start time the page must be removed from the + treelist. When unsetting the start time it must + become listed in the tree list again. + */ + $actions['uidInTreelist'] = TRUE; + break; + case $GLOBALS['TCA']['pages']['ctrl']['enablecolumns']['endtime']: + /* + end time set/unset + When setting an end time the cache entry needs an + expiration time. When unsetting the end time the + page must become listed in the treelist again. 
+ */ + if($updatedFields['endtime'] > 0) { + $actions['setExpiration'] = TRUE; + } else { + $actions['uidInTreelist'] = TRUE; + } + break; + default: + if (in_array($updatedFieldName, $this->updateRequiringFields)) { + $actions['uidInTreelist'] = TRUE; + } + } + } + } + + return $actions; + } + +} + + +if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['tslib/hooks/class.tx_cms_treelistcacheupdate.php']) { + include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['tslib/hooks/class.tx_cms_treelistcacheupdate.php']); +} diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/ChangeLog typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/ChangeLog --- typo3_src-4.2.2/typo3/sysext/indexed_search/ChangeLog 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/ChangeLog 2008-11-04 00:59:01.000000000 +0100 @@ -1,3 +1,7 @@ +2008-11-03 Michael Stucki + + * Check if files in search matches are still existing before displaying them + 2008-04-01 Michael Stucki * Fixed bug #7980: Fix wrong TypoScript code in plugin template diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/class.doublemetaphone.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/class.doublemetaphone.php --- typo3_src-4.2.2/typo3/sysext/indexed_search/class.doublemetaphone.php 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/class.doublemetaphone.php 2008-11-04 00:59:01.000000000 +0100 @@ -36,9 +36,8 @@ // TYPO3: Had to change name to "user_DoubleMetaPhone" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so: -// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you simply configure TYPO3 so by setting the line below in your localconf.php file: -// TYPO3: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone'; -// 
TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example. +// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you can enable it in the extension configuration +// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example (also see ext_localconf.php) class user_DoubleMetaPhone { diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/class.indexer.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/class.indexer.php --- typo3_src-4.2.2/typo3/sysext/indexed_search/class.indexer.php 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/class.indexer.php 2008-11-04 00:59:01.000000000 +0100 @@ -191,6 +191,10 @@ var $freqRange = 32000; var $freqMax = 0.1; + var $enableMetaphoneSearch = false; + var $storeMetaphoneInfoAsWords; + var $metaphoneContent = ''; + // Objects: /** * Charset class object @@ -452,6 +456,11 @@ $this->maxExternalFiles = t3lib_div::intInRange($this->indexerConfig['maxExternalFiles'],0,1000,5); $this->flagBitMask = t3lib_div::intInRange($this->indexerConfig['flagBitMask'],0,255); + // Workaround: If the extension configuration was not updated yet, the value is not existing + $this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0) : 1; + + $this->storeMetaphoneInfoAsWords = $this->isTableUsed('index_words') ? false : ($this->enableMetaphoneSearch ? true : false); + // Initialize external document parsers: // Example configuration, see ext_localconf.php of this file! 
if ($this->conf['index_externals']) { @@ -468,7 +477,8 @@ // Initialize metaphone hook: // Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone'; - if ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) { + // Make sure that the hook is loaded _after_ indexed_search as this may overwrite the hook depending on the configuration. + if ($this->enableMetaphoneSearch && $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']) { $this->metaphoneObj = &t3lib_div::getUserObj($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']); $this->metaphoneObj->pObj = &$this; } @@ -550,7 +560,7 @@ $this->log_pull(); // Calculating a hash over what is to be the actual page content. Maybe this hash should not include title,description and keywords? The bodytext is the primary concern. (on the other hand a changed page-title would make no difference then, so dont!) - $this->content_md5h = $this->md5inthash(implode($this->contentParts,'')); + $this->content_md5h = $this->md5inthash(implode('', $this->contentParts)); // This function checks if there is already a page (with gr_list = 0,-1) indexed and if that page has the very same contentHash. // If the contentHash is the same, then we can rest assured that this page is already indexed and regardless of mtime and origContent we don't need to do anything more. 
@@ -580,8 +590,10 @@ // Check words and submit to word list if not there $this->log_push('Check word list and submit words',''); - $this->checkWordList($indexArr); - $this->submitWords($indexArr,$this->hash['phash']); + if ($this->isTableUsed('index_words')) { + $this->checkWordList($indexArr); + $this->submitWords($indexArr,$this->hash['phash']); + } $this->log_pull(); // Set parsetime @@ -1055,8 +1067,10 @@ // Check words and submit to word list if not there $this->log_push('Check word list and submit words',''); - $this->checkWordList($indexArr); - $this->submitWords($indexArr,$phash_arr['phash']); + if ($this->isTableUsed('index_words')) { + $this->checkWordList($indexArr); + $this->submitWords($indexArr,$phash_arr['phash']); + } $this->log_pull(); // Set parsetime @@ -1244,7 +1258,7 @@ $this->analyzeHeaderinfo($indexArr,$content,'description',5); $this->analyzeBody($indexArr,$content); - return ($indexArr); + return $indexArr; } /** @@ -1257,13 +1271,29 @@ * @return void */ function analyzeHeaderinfo(&$retArr,$content,$key,$offset) { - reset($content[$key]); - while(list(,$val)=each($content[$key])) { - $val = substr($val,0,60); // Max 60 - because the baseword varchar IS 60. This MUST be the same. + foreach ($content[$key] as $val) { + $val = substr($val,0,60); // Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same. + + if (!isset($retArr[$val])) { + // Word ID (wid) + $retArr[$val]['hash'] = $this->md5inthash($val); + + // Metaphone value is also 60 only chars long + $metaphone = $this->enableMetaphoneSearch + ? 
substr($this->metaphone($val,$this->storeMetaphoneInfoAsWords),0,60) + : ''; + $retArr[$val]['metaphone'] = $metaphone; + } + + // Build metaphone fulltext string (can be used for fulltext indexing) + if ($this->storeMetaphoneInfoAsWords) { + $this->metaphoneContent.= ' '.$retArr[$val]['metaphone']; + } + + // Priority used for flagBitMask feature (see extension configuration) $retArr[$val]['cmp'] = $retArr[$val]['cmp']|pow(2,$offset); - $retArr[$val]['count'] = $retArr[$val]['count']+1; - $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7)); - $retArr[$val]['metaphone'] = $this->metaphone($val); + + $retArr[$val]['count']++; // Increase number of occurrences $this->wordcount++; } } @@ -1276,14 +1306,29 @@ * @return void */ function analyzeBody(&$retArr,$content) { - foreach($content['body'] as $key => $val) { - $val = substr($val,0,60); // Max 60 - because the baseword varchar IS 60. This MUST be the same. - if(!isset($retArr[$val])) { + foreach ($content['body'] as $key => $val) { + $val = substr($val,0,60); // Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same. + + if (!isset($retArr[$val])) { + // First occurrence (used for ranking results) $retArr[$val]['first'] = $key; - $retArr[$val]['hash'] = hexdec(substr(md5($val),0,7)); - $retArr[$val]['metaphone'] = $this->metaphone($val); + + // Word ID (wid) + $retArr[$val]['hash'] = $this->md5inthash($val); + + // Metaphone value is also only 60 chars long + $metaphone = $this->enableMetaphoneSearch + ? 
substr($this->metaphone($val,$this->storeMetaphoneInfoAsWords),0,60) + : ''; + $retArr[$val]['metaphone'] = $metaphone; + } + + // Build metaphone fulltext string (can be used for fulltext indexing) + if ($this->storeMetaphoneInfoAsWords) { + $this->metaphoneContent.= ' '.$retArr[$val]['metaphone']; } - $retArr[$val]['count'] = $retArr[$val]['count']+1; + + $retArr[$val]['count']++; // Increase number of occurrences $this->wordcount++; } } @@ -1300,14 +1345,18 @@ if (is_object($this->metaphoneObj)) { $tmp = $this->metaphoneObj->metaphone($word, $this->conf['sys_language_uid']); } else { + // Use native PHP function instead of advanced doubleMetaphone class $tmp = metaphone($word); } - // Return raw value? - if ($retRaw) return $tmp; + if ($retRaw) { // Return raw value? + $ret = $tmp; + } elseif (strlen($tmp)) { // Otherwise create hash and return integer + $ret = $this->md5inthash($tmp); + } else { + $ret = 0; + } - // Otherwise create hash and return integer - if($tmp=='') $ret=0; else $ret=hexdec(substr(md5($tmp),0,7)); return $ret; } @@ -1368,7 +1417,9 @@ 'freeIndexSetId' => intval($this->conf['freeIndexSetId']), ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); + } // PROCESSING index_section $this->submit_section($this->hash['phash'],$this->hash['phash']); @@ -1379,12 +1430,15 @@ // PROCESSING index_fulltext $fields = array( 'phash' => $this->hash['phash'], - 'fulltextdata' => implode(' ', $this->contentParts) + 'fulltextdata' => implode(' ', $this->contentParts), + 'metaphonedata' => $this->metaphoneContent ); if ($this->indexerConfig['fullTextDataLength']>0) { $fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']); } - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); + if ($this->isTableUsed('index_fulltext')) { + 
$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); + } // PROCESSING index_debug if ($this->indexerConfig['debugMode']) { @@ -1399,7 +1453,9 @@ 'lexer' => $this->lexerObj->debugString, )) ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields); + if ($this->isTableUsed('index_debug')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields); + } } } @@ -1420,7 +1476,9 @@ 'hash_gr_list' => $this->md5inthash($this->conf['gr_list']), 'gr_list' => $this->conf['gr_list'] ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields); + if ($this->isTableUsed('index_grlist')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields); + } } /** @@ -1440,7 +1498,9 @@ $this->getRootLineFields($fields); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields); + if ($this->isTableUsed('index_section')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields); + } } /** @@ -1452,11 +1512,15 @@ function removeOldIndexedPages($phash) { // Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here. $tableArr = explode(',','index_phash,index_section,index_grlist,index_fulltext,index_debug'); - foreach($tableArr as $table) { - $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); + foreach ($tableArr as $table) { + if ($this->isTableUsed($table)) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); + } } // Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file). 
- $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash)); + if ($this->isTableUsed('index_section')) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash)); + } } @@ -1520,22 +1584,27 @@ 'tstamp' => time(), 'crdate' => time(), 'gr_list' => $this->conf['gr_list'], - 'externalUrl' => $fileParts['scheme'] ? 1 : 0, - 'recordUid' => intval($this->conf['recordUid']), - 'freeIndexUid' => intval($this->conf['freeIndexUid']), - 'freeIndexSetId' => intval($this->conf['freeIndexSetId']), + 'externalUrl' => $fileParts['scheme'] ? 1 : 0, + 'recordUid' => intval($this->conf['recordUid']), + 'freeIndexUid' => intval($this->conf['freeIndexUid']), + 'freeIndexSetId' => intval($this->conf['freeIndexSetId']), ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields); + } // PROCESSING index_fulltext $fields = array( 'phash' => $hash['phash'], - 'fulltextdata' => implode(' ', $contentParts) + 'fulltextdata' => implode(' ', $contentParts), + 'metaphonedata' => $this->metaphoneContent ); if ($this->indexerConfig['fullTextDataLength']>0) { $fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']); } - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); + if ($this->isTableUsed('index_fulltext')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields); + } // PROCESSING index_debug if ($this->indexerConfig['debugMode']) { @@ -1548,7 +1617,9 @@ 'lexer' => $this->lexerObj->debugString, )) ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields); + if ($this->isTableUsed('index_debug')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields); + } } } @@ -1560,8 +1631,13 @@ */ function submitFile_grlist($hash) { // Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another 
one. - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')'); - if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')'); + } else { + $res = false; + } + + if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { $this->submit_grlist($hash,$hash); } } @@ -1573,9 +1649,14 @@ * @return void */ function submitFile_section($hash) { - // Testing if there is a section - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id'])); - if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + // Testing if there is already a section + if ($this->isTableUsed('index_section')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id'])); + } else { + $res = false; + } + + if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { $this->submit_section($hash,$this->hash['phash']); } } @@ -1587,11 +1668,12 @@ * @return void */ function removeOldIndexedFiles($phash) { - // Removing old registrations for tables. 
$tableArr = explode(',','index_phash,index_grlist,index_fulltext,index_debug'); - foreach($tableArr as $table) { - $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); + foreach ($tableArr as $table) { + if ($this->isTableUsed($table)) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash)); + } } } @@ -1623,13 +1705,17 @@ * @return integer Result integer: Generally: <0 = No indexing, >0 = Do indexing (see $this->reasons): -2) Min age was NOT exceeded and so indexing cannot occur. -1) mtime matched so no need to reindex page. 0) N/A 1) Max age exceeded, page must be indexed again. 2) mtime of indexed page doesn't match mtime given for current content and we must index page. 3) No mtime was set, so we will index... 4) No indexed page found, so of course we will index. */ function checkMtimeTstamp($mtime,$phash) { + $out = 0; // Select indexed page: - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash)); - $out = 0; + if ($this->isTableUsed('index_phash')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash)); + } else { + $res = false; + } // If there was an indexing of the page...: - if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { if ($this->tstamp_maxAge && ($row['tstamp']+$this->tstamp_maxAge) < time()) { // If max age is exceeded, index the page $out = 1; // The configured max-age was exceeded for the document and thus it's indexed. } else { @@ -1660,8 +1746,13 @@ */ function checkContentHash() { // With this query the page will only be indexed if it's content is different from the same "phash_grouping" -page. - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash A', 'A.phash_grouping='.intval($this->hash['phash_grouping']).' 
AND A.contentHash='.intval($this->content_md5h)); - if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + if ($this->isTableUsed('index_phash')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash', 'phash_grouping='.intval($this->hash['phash_grouping']).' AND contentHash='.intval($this->content_md5h)); + } else { + $res = false; + } + + if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { return $row; } return 1; @@ -1676,8 +1767,13 @@ * @return boolean Returns true if the document needs to be indexed (that is, there was no result) */ function checkExternalDocContentHash($hashGr,$content_md5h) { - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A', 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h)); - if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + if ($this->isTableUsed('index_phash')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash', 'phash_grouping='.intval($hashGr).' AND contentHash='.intval($content_md5h)); + } else { + $res = false; + } + + if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { return 0; } return 1; @@ -1690,8 +1786,12 @@ * @return void */ function is_grlist_set($phash_x) { - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x)); - return $GLOBALS['TYPO3_DB']->sql_num_rows($res); + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x)); + } else { + $res = false; + } + return $res ? $GLOBALS['TYPO3_DB']->sql_num_rows($res) : false; } /** @@ -1703,8 +1803,13 @@ * @see submit_grlist() */ function update_grlist($phash,$phash_x) { - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' 
AND hash_gr_list='.$this->md5inthash($this->conf['gr_list'])); - if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list'])); + } else { + $res = false; + } + + if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) { $this->submit_grlist($phash,$phash_x); $this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1); } @@ -1723,7 +1828,9 @@ ); if ($mtime) { $updateFields['item_mtime'] = intval($mtime); } - $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + } } /** @@ -1737,7 +1844,9 @@ 'freeIndexSetId' => intval($this->conf['freeIndexSetId']) ); - $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + } } /** @@ -1752,7 +1861,9 @@ 'parsetime' => intval($parsetime) ); - $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + if ($this->isTableUsed('index_phash')) { + $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields); + } } /** @@ -1765,7 +1876,9 @@ $updateFields = array(); $this->getRootLineFields($updateFields); - $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields); + if ($this->isTableUsed('index_section')) { + $GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields); + } } /** @@ -1795,14 +1908,21 @@ * @return void */ function removeLoginpagesWithContentHash() { - $res = 
$GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', ' + if ($this->isTableUsed('index_phash,index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', ' A.phash=B.phash AND A.phash_grouping='.intval($this->hash['phash_grouping']).' AND B.hash_gr_list!='.$this->md5inthash($this->defaultGrList).' AND A.contentHash='.intval($this->content_md5h)); - while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { - $this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1); - $this->removeOldIndexedPages($row['phash']); + } else { + $res = false; + } + + if ($res) { + while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + $this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1); + $this->removeOldIndexedPages($row['phash']); + } } } @@ -1817,6 +1937,27 @@ require_once(t3lib_extMgm::extPath('crawler').'class.tx_crawler_lib.php'); } + /** + * Check if the tables provided are configured for usage. + * This becomes necessary for extensions that provide additional database functionality like indexed_search_mysql. 
+ * + * @param string Comma-separated list of tables + * @return boolean True if given tables are enabled + */ + function isTableUsed($table_list) { + $OK = true; + $tableArr = t3lib_div::trimExplode(',', $table_list); + $enabledTableList = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['use_tables']; + + foreach ($tableArr as $table) { + if (!t3lib_div::inList($enabledTableList, $table)) { + $OK = false; + } + } + + return $OK; + } + @@ -1846,23 +1987,29 @@ } if (count($phashArr)) { $cwl = implode(',',$phashArr); - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')'); + if ($this->isTableUsed('index_words')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')'); + } else { + $res = false; + } - if($GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) { + if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) { $this->log_setTSlogMessage('Inserting words: '.(count($wl)-$GLOBALS['TYPO3_DB']->sql_num_rows($res)),1); - while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { unset($wl[$row['baseword']]); } reset($wl); - while(list($key,$val)=each($wl)) { + while (list($key,$val)=each($wl)) { $insertFields = array( 'wid' => $val['hash'], 'baseword' => $key, 'metaphone' => $val['metaphone'] ); // A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer as 60 chars (the baseword varchar is 60 characters...) this is not a problem. 
- $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields); + if ($this->isTableUsed('index_words')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields); + } } } } @@ -1876,9 +2023,11 @@ * @return void */ function submitWords($wl,$phash) { - $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash)); + if ($this->isTableUsed('index_rel')) { + $GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash)); + } - foreach($wl as $val) { + foreach ($wl as $val) { $insertFields = array( 'phash' => $phash, 'wid' => $val['hash'], @@ -1888,7 +2037,9 @@ 'flags' => ($val['cmp'] & $this->flagBitMask) ); - $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields); + if ($this->isTableUsed('index_rel')) { + $GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields); + } } } diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/doc/README.txt typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/doc/README.txt --- typo3_src-4.2.2/typo3/sysext/indexed_search/doc/README.txt 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/doc/README.txt 2008-11-04 00:59:01.000000000 +0100 @@ -1,2 +1,24 @@ A full documentation manual for the indexed search extension can be found in the extension "doc_indexed_search" in the TER. 
See http://typo3.org/documentation/document-library/extension-manuals/doc_indexed_search/current/view/ + + +This is a list of all tables which are used by this extension: + +index_phash +- Page information + +index_fulltext +- Fulltext data + +index_rel +- Relations between index_phash and index_words + +index_words +- baseword table + +index_section +- section index (= first 3 levels of the rootline for this document) + +index_grlist +- group list information +- indicates which gr_list has access to which phash diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/ext_conf_template.txt typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/ext_conf_template.txt --- typo3_src-4.2.2/typo3/sysext/indexed_search/ext_conf_template.txt 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/ext_conf_template.txt 2008-11-04 00:59:01.000000000 +0100 @@ -28,6 +28,9 @@ # cat=basic; type=boolean; label=Disable Indexing in Frontend: By default pages are indexed during viewing of pages in the frontend. You can disable this features so indexing of pages is only initiated through the backend page crawler. disableFrontendIndexing = 0 + # cat=basic; type=int; label=Enable metaphone search (sounds like). 0=disabled, 1=use internal metaphone parser, 2=use advanced doubleMetaphone parser. +enableMetaphoneSearch = 1 + # cat=basic; type=int; label=Min TTL (hours) for indexed page: The time in hours that must pass before an indexed page can be indexed again regardless of changes on the page. minAge = 24 @@ -40,7 +43,7 @@ # cat=basic; type=boolean; label=Use "crawler" extension to index external files: When external files are found on a page they are added to the "crawler" extensions queue and indexed via the cronscript running the crawler. This eliminates problems with for example many PDF files on a page. Requires a proper configuration of the "crawler" extension. 
useCrawlerForExternalFiles = 0 - # cat=basic; type=int; label=Bitmask for Flags (Advanced): By this value (0-255) you can filter the importance of (128), <keywords> (64) and <description> (32) content from HTML documents. By default none of these will have any importance over the other. Setting the value to eg. 192 means that title-tag content and meta-keywords will be flagged (and rate higher in search results) + # cat=basic; type=int; label=Bitmask for Flags (Advanced): By this value (0-255) you can filter the importance of <title> (128), <keywords> (64) and <description> (32) content from HTML documents. By setting this to 0, none of these fields will have any importance over the other. The default value 192 means that title-tag content and meta-keywords will be flagged (and rated higher in search results) flagBitMask = 192 # cat=basic; type=string; label=Ignore Extensions: List of file extensions that the external parser will ignore (despite having support for them). Comma list. diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/ext_localconf.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/ext_localconf.php --- typo3_src-4.2.2/typo3/sysext/indexed_search/ext_localconf.php 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/ext_localconf.php 2008-11-04 00:59:01.000000000 +0100 @@ -43,6 +43,14 @@ 'tif' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse', ); +$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['use_tables'] = 'index_phash,index_fulltext,index_rel,index_words,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config'; + +$_EXTCONF = unserialize($_EXTCONF); // unserializing the configuration so we can use it here: + + // Use the advanced doubleMetaphone parser instead of the internal one (with the default setting enableMetaphoneSearch = 1 the internal metaphone parser is used; only the value 2 selects doubleMetaphone) +if (isset($_EXTCONF['enableMetaphoneSearch']) && 
intval($_EXTCONF['enableMetaphoneSearch'])==2) { + $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone'; +} // EXAMPLE configuration of hooks: /* diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/ext_tables.sql typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/ext_tables.sql --- typo3_src-4.2.2/typo3/sysext/indexed_search/ext_tables.sql 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/ext_tables.sql 2008-11-04 00:59:01.000000000 +0100 @@ -38,6 +38,7 @@ CREATE TABLE index_fulltext ( phash int(11) DEFAULT '0' NOT NULL, fulltextdata mediumtext, + metaphonedata mediumtext NOT NULL, PRIMARY KEY (phash) ) ENGINE=InnoDB; @@ -116,7 +117,6 @@ PRIMARY KEY (uid) ) ENGINE=InnoDB; - # # Table structure for table 'index_stat_word' # @@ -125,6 +125,7 @@ word varchar(30) DEFAULT '' NOT NULL, index_stat_search_id int(11) DEFAULT '0' NOT NULL, tstamp int(11) DEFAULT '0' NOT NULL, + pageid int(11) DEFAULT '0' NOT NULL, PRIMARY KEY (uid), KEY tstamp (tstamp,word) ) ENGINE=InnoDB; @@ -177,17 +178,3 @@ PRIMARY KEY (uid), KEY parent (pid) ); - - -# -# Table structure for table 'index_stat_word' -# -CREATE TABLE index_stat_word ( - uid int(11) NOT NULL auto_increment, - word varchar(30) DEFAULT '' NOT NULL, - index_stat_search_id int(11) DEFAULT '0' NOT NULL, - tstamp int(11) DEFAULT '0' NOT NULL, - pageid int(11) DEFAULT '0' NOT NULL, - PRIMARY KEY (uid), - KEY tstamp (tstamp,word) -) ENGINE=InnoDB; diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php --- typo3_src-4.2.2/typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php 2008-10-06 12:12:27.000000000 +0200 +++ 
typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/modfunc1/class.tx_indexedsearch_modfunc1.php 2008-11-04 00:59:01.000000000 +0100 @@ -109,6 +109,8 @@ var $allPhashListed = array(); // phash values accumulations for link to clear all var $external_parsers = array(); // External content parsers - objects set here with file extensions as keys. var $iconFileNameCache = array(); // File extensions - icon map/cache. + var $indexerConfig = array(); // Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search'] + var $enableMetaphoneSearch = false; /** * Indexer object @@ -154,6 +156,12 @@ // Return if no page id: if ($this->pObj->id<=0) return; + // Indexer configuration from Extension Manager interface: + $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']); + + // Workaround: If the extension configuration was not updated yet, the value is not existing + $this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0) : 1; + // Initialize max-list items $this->maxListPerPage = t3lib_div::_GP('listALL') ? 
100000 : 100; @@ -206,7 +214,7 @@ $theOutput.=$this->pObj->doc->spacer(5); $theOutput.=$this->pObj->doc->section('Details for a word:',$this->showDetailsForWord(t3lib_div::_GET('wid')),0,1); - } elseif (t3lib_div::_GET('metaphone')) { + } elseif ($this->enableMetaphoneSearch && t3lib_div::_GET('metaphone')) { // Show title / function menu: $theOutput.=$this->pObj->doc->spacer(5); $theOutput.=$this->pObj->doc->section('Details for metaphone value:',$this->showDetailsForMetaphone(t3lib_div::_GET('metaphone')),0,1); @@ -677,12 +685,14 @@ $showStopWordCheckBox = $GLOBALS['BE_USER']->isAdmin(); $content.= $this->listWords($ftrows, 'All words found on page ('.count($ftrows).'):', $showStopWordCheckBox, $pageRec); - // Group metaphone hash: - $metaphone = array(); - foreach($ftrows as $row) { - $metaphone[$row['metaphone']][] = $row['baseword']; + if ($this->enableMetaphoneSearch) { + // Group metaphone hash: + $metaphone = array(); + foreach ($ftrows as $row) { + $metaphone[$row['metaphone']][] = $row['baseword']; + } + $content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:'); } - $content.= $this->listMetaphoneStat($metaphone, 'Metaphone stats:'); // Finding top-20 on frequency for this phash: $ftrows = $GLOBALS['TYPO3_DB']->exec_SELECTgetRows( diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php --- typo3_src-4.2.2/typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php 2008-11-04 00:59:01.000000000 +0100 @@ -150,10 +150,16 @@ var $cache_rl = array(); // Caching of root line data var $fe_groups_required = array(); // Required fe_groups memberships for display of a result. var $domain_records = array(); // Domain records (?) 
- var $wSelClauses = array(); // Select clauses for individual words var $resultSections = array(); // Page tree sections for search result. var $external_parsers = array(); // External parser objects var $iconFileNameCache = array(); // Storage of icons.... + var $templateCode; // Will hold the content of $conf['templateFile'] + var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results'; + var $indexerConfig = array(); // Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search'] + + var $enableMetaphoneSearch = false; + var $storeMetaphoneInfoAsWords; + /** * Lexer object @@ -162,14 +168,8 @@ */ var $lexerObj; - /** - * Indexer object - * - * @var tx_indexedsearch_indexer - */ - var $indexerObj; - var $templateCode; // Will hold the content of $conf['templateFile'] - var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results'; + const WILDCARD_LEFT = 1; + const WILDCARD_RIGHT = 2; /** @@ -186,9 +186,6 @@ $this->pi_loadLL(); $this->pi_setPiVarDefaults(); - // Initialize the indexer-class - just to use a few function (for making hashes) - $this->indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer'); - // Initialize: $this->initialize(); @@ -203,8 +200,8 @@ $this->printRules(). $content; - return $this->pi_wrapInBaseClass($content); - } + return $this->pi_wrapInBaseClass($content); + } /** * Initialize internal variables, especially selector box values for the search form and search words @@ -214,6 +211,11 @@ function initialize() { global $TYPO3_CONF_VARS; + // Indexer configuration from Extension Manager interface: + $this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']); + $this->enableMetaphoneSearch = $this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0; + $this->storeMetaphoneInfoAsWords = $this->isTableUsed('index_words') ? 
false : true; + // Initialize external document parsers for icon display and other soft operations if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'])) { foreach ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef) { @@ -305,6 +307,10 @@ ) ); + if (!$this->enableMetaphoneSearch) { + unset ($this->optValues['type']['10']); // Remove this option if metaphone search is disabled) + } + // Free Index Uid: if ($this->conf['search.']['defaultFreeIndexUidList']) { $uidList = t3lib_div::intExplode(',', $this->conf['search.']['defaultFreeIndexUidList']); @@ -378,8 +384,10 @@ // Add search languages: $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'sys_language', '1=1'.$this->cObj->enableFields('sys_language')); - while($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { - $this->optValues['lang'][$lR['uid']] = $lR['title']; + if ($res) { + while ($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + $this->optValues['lang'][$lR['uid']] = $lR['title']; + } } // Calling hook for modification of initialized content @@ -416,11 +424,11 @@ } /** - * Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holds the SQL operator (eg. AND, OR) + * Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holding the SQL operator (eg. AND, OR) * * Only words with 2 or more characters are accepted * Max 200 chars total - * Space is used to split words, "" can be used search for a whole string (not indexed search then) + * Space is used to split words, "" can be used search for a whole string * AND, OR and NOT are prefix words, overruling the default operator * +/|/- equals AND, OR and NOT as operators. * All search words are converted to lowercase. 
@@ -438,12 +446,18 @@ $inSW = $GLOBALS['TSFE']->csConvObj->utf8_encode($inSW, $GLOBALS['TSFE']->metaCharset); $inSW = $GLOBALS['TSFE']->csConvObj->entities_to_utf8($inSW,TRUE); + $sWordArray = false; if ($hookObj = &$this->hookRequest('getSearchWords')) { - return $hookObj->getSearchWords_splitSWords($inSW, $defOp); + $sWordArray = $hookObj->getSearchWords_splitSWords($inSW, $defOp); } else { - if ($this->piVars['type']==20) { - return array(array('sword'=>trim($inSW), 'oper'=>'AND')); + if ($this->piVars['type']==20) { // Sentence + $sWordArray = array( + array( + 'sword' => trim($inSW), + 'oper' => 'AND' + ) + ); } else { $search = t3lib_div::makeInstance('tslib_search'); $search->default_operator = $defOp==1 ? 'OR' : 'AND'; @@ -451,10 +465,12 @@ $search->register_and_explode_search_string($inSW); if (is_array($search->sword_array)) { - return $this->procSearchWordsByLexer($search->sword_array); + $sWordArray = $this->procSearchWordsByLexer($search->sword_array); } } } + + return $sWordArray; } /** @@ -570,7 +586,11 @@ // Getting SQL result pointer: $GLOBALS['TT']->push('Searching result'); - $res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid); + if ($hookObj = &$this->hookRequest('getResultRows_SQLpointer')) { + $res = $hookObj->getResultRows_SQLpointer($sWArr,$freeIndexUid); + } else { + $res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid); + } $GLOBALS['TT']->pull(); // Organize and process result: @@ -592,6 +612,14 @@ // Each row should contain the fields from 'ISEC.*, IP.*' combined + artificial fields "show_resume" (boolean) and "result_number" (counter) while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + if (!$this->checkExistance($row)) { + // Check if the record is still available or if it has been deleted meanwhile. + // Currently this works for files only, since extending it to content elements would cause a lot of overhead... + // Otherwise, skip the row. 
+ $count--; + continue; + } + // Set first row: if (!$c) { $firstRow = $row; @@ -611,11 +639,13 @@ $c++; // Increase the result pointer // All rows for display is put into resultRows[] - if ($c > $pointer * $this->piVars['results']) { + if ($c > $pointer * $this->piVars['results'] && $c <= ($pointer+1) * $this->piVars['results']) { $row['result_number'] = $c; $resultRows[] = $row; - // This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above). - if (!$exactCount && (($c+1) > ($pointer+1)*$this->piVars['results'])) { break; } + } + // This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above). + if (!$exactCount && (($c+1) > ($pointer+1) * $this->piVars['results'])) { + break; } } else { $count--; // Skip this row if the user cannot view it (missing permission) @@ -626,10 +656,10 @@ } return array( - 'resultRows' => $resultRows, - 'firstRow' => $firstRow, - 'count' => $count - ); + 'resultRows' => $resultRows, + 'firstRow' => $firstRow, + 'count' => $count + ); } else { // No results found: return FALSE; } @@ -643,7 +673,7 @@ * @return pointer */ function getResultRows_SQLpointer($sWArr,$freeIndexUid=-1) { - // This SEARCHES for the searchwords in $sWArr AND returns a COMPLETE list of phash-integers of the matches. + // This SEARCHES for the searchwords in $sWArr AND returns a COMPLETE list of phash-integers of the matches. 
$list = $this->getPhashList($sWArr); // Perform SQL Search / collection of result rows array: @@ -825,60 +855,52 @@ // Initialize variables: $c=0; $totalHashList = array(); // This array accumulates the phash-values - $this->wSelClauses = array(); // Traverse searchwords; for each, select all phash integers and merge/diff/intersect them with previous word (based on operator) foreach ($sWArr as $k => $v) { // Making the query for a single search word based on the search-type $sWord = $v['sword']; // $GLOBALS['TSFE']->csConvObj->conv_case('utf-8',$v['sword'],'toLower'); // lower-case all of them... $theType = (string)$this->piVars['type']; - if (strstr($sWord,' ')) $theType = 20; // If there are spaces in the search-word, make a full text search instead. + if (strstr($sWord,' ')) { + $theType = 20; // If there are spaces in the search-word, make a full text search instead. + } $GLOBALS['TT']->push('SearchWord "'.$sWord.'" - $theType='.$theType); - $res = ''; - $wSel=''; - // Perform search for word: - switch($theType) { + switch ($theType) { case '1': // Part of word - $wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'"; - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + $res = $this->searchWord($sWord, self::WILDCARD_LEFT | self::WILDCARD_RIGHT); break; case '2': // First part of word - $wSel = "IW.baseword LIKE '".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'"; - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + $res = $this->searchWord($sWord, self::WILDCARD_RIGHT); break; case '3': // Last part of word - $wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."'"; - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + $res = $this->searchWord($sWord, self::WILDCARD_LEFT); break; case '10': // Sounds like - $wSel = 'IW.metaphone = '.$this->indexerObj->metaphone($sWord); - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + + /** + * 
Indexer object + * + * @var tx_indexedsearch_indexer + */ + $indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer'); // Initialize the indexer-class + + // Perform metaphone search + $res = $this->searchMetaphone($indexerObj->metaphone($sWord,$this->storeMetaphoneInfoAsWords)); + + unset($indexerObj); break; case '20': // Sentence - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( - 'ISEC.phash', - 'index_section ISEC, index_fulltext IFT', - 'IFT.fulltextdata LIKE \'%'.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_fulltext').'%\' AND - ISEC.phash = IFT.phash - '.$this->sectionTableWhere(), - 'ISEC.phash' - ); - $wSel = '1=1'; - - if ($this->piVars['type']==20) $this->piVars['order'] = 'mtime'; // If there is a fulltext search for a sentence there is a likeliness that sorting cannot be done by the rankings from the rel-table (because no relations will exist for the sentence in the word-table). So therefore mtime is used instaed. It is not required, but otherwise some hits may be left out. + $res = $this->searchSentence($sWord); + $this->piVars['order'] = 'mtime'; // If there is a fulltext search for a sentence there is a likeliness that sorting cannot be done by the rankings from the rel-table (because no relations will exist for the sentence in the word-table). So therefore mtime is used instead. It is not required, but otherwise some hits may be left out. break; default: // Distinct word - $wSel = 'IW.wid = '.$hash = $this->indexerObj->md5inthash($sWord); - $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + $res = $this->searchDistinct($sWord); break; } - // Accumulate the word-select clauses - $this->wSelClauses[] = $wSel; - // If there was a query to do, then select all phash-integers which resulted from this. 
if ($res) { @@ -922,6 +944,7 @@ * @return pointer SQL result pointer */ function execPHashListQuery($wordSel,$plusQ='') { + return $GLOBALS['TYPO3_DB']->exec_SELECTquery( 'IR.phash', 'index_words IW, @@ -937,28 +960,82 @@ } /** + * Search for a word (as whole word, prefix, suffix or substring, depending on $mode) + * + * @param string Search word; it is escaped with quoteStr() before being put into the LIKE pattern + * @param integer Bitmask of self::WILDCARD_LEFT and/or self::WILDCARD_RIGHT selecting on which side(s) of the word a '%' wildcard is added + * @return pointer SQL result pointer + */ + function searchWord($sWord, $mode) { + $wildcard_left = ($mode & self::WILDCARD_LEFT) ? '%' : ''; + $wildcard_right = ($mode & self::WILDCARD_RIGHT) ? '%' : ''; + + $wSel = 'IW.baseword LIKE \''.$wildcard_left.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words').$wildcard_right.'\''; + $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + return $res; + } + + /** + * Search for one distinct word (matched by the md5 integer hash of the word) + * + * @return pointer SQL result pointer + */ + function searchDistinct($sWord) { + $wSel = 'IW.wid='.$this->md5inthash($sWord); + $res = $this->execPHashListQuery($wSel,' AND is_stopword=0'); + return $res; + } + + /** + * Search for a sentence (substring match on index_fulltext.fulltextdata, restricted by sectionTableWhere()) + * + * @return pointer SQL result pointer + */ + function searchSentence($sWord) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( + 'ISEC.phash', + 'index_section ISEC, index_fulltext IFT', + 'IFT.fulltextdata LIKE \'%'.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_fulltext').'%\' AND + ISEC.phash = IFT.phash + '.$this->sectionTableWhere(), + 'ISEC.phash' + ); + return $res; + } + + /** + * Search for a metaphone word ($sWord is put into the query unquoted, so it is assumed to be the integer metaphone hash - TODO confirm against the indexer) + * + * @return pointer SQL result pointer + */ + function searchMetaphone($sWord) { + $wSel = 'IW.metaphone='.$sWord; + return $this->execPHashListQuery($wSel,' AND is_stopword=0'); + } + + /** * Returns AND statement for selection of section in database. (rootlevel 0-2 + page_id) * * @return string AND clause for selection of section in database. */ function sectionTableWhere() { - $out = $this->wholeSiteIdList<0 ? '' : 'AND ISEC.rl0 IN ('.$this->wholeSiteIdList.')'; + $out = $this->wholeSiteIdList<0 ?
'' : ' AND ISEC.rl0 IN ('.$this->wholeSiteIdList.')'; $match = ''; if (substr($this->piVars['sections'],0,4)=='rl1_') { $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],4))); - $out.= 'AND ISEC.rl1 IN ('.$list.')'; + $out.= ' AND ISEC.rl1 IN ('.$list.')'; $match = TRUE; } elseif (substr($this->piVars['sections'],0,4)=='rl2_') { $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],4))); - $out.= 'AND ISEC.rl2 IN ('.$list.')'; + $out.= ' AND ISEC.rl2 IN ('.$list.')'; $match = TRUE; } elseif (is_array($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'])) { // Traversing user configured fields to see if any of those are used to limit search to a section: foreach ($GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['addRootLineFields'] as $fieldName => $rootLineLevel) { if (substr($this->piVars['sections'],0,strlen($fieldName)+1)==$fieldName.'_') { $list = implode(',',t3lib_div::intExplode(',',substr($this->piVars['sections'],strlen($fieldName)+1))); - $out.= 'AND ISEC.'.$fieldName.' IN ('.$list.')'; + $out.= ' AND ISEC.'.$fieldName.' 
IN ('.$list.')'; $match = TRUE; break; } @@ -990,18 +1067,18 @@ */ function mediaTypeWhere() { - switch((string)$this->piVars['media']) { + switch ((string)$this->piVars['media']) { case '0': // '0' => 'Kun TYPO3 sider', - $out = 'AND IP.item_type='.$GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');; + $out = ' AND IP.item_type='.$GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');; break; case '-2': // All external documents - $out = 'AND IP.item_type!='.$GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');; + $out = ' AND IP.item_type!='.$GLOBALS['TYPO3_DB']->fullQuoteStr('0', 'index_phash');; break; case '-1': // All content - $out=''; + $out = ''; break; default: - $out = 'AND IP.item_type='.$GLOBALS['TYPO3_DB']->fullQuoteStr($this->piVars['media'], 'index_phash'); + $out = ' AND IP.item_type='.$GLOBALS['TYPO3_DB']->fullQuoteStr($this->piVars['media'], 'index_phash'); break; } @@ -1092,44 +1169,14 @@ while(list(,$rootId)=each($siteIdNumbers)) { $id_list[] = $this->cObj->getTreeList($rootId,9999,0,0,'','').$rootId; } - $page_where = 'ISEC.page_id IN ('.implode(',',$id_list).')'; + $page_where = ' ISEC.page_id IN ('.implode(',',$id_list).')'; } else { // Disable everything... (select all) - $page_where = ' 1=1 '; + $page_where = ' 1=1'; } - // If any of the ranking sortings are selected, we must make a join with the word/rel-table again, because we need to calculate ranking based on all search-words found. - if (substr($this->piVars['order'],0,5)=='rank_') { - /* - OK there were some fancy calculations promoted by Graeme Merrall: - - "However, regarding relevance you probably want to look at something like - Salton's formula which is a good easy way to measure relevance. - Oracle Intermedia uses this and it's pretty simple: - Score can be between 0 and 100, but the top-scoring document in the query - will not necessarily have a score of 100 -- scoring is relative, not - absolute. 
This means that scores are not comparable across indexes, or even - across different queries on the same index. Score for each document is - computed using the standard Salton formula: - - 3f(1+log(N/n)) - - Where f is the frequency of the search term in the document, N is the total - number of rows in the table, and n is the number of rows which contain the - search term. This is converted into an integer in the range 0 - 100. - - There's a good doc on it at - http://ls6-www.informatik.uni-dortmund.de/bib/fulltext/ir/Pfeifer:97/ - although it may be a little complex for what you require so just pick the - relevant parts out. - " - - However I chose not to go with this for several reasons. - I do not claim that my ways of calculating importance here is the best. - ANY (better) suggestion for ranking calculation is accepted! (as long as they are shipped with tested code in exchange for this.) - */ - - switch($this->piVars['order']) { + if (substr($this->piVars['order'],0,5)=='rank_') { + switch ($this->piVars['order']) { case 'rank_flag': // This gives priority to word-position (max-value) so that words in title, keywords, description counts more than in content. // The ordering is refined with the frequency sum as well. $grsel = 'MAX(IR.flags) AS order_val1, SUM(IR.freq) AS order_val2'; @@ -1149,10 +1196,7 @@ break; } - // So, words are imploded into an OR statement (no "sentence search" should be done here - may deselect results) - $wordSel='('.implode(' OR ',$this->wSelClauses).') AND '; - - return $GLOBALS['TYPO3_DB']->exec_SELECTquery( + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( 'ISEC.*, IP.*, ' .$grsel, 'index_words IW, @@ -1160,18 +1204,18 @@ index_section ISEC, index_phash IP'. $page_join, - $wordSel.' - IP.phash IN ('.$list.') '. + 'IP.phash IN ('.$list.') '. $this->mediaTypeWhere().' '. $this->languageWhere(). $freeIndexUidClause.' 
AND IW.wid=IR.wid AND ISEC.phash = IR.phash AND IP.phash = IR.phash - AND '.$page_where, + AND '.$page_where, 'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2 ,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId', $orderBy ); + } else { // Otherwise, if sorting are done with the pages table or other fields, there is no need for joining with the rel/word tables: $orderBy = ''; @@ -1187,7 +1231,7 @@ break; } - return $GLOBALS['TYPO3_DB']->exec_SELECTquery( + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery( 'ISEC.*, IP.*', 'index_phash IP,index_section ISEC'.$page_join, 'IP.phash IN ('.$list.') '. @@ -1200,6 +1244,8 @@ $orderBy ); } + + return $res; } /** @@ -1224,8 +1270,13 @@ // "phash_t3" is the phash of the parent TYPO3 page row which initiated the indexing of the documents in this section. // So, selecting for the grlist records belonging to the parent phash-row where the current users gr_list exists will help us to know. // If this is NOT found, there is still a theoretical possibility that another user accessible page would display a link, so maybe the resume of such a document here may be unjustified hidden. But better safe than sorry. - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash_t3']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist')); - if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash_t3']).' 
AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist')); + } else { + $res = false; + } + + if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)) { #debug("Look up for external media '".$row['data_filename']."': phash:".$row['phash_t3'].' YES - ('.$GLOBALS['TSFE']->gr_list.")!",1); return TRUE; } else { @@ -1235,8 +1286,13 @@ } else { // Ordinary TYPO3 pages: if (strcmp($row['gr_list'],$GLOBALS['TSFE']->gr_list)) { // Selecting for the grlist records belonging to the phash-row where the current users gr_list exists. If it is found it is proof that this user has direct access to the phash-rows content although he did not himself initiate the indexing... - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist')); - if ($GLOBALS['TYPO3_DB']->sql_num_rows($res)) { + if ($this->isTableUsed('index_grlist')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($row['phash']).' AND gr_list='.$GLOBALS['TYPO3_DB']->fullQuoteStr($GLOBALS['TSFE']->gr_list, 'index_grlist')); + } else { + $res = false; + } + + if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)) { #debug('Checking on it ...'.$row['item_title'].'/'.$row['phash'].' - YES ('.$GLOBALS['TSFE']->gr_list.")",1); return TRUE; } else { @@ -1251,6 +1307,25 @@ } /** + * Check if the record is still available or if it has been deleted meanwhile. + * Currently this works for files only, since extending it to page content would cause a lot of overhead... 
+ * + * @param array Result row array + * @return boolean Returns true if record is still available + */ + function checkExistance($row) { + $recordExists = true; // Always expect that page content exists + + if ($row['item_type']) { // External media: + if (!is_file($row['data_filename'])) { // is_file() is already false for a missing path, so no separate file_exists() check is needed + $recordExists = false; + } + } + + return $recordExists; + } + + /** * Returns "DESC" or "" depending on the settings of the incoming highest/lowest result order (piVars['desc'] * * @param boolean If true, inverse the order which is defined by piVars['desc'] @@ -1298,6 +1373,28 @@ } } + /** + * md5 integer hash + * Using 7 instead of 8 just because that makes the integers lower than 32 bit (28 bit) and so they do not interfere with UNSIGNED integers or PHP-versions which has varying output from the hexdec function. + * + * @param string String to hash + * @return integer Integer interpretation of the md5 hash of input string. + */ + function md5inthash($str) { + return tx_indexedsearch_indexer::md5inthash($str); + } + + /** + * Check if the tables provided are configured for usage. + * This becomes necessary for extensions that provide additional database functionality like indexed_search_mysql.
+ * + * @param string Comma-separated list of tables + * @return boolean True if given tables are enabled + */ + function isTableUsed($table_list) { + return tx_indexedsearch_indexer::isTableUsed($table_list); + } + @@ -1328,7 +1425,12 @@ // Multilangual text $substituteArray = array('searchFor', 'extResume', 'atATime', 'orderBy', 'fromSection', 'searchIn', 'match', 'style', 'freeIndexUid'); foreach ($substituteArray as $marker) { - $markerArray['###FORM_'.t3lib_div::strtoupper($marker).'###'] = $this->pi_getLL('form_'.$marker,'',1); + if (t3lib_div::int_from_ver(TYPO3_version) >= t3lib_div::int_from_ver('4.2')) { + $markerArray['###FORM_'.t3lib_div::strtoupper($marker).'###'] = $this->pi_getLL('form_'.$marker,'',1); + } else { + // TODO: This is a workaround for TYPO3 4.1 which misses t3lib_div::strtoupper() - can be removed when 4.1 isn't used any longer + $markerArray['###FORM_'.strtoupper($marker).'###'] = $this->pi_getLL('form_'.$marker,'',1); + } } $markerArray['###FORM_SUBMIT###'] = $this->pi_getLL('submit_button_label','',1); @@ -1596,7 +1698,12 @@ if (is_array($tmplContent)) { foreach ($tmplContent AS $k => $v) { - $markerArray['###'.t3lib_div::strtoupper($k).'###'] = $v; + if (t3lib_div::int_from_ver(TYPO3_version) >= t3lib_div::int_from_ver('4.2')) { + $markerArray['###'.t3lib_div::strtoupper($k).'###'] = $v; + } else { + // TODO: This is a workaround for TYPO3 4.1 which misses t3lib_div::strtoupper() - can be removed when 4.1 isn't used any longer + $markerArray['###'.strtoupper($k).'###'] = $v; + } } } @@ -1955,13 +2062,20 @@ if ($row['show_resume']) { if (!$noMarkup) { $markedSW = ''; - $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_fulltext', 'phash='.intval($row['phash'])); - if ($ftdrow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { - // Cut HTTP references after some length - $content = preg_replace('/(http:\/\/[^ ]{60})([^ ]+)/i', '$1...', $ftdrow['fulltextdata']); - $markedSW = $this->markupSWpartsOfString($content); + if 
($this->isTableUsed('index_fulltext')) { + $res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_fulltext', 'phash='.intval($row['phash'])); + } else { + $res = false; + } + + if ($res) { + if ($ftdrow = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) { + // Cut HTTP references after some length + $content = preg_replace('/(http:\/\/[^ ]{60})([^ ]+)/i', '$1...', $ftdrow['fulltextdata']); + $markedSW = $this->markupSWpartsOfString($content); + } + $GLOBALS['TYPO3_DB']->sql_free_result($res); } - $GLOBALS['TYPO3_DB']->sql_free_result($res); } if (!trim($markedSW)) { diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search/pi/considerations.txt typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/pi/considerations.txt --- typo3_src-4.2.2/typo3/sysext/indexed_search/pi/considerations.txt 2008-10-06 12:12:27.000000000 +0200 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search/pi/considerations.txt 2008-11-04 00:59:01.000000000 +0100 @@ -1,3 +1,6 @@ +- Search is always case insensitive. If you need a case sensitive search, use a binary collation for the index_fulltext and index_words tables. + + MAILS about: @@ -17,7 +20,7 @@ If you are an SQL wizard, you may be able to help me here. -In the (coming) index searching thing, I have three main tables. +In the (coming) index searching thing, I have three main tables. - index_words which contains all the words indexed - index_pages which represents a link to a page id or external url @@ -27,13 +30,13 @@ So searching an OR search for "content" and "management" could be done like this: -SELECT STRAIGHT_JOIN [some fields here...] FROM -index_words AS IW, -index_rel AS IR, +SELECT STRAIGHT_JOIN [some fields here...] FROM +index_words AS IW, +index_rel AS IR, index_phash AS IP -WHERE -IR.phash = IP.phash AND -IW.wid=IR.wid AND +WHERE +IR.phash = IP.phash AND +IW.wid=IR.wid AND (IW.baseword = 'content' OR IW.baseword = 'management') [... 
and here comes some GROUP BY, ORDER BY and LIMIT] @@ -45,30 +48,30 @@ Therefore I tought of a little trick to do it: -SELECT STRAIGHT_JOIN [some fields here...] FROM -index_words AS IW, -index_rel AS IR, -index_words AS IW2, -index_rel AS IR2, +SELECT STRAIGHT_JOIN [some fields here...] FROM +index_words AS IW, +index_rel AS IR, +index_words AS IW2, +index_rel AS IR2, index_phash AS IP -WHERE -IW.wid=IR.wid AND -IW2.wid=IR2.wid AND -IR.phash = IP.phash AND -IR2.phash = IP.phash AND +WHERE +IW.wid=IR.wid AND +IW2.wid=IR2.wid AND +IR.phash = IP.phash AND +IR2.phash = IP.phash AND (IW.baseword = 'content' and IW2.baseword = 'management') [... and here comes some GROUP BY, ORDER BY and LIMIT] -... and actually I think this works, but it's very slow, probably because the internal result in MySQL becomes extremely large due to the joins. +... and actually I think this works, but it's very slow, probably because the internal result in MySQL becomes extremely large due to the joins. Can anyone help me? -I checked out kwIndex from hotscripts and he does it like this: +I checked out kwIndex from hotscripts and he does it like this: 1) Select the word-ids (SQL-query 1) 2) If both words were found, make another query for all linking-table entries matching the words and group by the word-id. The count(*) statement shows the number equal to the number of searchwords if they were both found. So select only records which delivers this. Then you have the document ids.... (SQL 2) @@ -159,9 +162,9 @@ OK, I am not an sql-wizard. Just some hints and wishes, which I hope are helpful. -> +> > 1) To the AND question: -> +> > Maybe it's best to make a search for each word; After getting the total list > of page-ids from first search, this is included as a condition in the next > search, which generates a new list which is included in the next search, @@ -196,7 +199,7 @@ I have to say a really elaborate, fast and multiformat (pdf's !) 
search engine is really one the single most important things for every 100+ website. -> +> > 2) Search query syntax > Any suggestions to a search query syntax. > - Search for "content management" is by default AND search @@ -317,3 +320,35 @@ ***************************************************************************************************************** +OK there were some fancy calculations promoted by Graeme Merrall: + +"However, regarding relevance you probably want to look at something like +Salton's formula which is a good easy way to measure relevance. +Oracle Intermedia uses this and it's pretty simple: +Score can be between 0 and 100, but the top-scoring document in the query +will not necessarily have a score of 100 -- scoring is relative, not +absolute. This means that scores are not comparable across indexes, or even +across different queries on the same index. Score for each document is +computed using the standard Salton formula: + + 3f(1+log(N/n)) + +Where f is the frequency of the search term in the document, N is the total +number of rows in the table, and n is the number of rows which contain the +search term. This is converted into an integer in the range 0 - 100. + +There's a good doc on it at +http://ls6-www.informatik.uni-dortmund.de/bib/fulltext/ir/Pfeifer:97/ +although it may be a little complex for what you require so just pick the +relevant parts out. +" + +However I chose not to go with this for several reasons. +I do not claim that my ways of calculating importance here is the best. +ANY (better) suggestion for ranking calculation is accepted! (as long as they are shipped with tested code in exchange for this.) 
+ + + + +***************************************************************************************************************** +***************************************************************************************************************** diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/ChangeLog typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/ChangeLog --- typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/ChangeLog 1970-01-01 01:00:00.000000000 +0100 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/ChangeLog 2008-11-04 00:59:01.000000000 +0100 @@ -0,0 +1,3 @@ +2008-03-18 Michael Stucki <michael@typo3.org> + + * Initial version diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php --- typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php 1970-01-01 01:00:00.000000000 +0100 +++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php 2008-11-04 00:59:01.000000000 +0100 @@ -0,0 +1,239 @@ +<?php +/*************************************************************** +* Copyright notice +* +* (c) 2008 Michael Stucki (michael@typo3.org) +* All rights reserved +* +* This script is part of the TYPO3 project. The TYPO3 project is +* free software; you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation; either version 2 of the License, or +* (at your option) any later version. +* +* The GNU General Public License can be found at +* http://www.gnu.org/copyleft/gpl.html. +* A copy is found in the textfile GPL.txt and important notices to the license +* from the author is found in LICENSE.txt distributed with these scripts. 
+* +* +* This script is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. +* +* This copyright notice MUST APPEAR in all copies of the script! +***************************************************************/ +/** + * Database handler class + * + * @author Michael Stucki <michael@typo3.org> + */ +/** + * [CLASS/FUNCTION INDEX of SCRIPT] + * + * + * + * TOTAL FUNCTIONS: 0 + * (This index is automatically created/updated by the extension "extdeveval") + * + */ + + + + + + + + + + +/** + * Class that hooks into Indexed Search and replaces standard SQL queries with MySQL fulltext index queries. + * + * @author Michael Stucki <michael@typo3.org> + * @package TYPO3 + * @subpackage tx_indexedsearch_mysql + */ +class tx_indexedsearch_mysql { + var $pObj; + + /** + * Gets a SQL result pointer to traverse for the search records. + * + * @param array Search words + * @param integer Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content. 
+	 * @return	pointer
+	 */
+	function getResultRows_SQLpointer($sWArr,$freeIndexUid=-1) {
+			// Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not
+		$searchData = $this->getSearchString($sWArr);
+
+			// Perform SQL Search / collection of result rows array:
+		if ($searchData) { // NOTE(review): getSearchString() always returns a non-empty array, so the else branch looks unreachable
+				// Do the search:
+			$GLOBALS['TT']->push('execFinalQuery');
+			$res = $this->execFinalQuery_fulltext($searchData,$freeIndexUid);
+			$GLOBALS['TT']->pull();
+			return $res;
+		} else {
+			return false;
+		}
+	}
+
+	/**
+	 * Builds the search data for a MySQL FULLTEXT query from the search word array.
+	 * The returned array has the keys "searchBoolean", "searchString" and "fulltextIndex".
+	 *
+	 * @param	array		Search word array; each item is read via its "sword" and "oper" keys
+	 * @return	array		Search string, boolean mode flag, and the fulltext column to match against
+	 */
+	function getSearchString($sWArr) {
+
+			// Initialize variables:
+		$theType = ''; // Search type of the most recently processed word; stays empty when no search words are given
+
+		$searchBoolean = false; // Change this to true to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
+		$fulltextIndex = 'index_fulltext.fulltextdata';
+
+		$naturalSearchString = ''; // This holds the result if the search is natural (doesn't contain any boolean operators)
+		$booleanSearchString = ''; // This holds the result if the search is boolean (contains +/-/| operators)
+
+			// Traverse searchwords and prefix them with corresponding operator
+		foreach ($sWArr as $v) {
+				// Making the query for a single search word based on the search-type
+			$sWord = $v['sword']; // $GLOBALS['TSFE']->csConvObj->conv_case('utf-8',$v['sword'],'toLower'); // lower-case all of them...
+			$trail_natural = '';
+			$trail_boolean = '';
+
+			$theType = (string)$this->pObj->piVars['type'];
+			if (strstr($sWord,' ')) {
+				$theType = '20'; // If there are spaces in the search-word, make a full text search instead.
+			}
+
+			switch ($theType) {
+				case '1': // Part of word
+				case '3': // Last part of word
+						// These options are both not possible with fulltext indexing! Therefore, fallback to first-part-of-word search
+				case '2': // First part of word
+					$trail_boolean = '*';
+						// Part-of-word search requires boolean mode!
+					$searchBoolean = true;
+				break;
+				case '10': // Sounds like
+
+						/**
+						 * Indexer object
+						 *
+						 * @var tx_indexedsearch_indexer
+						 */
+					$indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer'); // Initialize the indexer-class
+
+						// Perform metaphone search
+					$sWord = $indexerObj->metaphone($sWord,$this->pObj->storeMetaphoneInfoAsWords);
+
+					unset($indexerObj);
+					$fulltextIndex = 'index_fulltext.metaphonedata';
+				break;
+				case '20': // Sentence
+					$searchBoolean = true;
+					$sWord = preg_replace('/^"(.*)"$/','$1',$sWord); // Remove existing quotes as they will be added later anyway...
+				break;
+				default: // Distinct word
+			}
+
+				// Perform search for word:
+			switch ($v['oper']) {
+				case 'AND NOT':
+					$booleanSearchString.= ' -' . $sWord . $trail_boolean;
+					$searchBoolean = true;
+				break;
+				case 'OR':
+					$booleanSearchString.= ' ' . $sWord . $trail_boolean;
+					$searchBoolean = true;
+				break;
+				default:
+					$booleanSearchString.= ' +' . $sWord . $trail_boolean;
+					$naturalSearchString.= ' ' . $sWord . $trail_natural;
+			}
+		}
+
+			// The type of the last processed word decides whether the whole query is sent as one quoted sentence
+		if ($theType=='20') { // Sentence
+			$searchString = '"'.trim($naturalSearchString).'"';
+
+		} elseif ($searchBoolean) {
+			$searchString = trim($booleanSearchString);
+
+		} else {
+			$searchString = trim($naturalSearchString);
+		}
+
+		return array(
+			'searchBoolean' => $searchBoolean,
+			'searchString' => $searchString,
+			'fulltextIndex' => $fulltextIndex
+		);
+	}
+
+	/**
+	 * Executes the final query: a MySQL FULLTEXT "MATCH ... AGAINST" search on index_fulltext, combined with index_section (ISEC) and index_phash (IP) and a page restriction.
+	 *
+	 * @param	array		Array with search string, boolean indicator, and fulltext index reference
+	 * @param	integer		Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
+	 * @return	pointer		Query result pointer
+	 */
+	function execFinalQuery_fulltext($searchData,$freeIndexUid=-1) {
+
+			// Setting up methods of filtering results based on page types, access, etc.
+		$page_join = '';
+		$page_where = '';
+
+			// Indexing configuration clause:
+		$freeIndexUidClause = $this->pObj->freeIndexUidWhere($freeIndexUid);
+
+			// Calling hook for alternative creation of page ID list
+		if ($hookObj = &$this->pObj->hookRequest('execFinalQuery_idList')) {
+			$page_where = $hookObj->execFinalQuery_idList(''); // Originally this hook expects a list of page IDs, so since we don't know them yet, just send an empty string. Users of this hook need to adjust their hook to this!
+		} elseif ($this->pObj->join_pages) { // Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
+			$page_join = ',
+				pages';
+			$page_where = 'pages.uid = ISEC.page_id
+				'.$this->pObj->cObj->enableFields('pages').'
+				AND pages.no_search=0
+				AND pages.doktype<200
+			';
+		} elseif ($this->pObj->wholeSiteIdList>=0) { // Collecting all pages IDs in which to search; filtering out ALL pages that are not accessible due to enableFields. Does NOT look for "no_search" field!
+			$siteIdNumbers = t3lib_div::intExplode(',',$this->pObj->wholeSiteIdList);
+			$id_list = array();
+			foreach ($siteIdNumbers as $rootId) {
+				$id_list[] = $this->pObj->cObj->getTreeList($rootId,9999,0,0,'','').$rootId;
+			}
+			$page_where = ' ISEC.page_id IN ('.implode(',',$id_list).')';
+		} else { // Disable everything... (select all)
+			$page_where = ' 1=1';
+		}
+
+		$searchBoolean = '';
+		if ($searchData['searchBoolean']) {
+			$searchBoolean = ' IN BOOLEAN MODE';
+		}
+
+		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
+			'index_fulltext.*, ISEC.*, IP.*',
+			'index_fulltext, index_section ISEC, index_phash IP' . $page_join,
+			'MATCH ('.$searchData['fulltextIndex'].') AGAINST ('.$GLOBALS['TYPO3_DB']->fullQuoteStr($searchData['searchString'],'index_fulltext').$searchBoolean.') '.
+			$this->pObj->mediaTypeWhere().' '.
+			$this->pObj->languageWhere().
+			$freeIndexUidClause.'
+					AND index_fulltext.phash = IP.phash
+					AND ISEC.phash = IP.phash
+					AND '.$page_where,
+			'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2 ,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId' // NOTE(review): presumably the GROUP BY argument of exec_SELECTquery() - confirm against t3lib_DB
+		);
+
+		return $res;
+	}
+}
+
+?>
diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/ext_emconf.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/ext_emconf.php
--- typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/ext_emconf.php	1970-01-01 01:00:00.000000000 +0100
+++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/ext_emconf.php	2008-11-04 00:59:01.000000000 +0100
@@ -0,0 +1,54 @@
+<?php
+
+########################################################################
+# Extension Manager/Repository config file for ext: "indexed_search_mysql"
+#
+# Auto generated 18-03-2008 20:13
+#
+# Manual updates:
+# Only the data in the array - anything else is removed by next write.
+# "version" and "dependencies" must not be touched!
+########################################################################
+
+$EM_CONF[$_EXTKEY] = array(
+	'title' => 'MySQL driver for Indexed Search Engine',
+	'description' => 'MySQL specific driver for Indexed Search Engine. Allows usage of MySQL-only features like FULLTEXT indexes.',
+	'category' => 'misc',
+	'shy' => 0,
+	'dependencies' => 'cms,indexed_search',
+	'conflicts' => '',
+	'priority' => '',
+	'loadOrder' => '',
+	'module' => '',
+	'state' => 'alpha',
+	'internal' => 1,
+	'uploadfolder' => 0,
+	'createDirs' => '',
+	'modify_tables' => '',
+	'clearCacheOnLoad' => 1,
+	'lockType' => '',
+	'author' => 'Michael Stucki',
+	'author_email' => 'michael@typo3.org',
+	'author_company' => '',
+	'CGLcompliance' => '',
+	'CGLcompliance_note' => '',
+	'version' => '2.10.0',
+	'_md5_values_when_last_written' => '',
+	'constraints' => array(
+		'depends' => array(
+			'cms' => '',
+			'php' => '5.2.0-',
+			'typo3' => '4.2.0-',
+			'indexed_search' => '2.10.0-'
+		),
+		'conflicts' => array(
+		),
+		'suggests' => array(
+			'doc_indexed_search' => ''
+		),
+	),
+	'suggests' => array(
+	),
+);
+
+?>
diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/ext_localconf.php typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/ext_localconf.php
--- typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/ext_localconf.php	1970-01-01 01:00:00.000000000 +0100
+++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/ext_localconf.php	2008-11-04 00:59:01.000000000 +0100
@@ -0,0 +1,10 @@
+<?php
+if (!defined ('TYPO3_MODE')) die ('Access denied.');
+
+	// Register class tx_indexedsearch_mysql as the indexed_search pi1 hook "getResultRows_SQLpointer", so that searches query the fulltext index
+$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['pi1_hooks']['getResultRows_SQLpointer'] = 'EXT:indexed_search_mysql/class.tx_indexedsearch_mysql.php:&tx_indexedsearch_mysql';
+
+	// Use all index_* tables except "index_rel" and "index_words" (presumably because word lookups are replaced by the FULLTEXT index - confirm)
+$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['use_tables'] = 'index_phash,index_fulltext,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config';
+
+?>
diff -rNu typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/ext_tables.sql typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/ext_tables.sql
--- 
typo3_src-4.2.2/typo3/sysext/indexed_search_mysql/ext_tables.sql	1970-01-01 01:00:00.000000000 +0100
+++ typo3_src-4.2.2_mysql_fulltext_index/typo3/sysext/indexed_search_mysql/ext_tables.sql	2008-11-04 00:59:01.000000000 +0100
@@ -0,0 +1,16 @@
+#
+# Table structure for table 'index_fulltext'
+#
+# Differences compared to original definition in EXT:indexed_search are as follows:
+# - Add new mediumtext field "metaphonedata"
+# - Add new FULLTEXT index "fulltextdata"
+# - Add new FULLTEXT index "metaphonedata"
+# - Change table engine from InnoDB to MyISAM (FULLTEXT indexes require MyISAM on MySQL versions before 5.6)
+CREATE TABLE index_fulltext (
+  phash int(11) DEFAULT '0' NOT NULL,
+  fulltextdata mediumtext,
+  metaphonedata mediumtext,
+  PRIMARY KEY (phash),
+  FULLTEXT fulltextdata (fulltextdata),
+  FULLTEXT metaphonedata (metaphonedata)
+) ENGINE=MyISAM;