Bug #17054 » indexed_search_fulltext.diff

Administrator Admin, 2008-11-08 20:09

View differences:

typo3/sysext/indexed_search_mysql/class.tx_indexedsearch_mysql.php (Revision 0)
1
<?php
2
/***************************************************************
3
*  Copyright notice
4
*
5
*  (c) 2008 Michael Stucki (michael@typo3.org)
6
*  All rights reserved
7
*
8
*  This script is part of the TYPO3 project. The TYPO3 project is
9
*  free software; you can redistribute it and/or modify
10
*  it under the terms of the GNU General Public License as published by
11
*  the Free Software Foundation; either version 2 of the License, or
12
*  (at your option) any later version.
13
*
14
*  The GNU General Public License can be found at
15
*  http://www.gnu.org/copyleft/gpl.html.
16
*  A copy is found in the textfile GPL.txt and important notices to the license
17
*  from the author is found in LICENSE.txt distributed with these scripts.
18
*
19
*
20
*  This script is distributed in the hope that it will be useful,
21
*  but WITHOUT ANY WARRANTY; without even the implied warranty of
22
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23
*  GNU General Public License for more details.
24
*
25
*  This copyright notice MUST APPEAR in all copies of the script!
26
***************************************************************/
27
/**
28
 * Database handler class
29
 *
30
 * @author	Michael Stucki <michael@typo3.org>
31
 */
32
/**
33
 * [CLASS/FUNCTION INDEX of SCRIPT]
34
 *
35
 *
36
 *
37
 * TOTAL FUNCTIONS: 0
38
 * (This index is automatically created/updated by the extension "extdeveval")
39
 *
40
 */
41

  
42

  
43

  
44

  
45

  
46

  
47

  
48

  
49

  
50

  
51
/**
52
 * Class that hooks into Indexed Search and replaces standard SQL queries with MySQL fulltext index queries.
53
 *
54
 * @author	Michael Stucki <michael@typo3.org>
55
 * @package TYPO3
56
 * @subpackage tx_indexedsearch_mysql
57
 */
58
class tx_indexedsearch_mysql {

	/**
	 * Parent object. The methods below read piVars, join_pages, wholeSiteIdList and cObj from it
	 * and call its hookRequest(), freeIndexUidWhere(), mediaTypeWhere() and languageWhere() methods.
	 * NOTE(review): presumably the indexed_search plugin instance that loaded this hook - confirm against the caller.
	 *
	 * @var object
	 */
	var $pObj;

	/**
	 * Gets a SQL result pointer to traverse for the search records.
	 *
	 * @param	array		Search words
	 * @param	integer		Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
	 * @return	pointer		Query result pointer, or false if there is nothing to search for
	 */
	function getResultRows_SQLpointer($sWArr,$freeIndexUid=-1)	{
			// Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not
		$searchData = $this->getSearchString($sWArr);

			// getSearchString() always returns a non-empty array, so the array itself can not be used to detect an empty search.
			// Check the search string instead. strlen() is used rather than empty() because "0" is a legitimate search word.
		if (!is_array($searchData) || !strlen($searchData['searchString'])) {
			return false;
		}

			// Do the search:
		$GLOBALS['TT']->push('execFinalQuery');
		$res = $this->execFinalQuery_fulltext($searchData,$freeIndexUid);
		$GLOBALS['TT']->pull();

		return $res;
	}

	/**
	 * Builds the search string for use with a MySQL FULLTEXT query and collects the settings needed to run it.
	 *
	 * @param	array		Search word array. Each entry is read as an array with the keys "sword" (the word) and "oper" (operator: "AND NOT", "OR", anything else is treated as AND)
	 * @return	array		Array with the keys "searchBoolean" (boolean, true if BOOLEAN MODE is required), "searchString" (string) and "fulltextIndex" (string, column to match against)
	 */
	function getSearchString($sWArr)	{

		$searchBoolean = false;	// Change this to true to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
		$fulltextIndex = 'index_fulltext.fulltextdata';

		$naturalSearchString = '';	// This holds the result if the search is natural (doesn't contain any boolean operators)
		$booleanSearchString = '';	// This holds the result if the search is boolean (contains +/-/| operators)

			// Search type of the word that was processed last. It is evaluated after the loop as well,
			// so it must be defined even if there are no search words at all.
		$theType = '';

		if (!is_array($sWArr)) {
			$sWArr = array();
		}

			// Traverse searchwords and prefix them with corresponding operator
		foreach ($sWArr as $v) {
				// Making the query for a single search word based on the search-type
			$sWord = $v['sword'];
			$trail_natural = '';
			$trail_boolean = '';

			$theType = isset($this->pObj->piVars['type']) ? (string)$this->pObj->piVars['type'] : '';
			if (strpos($sWord,' ') !== false) {
				$theType = '20';	// If there are spaces in the search-word, make a full text search instead.
			}

			switch ($theType) {
				case '1':	// Part of word
				case '3':	// Last part of word
					// These options are both not possible with fulltext indexing! Therefore, fallback to first-part-of-word search
				case '2':	// First part of word
					$trail_boolean = '*';
						// Part-of-word search requires boolean mode!
					$searchBoolean = true;
				break;
				case '10':	// Sounds like

					/**
					 * Indexer object
					 *
					 * @var tx_indexedsearch_indexer
					 */
					$indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');	// Initialize the indexer-class

						// Perform metaphone search
					$sWord = $indexerObj->metaphone($sWord,$this->pObj->storeMetaphoneInfoAsWords);

					unset($indexerObj);
					$fulltextIndex = 'index_fulltext.metaphonedata';
				break;
				case '20':	// Sentence
					$searchBoolean = true;
					$sWord = preg_replace('/^"(.*)"$/','$1',$sWord);	// Remove existing quotes as they will be added later anyway...
				break;
				default:	// Distinct word
			}

				// Add the word to the search string(s). Only words with the default (AND) operator can be part of a natural search.
			switch ($v['oper']) {
				case 'AND NOT':
					$booleanSearchString.= ' -' . $sWord . $trail_boolean;
					$searchBoolean = true;
				break;
				case 'OR':
					$booleanSearchString.= ' ' . $sWord . $trail_boolean;
					$searchBoolean = true;
				break;
				default:
					$booleanSearchString.= ' +' . $sWord . $trail_boolean;
					$naturalSearchString.= ' ' . $sWord . $trail_natural;
			}
		}

		if ($theType=='20') {	// Sentence: wrap the words in quotes to get a phrase search
			$searchString = '"'.trim($naturalSearchString).'"';

		} elseif ($searchBoolean) {
			$searchString = trim($booleanSearchString);

		} else {
			$searchString = trim($naturalSearchString);
		}

		return array(
				'searchBoolean' => $searchBoolean,
				'searchString' => $searchString,
				'fulltextIndex' => $fulltextIndex
			);
	}

	/**
	 * Executes the final query: A MATCH ... AGAINST lookup on index_fulltext, joined with index_section and index_phash
	 * and restricted by media type, language, indexing configuration and page access.
	 *
	 * @param	array		Array with search string, boolean indicator, and fulltext index reference
	 * @param	integer		Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
	 * @return	pointer		Query result pointer
	 */
	function execFinalQuery_fulltext($searchData,$freeIndexUid=-1)	{

			// Setting up methods of filtering results based on page types, access, etc.
		$page_join = '';
		$page_where = '';

			// Indexing configuration clause:
		$freeIndexUidClause = $this->pObj->freeIndexUidWhere($freeIndexUid);

			// Calling hook for alternative creation of page ID list
		if ($hookObj = &$this->pObj->hookRequest('execFinalQuery_idList')) {
			$page_where = $hookObj->execFinalQuery_idList('');	// Originally this hook expects a list of page IDs, so since we don't know them yet, just send an empty string. Users of this hook need to adjust their hook to this!
		} elseif ($this->pObj->join_pages) {	// Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
			$page_join = ',
				pages';
			$page_where = 'pages.uid = ISEC.page_id
				'.$this->pObj->cObj->enableFields('pages').'
				AND pages.no_search=0
				AND pages.doktype<200
			';
		} elseif ($this->pObj->wholeSiteIdList>=0) {	// Collecting all pages IDs in which to search; filtering out ALL pages that are not accessible due to enableFields. Does NOT look for "no_search" field!
			$siteIdNumbers = t3lib_div::intExplode(',',$this->pObj->wholeSiteIdList);
			$id_list = array();
				// foreach instead of each(): each() is deprecated as of PHP 7.2 and removed in PHP 8.0
			foreach ($siteIdNumbers as $rootId) {
				$id_list[] = $this->pObj->cObj->getTreeList($rootId,9999,0,0,'','').$rootId;
			}
			$page_where = ' ISEC.page_id IN ('.implode(',',$id_list).')';
		} else {	// Disable everything... (select all)
			$page_where = ' 1=1';
		}

		$searchBoolean = '';
		if ($searchData['searchBoolean']) {
			$searchBoolean = ' IN BOOLEAN MODE';
		}

			// The fourth argument is the GROUP BY clause; the search string is quoted by fullQuoteStr() before it is put into the query.
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
				'index_fulltext.*, ISEC.*, IP.*',
				'index_fulltext, index_section ISEC, index_phash IP' . $page_join,
				'MATCH ('.$searchData['fulltextIndex'].') AGAINST ('.$GLOBALS['TYPO3_DB']->fullQuoteStr($searchData['searchString'],'index_fulltext').$searchBoolean.') '.
					$this->pObj->mediaTypeWhere().' '.
					$this->pObj->languageWhere().
					$freeIndexUidClause.'
					AND index_fulltext.phash = IP.phash
					AND ISEC.phash = IP.phash
					AND '.$page_where,
				'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2 ,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId'
			);

		return $res;
	}
}
238

  
239
?>
typo3/sysext/indexed_search_mysql/ext_localconf.php (Revision 0)
1
<?php
2
if (!defined('TYPO3_MODE')) {
	die('Access denied.');
}

	// Register the fulltext query class for the "getResultRows_SQLpointer" hook of the indexed_search plugin
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['pi1_hooks']['getResultRows_SQLpointer'] = 'EXT:indexed_search_mysql/class.tx_indexedsearch_mysql.php:&tx_indexedsearch_mysql';

	// Use all index_* tables except "index_rel" and "index_words"
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['use_tables'] = implode(',', array(
	'index_phash',
	'index_fulltext',
	'index_section',
	'index_grlist',
	'index_stat_search',
	'index_stat_word',
	'index_debug',
	'index_config',
));
9

  
10
?>
typo3/sysext/indexed_search_mysql/ChangeLog (Revision 0)
1
2008-03-18  Michael Stucki  <michael@typo3.org>
2

  
3
	* Initial version
typo3/sysext/indexed_search_mysql/ext_tables.sql (Revision 0)
1
#
# Table structure for table 'index_fulltext'
#
# Differences compared to original definition in EXT:indexed_search are as follows:
# - Add new mediumtext field "metaphonedata"
# - Add new FULLTEXT index "fulltextdata"
# - Add new FULLTEXT index "metaphonedata"
# - Change table engine from InnoDB to MyISAM (required for FULLTEXT indexing)
#
# Every column and index definition except the last one must end with a comma.
#
CREATE TABLE index_fulltext (
  phash int(11) DEFAULT '0' NOT NULL,
  fulltextdata mediumtext,
  metaphonedata mediumtext,
  PRIMARY KEY (phash),
  FULLTEXT fulltextdata (fulltextdata),
  FULLTEXT metaphonedata (metaphonedata)
) ENGINE=MyISAM;
typo3/sysext/indexed_search_mysql/ext_emconf.php (Revision 0)
1
<?php
2

  
3
########################################################################
4
# Extension Manager/Repository config file for ext: "indexed_search_mysql"
5
#
6
# Auto generated 03-11-2008 23:18
7
#
8
# Manual updates:
9
# Only the data in the array - anything else is removed by next write.
10
# "version" and "dependencies" must not be touched!
11
########################################################################
12

  
13
$EM_CONF[$_EXTKEY] = array(
		// General extension information
	'title' => 'MySQL driver for Indexed Search Engine',
	'description' => 'MySQL specific driver for Indexed Search Engine. Allows usage of MySQL-only features like FULLTEXT indexes.',
	'category' => 'misc',
	'shy' => 0,
	'dependencies' => 'cms,indexed_search',
	'conflicts' => '',
	'priority' => '',
	'loadOrder' => '',
	'module' => '',
	'state' => 'alpha',
	'internal' => 1,
	'uploadfolder' => 0,
	'createDirs' => '',
	'modify_tables' => '',
	'clearCacheOnLoad' => 1,
	'lockType' => '',
		// Author information
	'author' => 'Michael Stucki',
	'author_email' => 'michael@typo3.org',
	'author_company' => '',
	'CGLcompliance' => '',
	'CGLcompliance_note' => '',
	'version' => '2.10.0',
	'_md5_values_when_last_written' => 'a:5:{s:9:"ChangeLog";s:4:"1bb1";s:32:"class.tx_indexedsearch_mysql.php";s:4:"3a48";s:17:"ext_localconf.php";s:4:"31c9";s:14:"ext_tables.php";s:4:"c4b7";s:14:"ext_tables.sql";s:4:"7f93";}',
		// Version constraints
	'constraints' => array(
		'depends' => array(
			'cms' => '',
			'php' => '5.2.0-0.0.0',
			'typo3' => '4.2.0-0.0.0',
			'indexed_search' => '2.10.0-',
		),
		'conflicts' => array(),
		'suggests' => array(
			'doc_indexed_search' => '',
		),
	),
	'suggests' => array(),
);
53

  
54
?>
typo3/sysext/indexed_search/ChangeLog (Arbeitskopie)
1
2008-11-03  Michael Stucki  <michael@typo3.org>
2

  
3
	* Check if files in search matches are still existing before displaying them
4

  
1 5
2008-04-01  Michael Stucki  <michael@typo3.org>
2 6

  
3 7
	* Fixed bug #7980: Fix wrong TypoScript code in plugin template
typo3/sysext/indexed_search/class.indexer.php (Arbeitskopie)
191 191
	var $freqRange = 32000;
192 192
	var $freqMax = 0.1;
193 193

  
194
	var $enableMetaphoneSearch = false;
195
	var $storeMetaphoneInfoAsWords;
196
	var $metaphoneContent = '';
197

  
194 198
		// Objects:
195 199
	/**
196 200
	 * Charset class object
......
452 456
		$this->maxExternalFiles = t3lib_div::intInRange($this->indexerConfig['maxExternalFiles'],0,1000,5);
453 457
		$this->flagBitMask = t3lib_div::intInRange($this->indexerConfig['flagBitMask'],0,255);
454 458

  
459
			// Workaround: If the extension configuration was not updated yet, the value does not exist
460
		$this->enableMetaphoneSearch = isset($this->indexerConfig['enableMetaphoneSearch']) ? ($this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0) : 1;
461

  
462
		$this->storeMetaphoneInfoAsWords = $this->isTableUsed('index_words') ? false : ($this->enableMetaphoneSearch ? true : false);
463

  
455 464
			// Initialize external document parsers:
456 465
			// Example configuration, see ext_localconf.php of this file!
457 466
		if ($this->conf['index_externals'])	{
......
468 477

  
469 478
			// Initialize metaphone hook:
470 479
			// Example configuration (localconf.php) for this hook: $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
471
		if ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'])	{
480
			// Make sure that the hook is loaded _after_ indexed_search as this may overwrite the hook depending on the configuration.
481
		if ($this->enableMetaphoneSearch && $TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'])	{
472 482
			$this->metaphoneObj = &t3lib_div::getUserObj($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone']);
473 483
			$this->metaphoneObj->pObj = &$this;
474 484
		}
......
550 560
			$this->log_pull();
551 561

  
552 562
				// Calculating a hash over what is to be the actual page content. Maybe this hash should not include title,description and keywords? The bodytext is the primary concern. (on the other hand a changed page-title would make no difference then, so dont!)
553
			$this->content_md5h = $this->md5inthash(implode($this->contentParts,''));
563
			$this->content_md5h = $this->md5inthash(implode('', $this->contentParts));
554 564

  
555 565
				// This function checks if there is already a page (with gr_list = 0,-1) indexed and if that page has the very same contentHash.
556 566
				// If the contentHash is the same, then we can rest assured that this page is already indexed and regardless of mtime and origContent we don't need to do anything more.
......
580 590

  
581 591
						// Check words and submit to word list if not there
582 592
				$this->log_push('Check word list and submit words','');
583
					$this->checkWordList($indexArr);
584
					$this->submitWords($indexArr,$this->hash['phash']);
593
					if ($this->isTableUsed('index_words')) {
594
						$this->checkWordList($indexArr);
595
						$this->submitWords($indexArr,$this->hash['phash']);
596
					}
585 597
				$this->log_pull();
586 598

  
587 599
						// Set parsetime
......
1055 1067

  
1056 1068
										// Check words and submit to word list if not there
1057 1069
									$this->log_push('Check word list and submit words','');
1058
										$this->checkWordList($indexArr);
1059
										$this->submitWords($indexArr,$phash_arr['phash']);
1070
										if ($this->isTableUsed('index_words')) {
1071
											$this->checkWordList($indexArr);
1072
											$this->submitWords($indexArr,$phash_arr['phash']);
1073
										}
1060 1074
									$this->log_pull();
1061 1075

  
1062 1076
										// Set parsetime
......
1244 1258
		$this->analyzeHeaderinfo($indexArr,$content,'description',5);
1245 1259
		$this->analyzeBody($indexArr,$content);
1246 1260

  
1247
		return ($indexArr);
1261
		return $indexArr;
1248 1262
	}
1249 1263

  
1250 1264
	/**
......
1257 1271
	 * @return	void
1258 1272
	 */
1259 1273
	function analyzeHeaderinfo(&$retArr,$content,$key,$offset) {
1260
		reset($content[$key]);
1261
		while(list(,$val)=each($content[$key]))  {
1262
			$val = substr($val,0,60);	// Max 60 - because the baseword varchar IS 60. This MUST be the same.
1274
		foreach ($content[$key] as $val) {
1275
			$val = substr($val,0,60);	// Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same.
1276

  
1277
			if (!isset($retArr[$val])) {
1278
					// Word ID (wid)
1279
				$retArr[$val]['hash'] = $this->md5inthash($val);
1280

  
1281
					// Metaphone value is also only 60 chars long
1282
				$metaphone = $this->enableMetaphoneSearch
1283
						? substr($this->metaphone($val,$this->storeMetaphoneInfoAsWords),0,60)
1284
						: '';
1285
				$retArr[$val]['metaphone'] = $metaphone;
1286
			}
1287

  
1288
				// Build metaphone fulltext string (can be used for fulltext indexing)
1289
			if ($this->storeMetaphoneInfoAsWords) {
1290
				$this->metaphoneContent.= ' '.$retArr[$val]['metaphone'];
1291
			}
1292

  
1293
				// Priority used for flagBitMask feature (see extension configuration)
1263 1294
			$retArr[$val]['cmp'] = $retArr[$val]['cmp']|pow(2,$offset);
1264
			$retArr[$val]['count'] = $retArr[$val]['count']+1;
1265
			$retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
1266
			$retArr[$val]['metaphone'] = $this->metaphone($val);
1295

  
1296
			$retArr[$val]['count']++;	// Increase number of occurences
1267 1297
			$this->wordcount++;
1268 1298
		}
1269 1299
	}
......
1276 1306
	 * @return	void
1277 1307
	 */
1278 1308
	function analyzeBody(&$retArr,$content) {
1279
		foreach($content['body'] as $key => $val)	{
1280
			$val = substr($val,0,60);	// Max 60 - because the baseword varchar IS 60. This MUST be the same.
1281
			if(!isset($retArr[$val])) {
1309
		foreach ($content['body'] as $key => $val) {
1310
			$val = substr($val,0,60);	// Cut after 60 chars because the index_words.baseword varchar field has this length. This MUST be the same.
1311

  
1312
			if (!isset($retArr[$val])) {
1313
					// First occurrence (used for ranking results)
1282 1314
				$retArr[$val]['first'] = $key;
1283
				$retArr[$val]['hash'] = hexdec(substr(md5($val),0,7));
1284
				$retArr[$val]['metaphone'] = $this->metaphone($val);
1315

  
1316
					// Word ID (wid)
1317
				$retArr[$val]['hash'] = $this->md5inthash($val);
1318

  
1319
					// Metaphone value is also only 60 chars long
1320
				$metaphone = $this->enableMetaphoneSearch
1321
						? substr($this->metaphone($val,$this->storeMetaphoneInfoAsWords),0,60)
1322
						: '';
1323
				$retArr[$val]['metaphone'] = $metaphone;
1285 1324
			}
1286
			$retArr[$val]['count'] = $retArr[$val]['count']+1;
1325

  
1326
				// Build metaphone fulltext string (can be used for fulltext indexing)
1327
			if ($this->storeMetaphoneInfoAsWords) {
1328
				$this->metaphoneContent.= ' '.$retArr[$val]['metaphone'];
1329
			}
1330

  
1331
			$retArr[$val]['count']++;	// Increase number of occurences
1287 1332
			$this->wordcount++;
1288 1333
		}
1289 1334
	}
......
1300 1345
		if (is_object($this->metaphoneObj))	{
1301 1346
			$tmp = $this->metaphoneObj->metaphone($word, $this->conf['sys_language_uid']);
1302 1347
		} else {
1348
				// Use native PHP function instead of advanced doubleMetaphone class
1303 1349
			$tmp = metaphone($word);
1304 1350
		}
1305 1351

  
1306
			// Return raw value?
1307
		if ($retRaw)	return $tmp;
1352
		if ($retRaw) {	// Return raw value?
1353
			$ret = $tmp;
1354
		} elseif (strlen($tmp)) {	// Otherwise create hash and return integer
1355
			$ret = $this->md5inthash($tmp);
1356
		} else {
1357
			$ret = 0;
1358
		}
1308 1359

  
1309
			// Otherwise create hash and return integer
1310
		if($tmp=='') $ret=0; else $ret=hexdec(substr(md5($tmp),0,7));
1311 1360
		return $ret;
1312 1361
	}
1313 1362

  
......
1368 1417
 			'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
1369 1418
		);
1370 1419

  
1371
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
1420
		if ($this->isTableUsed('index_phash')) {
1421
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
1422
		}
1372 1423

  
1373 1424
			// PROCESSING index_section
1374 1425
		$this->submit_section($this->hash['phash'],$this->hash['phash']);
......
1379 1430
			// PROCESSING index_fulltext
1380 1431
		$fields = array(
1381 1432
			'phash' => $this->hash['phash'],
1382
			'fulltextdata' => implode(' ', $this->contentParts)
1433
			'fulltextdata' => implode(' ', $this->contentParts),
1434
			'metaphonedata' => $this->metaphoneContent
1383 1435
		);
1384 1436
		if ($this->indexerConfig['fullTextDataLength']>0)	{
1385 1437
			$fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']);
1386 1438
		}
1387
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
1439
		if ($this->isTableUsed('index_fulltext')) {
1440
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
1441
		}
1388 1442

  
1389 1443
			// PROCESSING index_debug
1390 1444
		if ($this->indexerConfig['debugMode'])	{
......
1399 1453
						'lexer' => $this->lexerObj->debugString,
1400 1454
					))
1401 1455
			);
1402
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
1456
			if ($this->isTableUsed('index_debug')) {
1457
				$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
1458
			}
1403 1459
		}
1404 1460
	}
1405 1461

  
......
1420 1476
			'hash_gr_list' => $this->md5inthash($this->conf['gr_list']),
1421 1477
			'gr_list' => $this->conf['gr_list']
1422 1478
		);
1423
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields);
1479
		if ($this->isTableUsed('index_grlist')) {
1480
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_grlist', $fields);
1481
		}
1424 1482
	}
1425 1483

  
1426 1484
	/**
......
1440 1498

  
1441 1499
		$this->getRootLineFields($fields);
1442 1500

  
1443
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields);
1501
		if ($this->isTableUsed('index_section')) {
1502
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_section', $fields);
1503
		}
1444 1504
	}
1445 1505

  
1446 1506
	/**
......
1452 1512
	function removeOldIndexedPages($phash)	{
1453 1513
			// Removing old registrations for all tables. Because the pages are TYPO3 pages there can be nothing else than 1-1 relations here.
1454 1514
		$tableArr = explode(',','index_phash,index_section,index_grlist,index_fulltext,index_debug');
1455
		foreach($tableArr as $table)	{
1456
			$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
1515
		foreach ($tableArr as $table) {
1516
			if ($this->isTableUsed($table)) {
1517
				$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
1518
			}
1457 1519
		}
1458 1520
			// Removing all index_section records with hash_t3 set to this hash (this includes such records set for external media on the page as well!). The re-insert of these records are done in indexRegularDocument($file).
1459
		$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash));
1521
		if ($this->isTableUsed('index_section')) {
1522
			$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_section', 'phash_t3='.intval($phash));
1523
		}
1460 1524
	}
1461 1525

  
1462 1526

  
......
1520 1584
			'tstamp' => time(),
1521 1585
			'crdate' => time(),
1522 1586
			'gr_list' => $this->conf['gr_list'],
1523
 			'externalUrl' => $fileParts['scheme'] ? 1 : 0,
1524
 			'recordUid' => intval($this->conf['recordUid']),
1525
 			'freeIndexUid' => intval($this->conf['freeIndexUid']),
1526
 			'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
1587
			'externalUrl' => $fileParts['scheme'] ? 1 : 0,
1588
			'recordUid' => intval($this->conf['recordUid']),
1589
			'freeIndexUid' => intval($this->conf['freeIndexUid']),
1590
			'freeIndexSetId' => intval($this->conf['freeIndexSetId']),
1527 1591
		);
1528
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
1592
		if ($this->isTableUsed('index_phash')) {
1593
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_phash', $fields);
1594
		}
1529 1595

  
1530 1596
			// PROCESSING index_fulltext
1531 1597
		$fields = array(
1532 1598
			'phash' => $hash['phash'],
1533
			'fulltextdata' => implode(' ', $contentParts)
1599
			'fulltextdata' => implode(' ', $contentParts),
1600
			'metaphonedata' => $this->metaphoneContent
1534 1601
		);
1535 1602
		if ($this->indexerConfig['fullTextDataLength']>0)	{
1536 1603
			$fields['fulltextdata'] = substr($fields['fulltextdata'],0,$this->indexerConfig['fullTextDataLength']);
1537 1604
		}
1538
		$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
1605
		if ($this->isTableUsed('index_fulltext')) {
1606
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_fulltext', $fields);
1607
		}
1539 1608

  
1540 1609
			// PROCESSING index_debug
1541 1610
		if ($this->indexerConfig['debugMode'])	{
......
1548 1617
						'lexer' => $this->lexerObj->debugString,
1549 1618
					))
1550 1619
			);
1551
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
1620
			if ($this->isTableUsed('index_debug')) {
1621
				$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_debug', $fields);
1622
			}
1552 1623
		}
1553 1624
	}
1554 1625

  
......
1560 1631
	 */
1561 1632
	function submitFile_grlist($hash)	{
1562 1633
			// Testing if there is a gr_list record for a non-logged in user and if so, there is no need to place another one.
1563
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')');
1564
		if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res))	{
1634
		if ($this->isTableUsed('index_grlist')) {
1635
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($hash).' AND (hash_gr_list='.$this->md5inthash($this->defaultGrList).' OR hash_gr_list='.$this->md5inthash($this->conf['gr_list']).')');
1636
		} else {
1637
			$res = false;
1638
		}
1639

  
1640
		if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res))	{
1565 1641
			$this->submit_grlist($hash,$hash);
1566 1642
		}
1567 1643
	}
......
1573 1649
	 * @return	void
1574 1650
	 */
1575 1651
	function submitFile_section($hash)	{
1576
			// Testing if there is a section
1577
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id']));
1578
		if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res))	{
1652
			// Testing if there is already a section
1653
		if ($this->isTableUsed('index_section')) {
1654
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_section', 'phash='.intval($hash).' AND page_id='.intval($this->conf['id']));
1655
		} else {
1656
			$res = false;
1657
		}
1658

  
1659
		if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res)) {
1579 1660
			$this->submit_section($hash,$this->hash['phash']);
1580 1661
		}
1581 1662
	}
......
1587 1668
	 * @return	void
1588 1669
	 */
1589 1670
	function removeOldIndexedFiles($phash)	{
1590

  
1591 1671
			// Removing old registrations for tables.
1592 1672
		$tableArr = explode(',','index_phash,index_grlist,index_fulltext,index_debug');
1593
		foreach($tableArr as $table)	{
1594
			$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
1673
		foreach ($tableArr as $table) {
1674
			if ($this->isTableUsed($table)) {
1675
				$GLOBALS['TYPO3_DB']->exec_DELETEquery($table, 'phash='.intval($phash));
1676
			}
1595 1677
		}
1596 1678
	}
1597 1679

  
......
1623 1705
	 * @return	integer		Result integer: Generally: <0 = No indexing, >0 = Do indexing (see $this->reasons): -2) Min age was NOT exceeded and so indexing cannot occur.  -1) mtime matched so no need to reindex page. 0) N/A   1) Max age exceeded, page must be indexed again.   2) mtime of indexed page doesn't match mtime given for current content and we must index page.  3) No mtime was set, so we will index...  4) No indexed page found, so of course we will index.
1624 1706
	 */
1625 1707
	function checkMtimeTstamp($mtime,$phash)	{
1708
		$out = 0;
1626 1709

  
1627 1710
			// Select indexed page:
1628
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
1629
		$out = 0;
1711
		if ($this->isTableUsed('index_phash')) {
1712
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('item_mtime,tstamp', 'index_phash', 'phash='.intval($phash));
1713
		} else {
1714
			$res = false;
1715
		}
1630 1716

  
1631 1717
			// If there was an indexing of the page...:
1632
		if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
1718
		if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
1633 1719
			if ($this->tstamp_maxAge && ($row['tstamp']+$this->tstamp_maxAge) < time())	{	// If max age is exceeded, index the page
1634 1720
				$out = 1;		// The configured max-age was exceeded for the document and thus it's indexed.
1635 1721
			} else {
......
1660 1746
	 */
1661 1747
	function checkContentHash()	{
1662 1748
			// With this query the page will only be indexed if it's content is different from the same "phash_grouping" -page.
1663
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash A', 'A.phash_grouping='.intval($this->hash['phash_grouping']).' AND A.contentHash='.intval($this->content_md5h));
1664
		if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
1749
		if ($this->isTableUsed('index_phash')) {
1750
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_phash', 'phash_grouping='.intval($this->hash['phash_grouping']).' AND contentHash='.intval($this->content_md5h));
1751
		} else {
1752
			$res = false;
1753
		}
1754

  
1755
		if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
1665 1756
			return $row;
1666 1757
		}
1667 1758
		return 1;
......
1676 1767
	 * @return	boolean		Returns true if the document needs to be indexed (that is, there was no result)
1677 1768
	 */
1678 1769
	function checkExternalDocContentHash($hashGr,$content_md5h)	{
1679
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A', 'A.phash_grouping='.intval($hashGr).' AND A.contentHash='.intval($content_md5h));
1680
		if ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
1770
		if ($this->isTableUsed('index_phash')) {
1771
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash', 'phash_grouping='.intval($hashGr).' AND contentHash='.intval($content_md5h));
1772
		} else {
1773
			$res = false;
1774
		}
1775

  
1776
		if ($res && $row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
1681 1777
			return 0;
1682 1778
		}
1683 1779
		return 1;
......
1690 1786
	 * @return	void
1691 1787
	 */
1692 1788
	function is_grlist_set($phash_x)	{
1693
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x));
1694
		return $GLOBALS['TYPO3_DB']->sql_num_rows($res);
1789
		if ($this->isTableUsed('index_grlist')) {
1790
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash_x', 'index_grlist', 'phash_x='.intval($phash_x));
1791
		} else {
1792
			$res = false;
1793
		}
1794
		return $res ? $GLOBALS['TYPO3_DB']->sql_num_rows($res) : false;
1695 1795
	}
1696 1796

  
1697 1797
	/**
......
1703 1803
	 * @see submit_grlist()
1704 1804
	 */
1705 1805
	function update_grlist($phash,$phash_x)	{
1706
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list']));
1707
		if (!$GLOBALS['TYPO3_DB']->sql_num_rows($res))	{
1806
		if ($this->isTableUsed('index_grlist')) {
1807
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('phash', 'index_grlist', 'phash='.intval($phash).' AND hash_gr_list='.$this->md5inthash($this->conf['gr_list']));
1808
		} else {
1809
			$res = false;
1810
		}
1811

  
1812
		if ($res && !$GLOBALS['TYPO3_DB']->sql_num_rows($res))	{
1708 1813
			$this->submit_grlist($phash,$phash_x);
1709 1814
			$this->log_setTSlogMessage("Inserted gr_list '".$this->conf['gr_list']."' for phash '".$phash."'",1);
1710 1815
		}
......
1723 1828
		);
1724 1829
		if ($mtime)	{ $updateFields['item_mtime'] = intval($mtime); }
1725 1830

  
1726
		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
1831
		if ($this->isTableUsed('index_phash')) {
1832
			$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
1833
		}
1727 1834
	}
1728 1835

  
1729 1836
	/**
......
1737 1844
			'freeIndexSetId' => intval($this->conf['freeIndexSetId'])
1738 1845
		);
1739 1846

  
1740
		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
1847
		if ($this->isTableUsed('index_phash')) {
1848
			$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
1849
		}
1741 1850
	}
1742 1851

  
1743 1852
	/**
......
1752 1861
			'parsetime' => intval($parsetime)
1753 1862
		);
1754 1863

  
1755
		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
1864
		if ($this->isTableUsed('index_phash')) {
1865
			$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_phash', 'phash='.intval($phash), $updateFields);
1866
		}
1756 1867
	}
1757 1868

  
1758 1869
	/**
......
1765 1876
		$updateFields = array();
1766 1877
		$this->getRootLineFields($updateFields);
1767 1878

  
1768
		$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields);
1879
		if ($this->isTableUsed('index_section')) {
1880
			$GLOBALS['TYPO3_DB']->exec_UPDATEquery('index_section', 'page_id='.intval($this->conf['id']), $updateFields);
1881
		}
1769 1882
	}
1770 1883

  
1771 1884
	/**
......
1795 1908
	 * @return	void
1796 1909
	 */
1797 1910
	function removeLoginpagesWithContentHash()	{
1798
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', '
1911
		if ($this->isTableUsed('index_phash,index_grlist')) {
1912
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'index_phash A,index_grlist B', '
1799 1913
					A.phash=B.phash
1800 1914
					AND A.phash_grouping='.intval($this->hash['phash_grouping']).'
1801 1915
					AND B.hash_gr_list!='.$this->md5inthash($this->defaultGrList).'
1802 1916
					AND A.contentHash='.intval($this->content_md5h));
1803
		while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
1804
			$this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1);
1805
			$this->removeOldIndexedPages($row['phash']);
1917
		} else {
1918
			$res = false;
1806 1919
		}
1920

  
1921
		if ($res) {
1922
			while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
1923
				$this->log_setTSlogMessage("The currently indexed page was indexed under no user-login and apparently this page has been indexed under login conditions earlier, but with the SAME content. Therefore the old similar page with phash='".$row['phash']."' are now removed.",1);
1924
				$this->removeOldIndexedPages($row['phash']);
1925
			}
1926
		}
1807 1927
	}
1808 1928

  
1809 1929
	/**
......
1817 1937
		require_once(t3lib_extMgm::extPath('crawler').'class.tx_crawler_lib.php');
1818 1938
	}
1819 1939

  
1940
	/**
1941
	 * Check if the tables provided are configured for usage.
1942
	 * This becomes necessary for extensions that provide additional database functionality like indexed_search_mysql.
1943
	 *
1944
	 * @param	string		Comma-separated list of tables
1945
	 * @return	boolean		True if given tables are enabled
1946
	 */
1947
	function isTableUsed($table_list) {
1948
		$OK = true;
1949
		$tableArr = t3lib_div::trimExplode(',', $table_list);
1950
		$enabledTableList = $GLOBALS['TYPO3_CONF_VARS']['EXTCONF']['indexed_search']['use_tables'];
1820 1951

  
1952
		foreach ($tableArr as $table) {
1953
			if (!t3lib_div::inList($enabledTableList, $table)) {
1954
				$OK = false;
1955
			}
1956
		}
1821 1957

  
1958
		return $OK;
1959
	}
1822 1960

  
1823 1961

  
1824 1962

  
......
1826 1964

  
1827 1965

  
1828 1966

  
1967

  
1968

  
1969

  
1829 1970
	/********************************
1830 1971
	 *
1831 1972
	 * SQL; Submitting words
......
1846 1987
		}
1847 1988
		if (count($phashArr))	{
1848 1989
			$cwl = implode(',',$phashArr);
1849
			$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')');
1990
			if ($this->isTableUsed('index_words')) {
1991
				$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('baseword', 'index_words', 'wid IN ('.$cwl.')');
1992
			} else {
1993
				$res = false;
1994
			}
1850 1995

  
1851
			if($GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) {
1996
			if ($res && $GLOBALS['TYPO3_DB']->sql_num_rows($res)!=count($wl)) {
1852 1997
				$this->log_setTSlogMessage('Inserting words: '.(count($wl)-$GLOBALS['TYPO3_DB']->sql_num_rows($res)),1);
1853
				while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
1998
				while ($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
1854 1999
					unset($wl[$row['baseword']]);
1855 2000
				}
1856 2001

  
1857 2002
				reset($wl);
1858
				while(list($key,$val)=each($wl)) {
2003
				while (list($key,$val)=each($wl)) {
1859 2004
					$insertFields = array(
1860 2005
						'wid' => $val['hash'],
1861 2006
						'baseword' => $key,
1862 2007
						'metaphone' => $val['metaphone']
1863 2008
					);
1864 2009
						// A duplicate-key error will occur here if a word is NOT unset in the unset() line. However as long as the words in $wl are NOT longer than 60 chars (the baseword varchar is 60 characters...) this is not a problem.
1865
					$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields);
2010
					if ($this->isTableUsed('index_words')) {
2011
						$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_words', $insertFields);
2012
					}
1866 2013
				}
1867 2014
			}
1868 2015
		}
......
1876 2023
	 * @return	void
1877 2024
	 */
1878 2025
	function submitWords($wl,$phash) {
1879
		$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash));
2026
		if ($this->isTableUsed('index_rel')) {
2027
			$GLOBALS['TYPO3_DB']->exec_DELETEquery('index_rel', 'phash='.intval($phash));
2028
		}
1880 2029

  
1881
		foreach($wl as $val)	{
2030
		foreach ($wl as $val) {
1882 2031
			$insertFields = array(
1883 2032
				'phash' => $phash,
1884 2033
				'wid' => $val['hash'],
......
1888 2037
				'flags' => ($val['cmp'] & $this->flagBitMask)
1889 2038
			);
1890 2039

  
1891
			$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields);
2040
			if ($this->isTableUsed('index_rel')) {
2041
				$GLOBALS['TYPO3_DB']->exec_INSERTquery('index_rel', $insertFields);
2042
			}
1892 2043
		}
1893 2044
	}
1894 2045

  
typo3/sysext/indexed_search/ext_tables.sql (Arbeitskopie)
38 38
CREATE TABLE index_fulltext (
39 39
  phash int(11) DEFAULT '0' NOT NULL,
40 40
  fulltextdata mediumtext,
41
  metaphonedata mediumtext,
41 42
  PRIMARY KEY (phash)
42 43
) ENGINE=InnoDB;
43 44

  
......
116 117
  PRIMARY KEY (uid)
117 118
) ENGINE=InnoDB;
118 119

  
119

  
120 120
#
121 121
# Table structure for table 'index_stat_word'
122 122
#
......
125 125
  word varchar(30) DEFAULT '' NOT NULL,
126 126
  index_stat_search_id int(11) DEFAULT '0' NOT NULL,
127 127
  tstamp int(11) DEFAULT '0' NOT NULL,
128
  pageid int(11) DEFAULT '0' NOT NULL,
128 129
  PRIMARY KEY (uid),
129 130
  KEY tstamp (tstamp,word)
130 131
) ENGINE=InnoDB;
......
177 178
  PRIMARY KEY (uid),
178 179
  KEY parent (pid)
179 180
);
180

  
181

  
182
#
183
# Table structure for table 'index_stat_word'
184
#
185
CREATE TABLE index_stat_word (
186
  uid int(11) NOT NULL auto_increment,
187
  word varchar(30) DEFAULT '' NOT NULL,
188
  index_stat_search_id int(11) DEFAULT '0' NOT NULL,
189
  tstamp int(11) DEFAULT '0' NOT NULL,
190
  pageid int(11) DEFAULT '0' NOT NULL,
191
  PRIMARY KEY (uid),
192
  KEY tstamp (tstamp,word)
193
) ENGINE=InnoDB;
typo3/sysext/indexed_search/doc/README.txt (Arbeitskopie)
1 1
A full documentation manual for the indexed search extension can be found in the extension "doc_indexed_search" in the TER.
2 2
See http://typo3.org/documentation/document-library/extension-manuals/doc_indexed_search/current/view/
3

  
4

  
5
This is a list of all tables which are used by this extension:
6

  
7
index_phash
8
- Page information
9

  
10
index_fulltext
11
- Fulltext data
12

  
13
index_rel
14
- Relations between index_phash and index_words
15

  
16
index_words
17
- baseword table
18

  
19
index_section
20
- section index (= first 3 levels of the rootline for this document)
21

  
22
index_grlist
23
- group list information
24
- indicates which gr_list has access to which phash
typo3/sysext/indexed_search/class.doublemetaphone.php (Arbeitskopie)
36 36

  
37 37

  
38 38
// TYPO3: Had to change name to "user_DoubleMetaPhone" from just "DoubleMetaPhone" because TYPO3 requires a user class to be prefixed so:
39
// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you simply configure TYPO3 so by setting the line below in your localconf.php file:
40
// TYPO3:			$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
41
// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example.
39
// TYPO3: If you want to use this metaphone method instead of the default in the class.indexer.php you can enable it in the extension configuration
40
// TYPO3: Of course you can write your own metaphone hook methods by taking this class and configuration as example (also see ext_localconf.php)
42 41

  
43 42
class user_DoubleMetaPhone
44 43
{
typo3/sysext/indexed_search/ext_localconf.php (Arbeitskopie)
43 43
	'tif' => 'EXT:indexed_search/class.external_parser.php:&tx_indexed_search_extparse',
44 44
);
45 45

  
46
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['use_tables'] = 'index_phash,index_fulltext,index_rel,index_words,index_section,index_grlist,index_stat_search,index_stat_word,index_debug,index_config';
46 47

  
48
$_EXTCONF = unserialize($_EXTCONF);	// unserializing the configuration so we can use it here:
49

  
50
	// Use the advanced doubleMetaphone parser instead of the internal one (usage of metaphone parsers is generally disabled by default)
51
if (isset($_EXTCONF['enableMetaphoneSearch']) && intval($_EXTCONF['enableMetaphoneSearch'])==2) {
52
	$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['metaphone'] = 'EXT:indexed_search/class.doublemetaphone.php:&user_DoubleMetaPhone';
53
}
54

  
47 55
	// EXAMPLE configuration of hooks:
48 56
/*
49 57
$TYPO3_CONF_VARS['EXTCONF']['indexed_search']['pi1_hooks'] = array (
typo3/sysext/indexed_search/pi/considerations.txt (Arbeitskopie)
1
- Search is always case insensitive. If you need a case sensitive search, use a binary collation for the index_fulltext and index_words tables.
2

  
3

  
1 4
MAILS about:
2 5

  
3 6

  
......
17 20

  
18 21
If you are an SQL wizard, you may be able to help me here.
19 22

  
20
In the (coming) index searching thing, I have three main tables. 
23
In the (coming) index searching thing, I have three main tables.
21 24

  
22 25
- index_words which contains all the words indexed
23 26
- index_pages which represents a link to a page id or external url
......
27 30
So searching an OR search for "content" and "management" could be done like this:
28 31

  
29 32

  
30
SELECT STRAIGHT_JOIN [some fields here...] FROM 
31
index_words AS IW, 
32
index_rel AS IR, 
33
SELECT STRAIGHT_JOIN [some fields here...] FROM
34
index_words AS IW,
35
index_rel AS IR,
33 36
index_phash AS IP
34
WHERE 
35
IR.phash = IP.phash AND 
36
IW.wid=IR.wid AND 
37
WHERE
38
IR.phash = IP.phash AND
39
IW.wid=IR.wid AND
37 40
(IW.baseword = 'content' OR IW.baseword = 'management')
38 41
[... and here comes some GROUP BY, ORDER BY and LIMIT]
39 42

  
......
45 48

  
46 49
Therefore I thought of a little trick to do it:
47 50

  
48
SELECT STRAIGHT_JOIN [some fields here...] FROM 
49
index_words AS IW, 
50
index_rel AS IR, 
51
index_words AS IW2, 
52
index_rel AS IR2, 
51
SELECT STRAIGHT_JOIN [some fields here...] FROM
52
index_words AS IW,
53
index_rel AS IR,
54
index_words AS IW2,
55
index_rel AS IR2,
53 56
index_phash AS IP
54
WHERE 
55
IW.wid=IR.wid AND 
56
IW2.wid=IR2.wid AND 
57
IR.phash = IP.phash AND 
58
IR2.phash = IP.phash AND 
57
WHERE
58
IW.wid=IR.wid AND
59
IW2.wid=IR2.wid AND
60
IR.phash = IP.phash AND
61
IR2.phash = IP.phash AND
59 62
(IW.baseword = 'content' and IW2.baseword = 'management')
60 63
[... and here comes some GROUP BY, ORDER BY and LIMIT]
61 64

  
62 65

  
63 66

  
64
... and actually I think this works, but it's very slow, probably because the internal result in MySQL becomes extremely large due to the joins. 
67
... and actually I think this works, but it's very slow, probably because the internal result in MySQL becomes extremely large due to the joins.
65 68

  
66 69
Can anyone help me?
67 70

  
68 71

  
69 72

  
70 73

  
71
I checked out kwIndex from hotscripts and he does it like this: 
74
I checked out kwIndex from hotscripts and he does it like this:
72 75

  
73 76
1) Select the word-ids (SQL-query 1)
74 77
2) If both words were found, make another query for all linking-table entries matching the words and group by the word-id. The count(*) statement shows the number equal to the number of searchwords if they were both found. So select only records which delivers this. Then you have the document ids.... (SQL 2)
......
159 162

  
160 163
OK, I am not an sql-wizard. Just some hints and wishes, which I hope are
161 164
helpful.
162
> 
165
>
163 166
> 1) To the AND question:
164
> 
167
>
165 168
> Maybe it's best to make a search for each word; After getting the total list
166 169
> of page-ids from first search, this is included as a condition in the next
167 170
> search, which generates a new list which is included in the next search,
......
196 199
I have to say a really elaborate, fast and multiformat (pdf's !) search
197 200
engine is really one the single most important things for every 100+
198 201
website.
199
> 
202
>
200 203
> 2) Search query syntax
201 204
> Any suggestions to a search query syntax.
202 205
> - Search for "content management" is by default AND search
......
317 320
*****************************************************************************************************************
318 321

  
319 322

  
323
OK there were some fancy calculations promoted by Graeme Merrall:
324

  
325
"However, regarding relevance you probably want to look at something like
326
Salton's formula which is a good easy way to measure relevance.
327
Oracle Intermedia uses this and it's pretty simple:
328
Score can be between 0 and 100, but the top-scoring document in the query
329
will not necessarily have a score of 100 -- scoring is relative, not
330
absolute. This means that scores are not comparable across indexes, or even
331
across different queries on the same index. Score for each document is
332
computed using the standard Salton formula:
333

  
334
	3f(1+log(N/n))
335

  
336
Where f is the frequency of the search term in the document, N is the total
337
number of rows in the table, and n is the number of rows which contain the
338
search term. This is converted into an integer in the range 0 - 100.
339

  
340
There's a good doc on it at
341
http://ls6-www.informatik.uni-dortmund.de/bib/fulltext/ir/Pfeifer:97/
342
although it may be a little complex for what you require so just pick the
343
relevant parts out.
344
"
345

  
346
However I chose not to go with this for several reasons.
347
I do not claim that my ways of calculating importance here are the best.
348
ANY (better) suggestion for ranking calculation is accepted! (as long as they are shipped with tested code in exchange for this.)
349

  
350

  
351

  
352

  
353
*****************************************************************************************************************
354
*****************************************************************************************************************
typo3/sysext/indexed_search/pi/class.tx_indexedsearch.php (Arbeitskopie)
150 150
	var $cache_rl = array();		// Caching of root line data
151 151
	var $fe_groups_required = array();	// Required fe_groups memberships for display of a result.
152 152
	var $domain_records = array();		// Domain records (?)
153
	var $wSelClauses = array();		// Select clauses for individual words
154 153
	var $resultSections = array();		// Page tree sections for search result.
155 154
	var $external_parsers = array();	// External parser objects
156 155
	var $iconFileNameCache = array();	// Storage of icons....
156
	var $templateCode;			// Will hold the content of $conf['templateFile']
157
	var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results';
158
	var $indexerConfig = array();		// Indexer configuration, coming from $GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']
157 159

  
160
	var $enableMetaphoneSearch = false;
161
	var $storeMetaphoneInfoAsWords;
162

  
163

  
158 164
	/**
159 165
	 * Lexer object
160 166
	 *
......
162 168
	 */
163 169
	var $lexerObj;
164 170

  
165
	/**
166
	 * Indexer object
167
	 *
168
	 * @var tx_indexedsearch_indexer
169
	 */
170
	var $indexerObj;
171
	var $templateCode;			// Will hold the content of $conf['templateFile']
172
	var $hiddenFieldList = 'ext, type, defOp, media, order, group, lang, desc, results';
171
	const WILDCARD_LEFT  = 1;
172
	const WILDCARD_RIGHT = 2;
173 173

  
174 174

  
175 175
	/**
......
186 186
		$this->pi_loadLL();
187 187
		$this->pi_setPiVarDefaults();
188 188

  
189
			// Initialize the indexer-class - just to use a few function (for making hashes)
190
		$this->indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');
191

  
192 189
			// Initialize:
193 190
		$this->initialize();
194 191

  
......
203 200
			$this->printRules().
204 201
			$content;
205 202

  
206
        return $this->pi_wrapInBaseClass($content);
207
    }
203
		return $this->pi_wrapInBaseClass($content);
204
	}
208 205

  
209 206
	/**
210 207
	 * Initialize internal variables, especially selector box values for the search form and search words
......
214 211
	function initialize()	{
215 212
		global $TYPO3_CONF_VARS;
216 213

  
214
			// Indexer configuration from Extension Manager interface:
215
		$this->indexerConfig = unserialize($GLOBALS['TYPO3_CONF_VARS']['EXT']['extConf']['indexed_search']);
216
		$this->enableMetaphoneSearch = $this->indexerConfig['enableMetaphoneSearch'] ? 1 : 0;
217
		$this->storeMetaphoneInfoAsWords = $this->isTableUsed('index_words') ? false : true;
218

  
217 219
			// Initialize external document parsers for icon display and other soft operations
218 220
		if (is_array($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers']))	{
219 221
			foreach ($TYPO3_CONF_VARS['EXTCONF']['indexed_search']['external_parsers'] as $extension => $_objRef)	{
......
305 307
			)
306 308
		);
307 309

  
310
		if (!$this->enableMetaphoneSearch) {
311
			unset ($this->optValues['type']['10']);	// Remove this option if metaphone search is disabled)
312
		}
313

  
308 314
			// Free Index Uid:
309 315
		if ($this->conf['search.']['defaultFreeIndexUidList'])	{
310 316
			$uidList = t3lib_div::intExplode(',', $this->conf['search.']['defaultFreeIndexUidList']);
......
378 384

  
379 385
			// Add search languages:
380 386
		$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery('*', 'sys_language', '1=1'.$this->cObj->enableFields('sys_language'));
381
		while($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
382
			$this->optValues['lang'][$lR['uid']] = $lR['title'];
387
		if ($res) {
388
			while ($lR = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res)) {
389
				$this->optValues['lang'][$lR['uid']] = $lR['title'];
390
			}
383 391
		}
384 392

  
385 393
			// Calling hook for modification of initialized content
......
416 424
	}
417 425

  
418 426
	/**
419
	 * Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holds the SQL operator (eg. AND, OR)
427
	 * Splits the search word input into an array where each word is represented by an array with key "sword" holding the search word and key "oper" holding the SQL operator (eg. AND, OR)
420 428
	 *
421 429
	 * Only words with 2 or more characters are accepted
422 430
	 * Max 200 chars total
423
	 * Space is used to split words, "" can be used search for a whole string (not indexed search then)
431
	 * Space is used to split words, "" can be used to search for a whole string
424 432
	 * AND, OR and NOT are prefix words, overruling the default operator
425 433
	 * +/|/- equals AND, OR and NOT as operators.
426 434
	 * All search words are converted to lowercase.
......
438 446
		$inSW = $GLOBALS['TSFE']->csConvObj->utf8_encode($inSW, $GLOBALS['TSFE']->metaCharset);
439 447
		$inSW = $GLOBALS['TSFE']->csConvObj->entities_to_utf8($inSW,TRUE);
440 448

  
449
		$sWordArray = false;
441 450
		if ($hookObj = &$this->hookRequest('getSearchWords'))	{
442
			return $hookObj->getSearchWords_splitSWords($inSW, $defOp);
451
			$sWordArray = $hookObj->getSearchWords_splitSWords($inSW, $defOp);
443 452
		} else {
444 453

  
445
			if ($this->piVars['type']==20)	{
446
				return array(array('sword'=>trim($inSW), 'oper'=>'AND'));
454
			if ($this->piVars['type']==20) {	// Sentence
455
				$sWordArray = array(
456
					array(
457
						'sword' => trim($inSW),
458
						'oper' => 'AND'
459
					)
460
				);
447 461
			} else {
448 462
				$search = t3lib_div::makeInstance('tslib_search');
449 463
				$search->default_operator = $defOp==1 ? 'OR' : 'AND';
......
451 465
				$search->register_and_explode_search_string($inSW);
452 466

  
453 467
				if (is_array($search->sword_array))	{
454
					return $this->procSearchWordsByLexer($search->sword_array);
468
					$sWordArray = $this->procSearchWordsByLexer($search->sword_array);
455 469
				}
456 470
			}
457 471
		}
472

  
473
		return $sWordArray;
458 474
	}
459 475

  
460 476
	/**
......
570 586

  
571 587
			// Getting SQL result pointer:
572 588
			$GLOBALS['TT']->push('Searching result');
573
		$res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid);
589
		if ($hookObj = &$this->hookRequest('getResultRows_SQLpointer')) {
590
			$res = $hookObj->getResultRows_SQLpointer($sWArr,$freeIndexUid);
591
		} else {
592
			$res = $this->getResultRows_SQLpointer($sWArr,$freeIndexUid);
593
		}
574 594
			$GLOBALS['TT']->pull();
575 595

  
576 596
			// Organize and process result:
......
592 612
				// Each row should contain the fields from 'ISEC.*, IP.*' combined + artificial fields "show_resume" (boolean) and "result_number" (counter)
593 613
			while($row = $GLOBALS['TYPO3_DB']->sql_fetch_assoc($res))	{
594 614

  
615
                                if (!$this->checkExistance($row)) {
616
						// Check if the record is still available or if it has been deleted meanwhile.
617
						// Currently this works for files only, since extending it to content elements would cause a lot of overhead...
618
						// Otherwise, skip the row.
619
					$count--;
620
					continue;
621
				}
622

  
595 623
					// Set first row:
596 624
				if (!$c)	{
597 625
					$firstRow = $row;
......
611 639
						$c++;	// Increase the result pointer
612 640

  
613 641
							// All rows for display is put into resultRows[]
614
						if ($c > $pointer * $this->piVars['results'])	{
642
						if ($c > $pointer * $this->piVars['results'] && $c <= ($pointer+1) * $this->piVars['results']) {
615 643
							$row['result_number'] = $c;
616 644
							$resultRows[] = $row;
617
								// This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above).
618
							if (!$exactCount && (($c+1) > ($pointer+1)*$this->piVars['results']))	{ break; }
619 645
						}
646
							// This may lead to a problem: If the result check is not stopped here, the search will take longer. However the result counter will not filter out grouped cHashes/pHashes that were not processed yet. You can change this behavior using the "search.exactCount" property (see above).
647
						if (!$exactCount && (($c+1) > ($pointer+1) * $this->piVars['results'])) {
648
							break;
649
						}
620 650
					} else {
621 651
						$count--;	// Skip this row if the user cannot view it (missing permission)
622 652
					}
......
626 656
			}
627 657

  
628 658
			return array(
629
						'resultRows' => $resultRows,
630
						'firstRow' => $firstRow,
631
						'count' => $count
632
					);
659
				'resultRows' => $resultRows,
660
				'firstRow' => $firstRow,
661
				'count' => $count
662
			);
633 663
		} else {	// No results found:
634 664
			return FALSE;
635 665
		}
......
643 673
	 * @return	pointer
644 674
	 */
645 675
	function getResultRows_SQLpointer($sWArr,$freeIndexUid=-1)	{
646
				// This SEARCHES for the searchwords in $sWArr AND returns a COMPLETE list of phash-integers of the matches.
676
			// This SEARCHES for the searchwords in $sWArr AND returns a COMPLETE list of phash-integers of the matches.
647 677
		$list = $this->getPhashList($sWArr);
648 678

  
649 679
			// Perform SQL Search / collection of result rows array:
......
825 855
			// Initialize variables:
826 856
		$c=0;
827 857
		$totalHashList = array();	// This array accumulates the phash-values
828
		$this->wSelClauses = array();
829 858

  
830 859
			// Traverse searchwords; for each, select all phash integers and merge/diff/intersect them with previous word (based on operator)
831 860
		foreach ($sWArr as $k => $v)	{
832 861
				// Making the query for a single search word based on the search-type
833 862
			$sWord = $v['sword'];	// $GLOBALS['TSFE']->csConvObj->conv_case('utf-8',$v['sword'],'toLower');	// lower-case all of them...
834 863
			$theType = (string)$this->piVars['type'];
835
			if (strstr($sWord,' '))	$theType = 20;	// If there are spaces in the search-word, make a full text search instead.
864
			if (strstr($sWord,' ')) {
865
				$theType = 20;	// If there are spaces in the search-word, make a full text search instead.
866
			}
836 867

  
837 868
			$GLOBALS['TT']->push('SearchWord "'.$sWord.'" - $theType='.$theType);
838 869

  
839
			$res = '';
840
			$wSel='';
841

  
842 870
				// Perform search for word:
843
			switch($theType)	{
871
			switch ($theType) {
844 872
				case '1':	// Part of word
845
					$wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'";
846
					$res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
873
					$res = $this->searchWord($sWord, self::WILDCARD_LEFT | self::WILDCARD_RIGHT);
847 874
				break;
848 875
				case '2':	// First part of word
849
					$wSel = "IW.baseword LIKE '".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."%'";
850
					$res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
876
					$res = $this->searchWord($sWord, self::WILDCARD_RIGHT);
851 877
				break;
852 878
				case '3':	// Last part of word
853
					$wSel = "IW.baseword LIKE '%".$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_words')."'";
854
					$res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
879
					$res = $this->searchWord($sWord, self::WILDCARD_LEFT);
855 880
				break;
856 881
				case '10':	// Sounds like
857
					$wSel = 'IW.metaphone = '.$this->indexerObj->metaphone($sWord);
858
					$res = $this->execPHashListQuery($wSel,' AND is_stopword=0');
882

  
883
					/**
884
					 * Indexer object
885
					 *
886
					 * @var tx_indexedsearch_indexer
887
					 */
888
					$indexerObj = t3lib_div::makeInstance('tx_indexedsearch_indexer');	// Initialize the indexer-class
889

  
890
						// Perform metaphone search
891
					$res = $this->searchMetaphone($indexerObj->metaphone($sWord,$this->storeMetaphoneInfoAsWords));
892

  
893
					unset($indexerObj);
859 894
				break;
860 895
				case '20':	// Sentence
861
					$res = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
862
								'ISEC.phash',
863
								'index_section ISEC, index_fulltext IFT',
864
								'IFT.fulltextdata LIKE \'%'.$GLOBALS['TYPO3_DB']->quoteStr($sWord, 'index_fulltext').'%\' AND
865
									ISEC.phash = IFT.phash
866
									'.$this->sectionTableWhere(),
867
								'ISEC.phash'
868
							);
869
					$wSel = '1=1';
870

  
871
					if ($this->piVars['type']==20)	$this->piVars['order'] = 'mtime';		// If there is a fulltext search for a sentence there is a likeliness that sorting cannot be done by the rankings from the rel-table (because no relations will exist for the sentence in the word-table). So therefore mtime is used instaed. It is not required, but otherwise some hits may be left out.
896
					$res = $this->searchSentence($sWord);
... This diff was truncated because it exceeds the maximum size that can be displayed.
(2-2/3)