<?php
namespace TYPO3\CMS\IndexedSearch\Hook;

// Provenance: attachment "MysqlFulltextIndexHook.php" to bug #45575, uploaded by Quoc-Viet Phan on 2013-02-18 14:54.

/***************************************************************
 *  Copyright notice
 *
 *  (c) 2011 Michael Stucki (michael@typo3.org)
 *  All rights reserved
 *
 *  This script is part of the TYPO3 project. The TYPO3 project is
 *  free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  The GNU General Public License can be found at
 *  http://www.gnu.org/copyleft/gpl.html.
 *  A copy is found in the textfile GPL.txt and important notices to the license
 *  from the author is found in LICENSE.txt distributed with these scripts.
 *
 *
 *  This script is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  This copyright notice MUST APPEAR in all copies of the script!
 ***************************************************************/
/**
 * Class that hooks into Indexed Search and replaces standard SQL queries with MySQL fulltext index queries.
 *
 * @author 	Michael Stucki <michael@typo3.org>
 */
class MysqlFulltextIndexHook {

	/**
	 * Parent search controller. This class reads its piVars, join_pages, wholeSiteIdList and cObj
	 * members and calls its hookRequest() and *Where() helpers.
	 *
	 * @var \TYPO3\CMS\IndexedSearch\Controller\SearchFormController
	 */
	public $pObj;

	// Search type identifiers, compared against the (string) value of piVars['type'].
	const ANY_PART_OF_THE_WORD = '1';
	const LAST_PART_OF_THE_WORD = '2';
	const FIRST_PART_OF_THE_WORD = '3';
	const SOUNDS_LIKE = '10';
	const SENTENCE = '20';

	/**
	 * Gets a SQL result pointer to traverse for the search records.
	 *
	 * @param array $searchWordsArray Search words
	 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
	 * @return resource|false Query result, or FALSE when there is nothing to search for
	 */
	public function getResultRows_SQLpointer($searchWordsArray, $freeIndexUid = -1) {
		// Build the search string, detect which fulltext index to use, and decide whether boolean search is needed or not
		$searchData = $this->getSearchString($searchWordsArray);
		// getSearchString() always returns a non-empty array, so testing the array itself can never
		// skip the query. Test the search string instead; a bare pair of quotes counts as empty.
		if (!$searchData || trim((string) $searchData['searchString'], '" ') === '') {
			return FALSE;
		}
		// Do the search (wrapped in a time-tracker section):
		$GLOBALS['TT']->push('execFinalQuery');
		$resource = $this->execFinalQuery_fulltext($searchData, $freeIndexUid);
		$GLOBALS['TT']->pull();
		return $resource;
	}

	/**
	 * Builds the search string for use with a MySQL FULLTEXT query.
	 *
	 * The search type from piVars['type'] applies to every word, except that a word containing a
	 * space is always treated as a phrase. That decision is made per word: one phrase does not
	 * change how the remaining words are handled, and "AND NOT" / "OR" operators are kept.
	 * If the requested type itself is SENTENCE, all words with the default operator are joined
	 * into one quoted phrase.
	 *
	 * @param array $searchWordArray Search word array; each entry is an array with the keys "sword" and "oper"
	 * @return array Keys: "searchBoolean" (bool, TRUE if BOOLEAN MODE is required), "searchString" (string), "fulltextIndex" (string, column to match against)
	 */
	public function getSearchString($searchWordArray) {
		// Change this to TRUE to force BOOLEAN SEARCH MODE (useful if fulltext index is still empty)
		$searchBoolean = FALSE;
		$fulltextIndex = 'index_fulltext.fulltextdata';
		// This holds the result if the search is natural (doesn't contain any boolean operators)
		$naturalSearchString = '';
		// This holds the result if the search is boolean (contains +/-/| operators)
		$booleanSearchString = '';
		$requestedSearchType = isset($this->pObj->piVars['type']) ? (string) $this->pObj->piVars['type'] : '';
		// Traverse searchwords and prefix them with corresponding operator
		foreach ($searchWordArray as $searchWordData) {
			// Making the query for a single search word based on the search-type
			$searchWord = $searchWordData['sword'];
			$wildcard = '';
			$isPhrase = FALSE;
			// A word containing a space is a phrase. Resolve this per word and leave the
			// requested type untouched so that it still applies to the following words.
			$wordSearchType = strpos((string) $searchWord, ' ') !== FALSE ? self::SENTENCE : $requestedSearchType;
			switch ($wordSearchType) {
				case self::ANY_PART_OF_THE_WORD:
				case self::LAST_PART_OF_THE_WORD:
				case self::FIRST_PART_OF_THE_WORD:
					// All three part-of-word types are handled as "first part of word"
					$wildcard = '*';
					// Part-of-word search requires boolean mode!
					$searchBoolean = TRUE;
					break;
				case self::SOUNDS_LIKE:
					// Initialize the indexer-class
					/** @var \TYPO3\CMS\IndexedSearch\Indexer $indexerObj */
					$indexerObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\IndexedSearch\\Indexer');
					$searchWord = $indexerObj->metaphone($searchWord, $indexerObj->storeMetaphoneInfoAsWords);
					unset($indexerObj);
					$fulltextIndex = 'index_fulltext.metaphonedata';
					break;
				case self::SENTENCE:
					$searchBoolean = TRUE;
					// Remove existing quotes and fix misplaced quotes.
					$searchWord = trim(str_replace('"', ' ', $searchWord));
					$isPhrase = TRUE;
					break;
			}
			// In the boolean string a phrase is quoted on its own, so an operator prefix applies to the whole phrase.
			$booleanTerm = $isPhrase ? '"' . $searchWord . '"' : $searchWord . $wildcard;
			// Perform search for word:
			$operator = isset($searchWordData['oper']) ? $searchWordData['oper'] : '';
			switch ($operator) {
				case 'AND NOT':
					$booleanSearchString .= ' -' . $booleanTerm;
					$searchBoolean = TRUE;
					break;
				case 'OR':
					$booleanSearchString .= ' ' . $booleanTerm;
					$searchBoolean = TRUE;
					break;
				default:
					$booleanSearchString .= ' +' . $booleanTerm;
					$naturalSearchString .= ' ' . $searchWord;
			}
		}
		if ($requestedSearchType == self::SENTENCE) {
			// Explicit sentence search: everything is one phrase
			$searchString = '"' . trim($naturalSearchString) . '"';
		} elseif ($searchBoolean) {
			$searchString = trim($booleanSearchString);
		} else {
			$searchString = trim($naturalSearchString);
		}
		return array(
			'searchBoolean' => $searchBoolean,
			'searchString' => $searchString,
			'fulltextIndex' => $fulltextIndex
		);
	}

	/**
	 * Executes the fulltext SELECT over index_fulltext, index_section and index_phash, restricted by
	 * the page, media type, language, section and indexing configuration clauses of the parent object.
	 *
	 * @param array $searchData Array with search string, boolean indicator, and fulltext index reference
	 * @param int $freeIndexUid Pointer to which indexing configuration you want to search in. -1 means no filtering. 0 means only regular indexed content.
	 * @return resource Query result
	 */
	protected function execFinalQuery_fulltext($searchData, $freeIndexUid = -1) {
		// Setting up methods of filtering results based on page types, access, etc.
		$pageJoin = '';
		// Indexing configuration clause:
		$freeIndexUidClause = $this->pObj->freeIndexUidWhere($freeIndexUid);
		// Calling hook for alternative creation of page ID list.
		// Plain assignment: objects are handles, so binding the return value by reference is unnecessary.
		if ($hookObj = $this->pObj->hookRequest('execFinalQuery_idList')) {
			$pageWhere = $hookObj->execFinalQuery_idList('');
		} elseif ($this->pObj->join_pages) {
			// Alternative to getting all page ids by ->getTreeList() where "excludeSubpages" is NOT respected.
			$pageJoin = ',
				pages';
			$pageWhere = 'pages.uid = ISEC.page_id
				' . $this->pObj->cObj->enableFields('pages') . '
				AND pages.no_search=0
				AND pages.doktype<200
			';
		} elseif ($this->pObj->wholeSiteIdList >= 0) {
			// Collecting all pages IDs in which to search; filtering out ALL pages that are not accessible due to enableFields. Does NOT look for "no_search" field!
			$siteIdNumbers = \TYPO3\CMS\Core\Utility\GeneralUtility::intExplode(',', $this->pObj->wholeSiteIdList);
			$idList = array();
			foreach ($siteIdNumbers as $rootId) {
				/** @var \TYPO3\CMS\Frontend\ContentObject\ContentObjectRenderer $cObj */
				$cObj = \TYPO3\CMS\Core\Utility\GeneralUtility::makeInstance('TYPO3\\CMS\\Frontend\\ContentObject\\ContentObjectRenderer');
				// NOTE(review): presumably getTreeList() returns a list that ends with a comma, so appending
				// the root id yields a valid id list - confirm against ContentObjectRenderer.
				$idList[] = $cObj->getTreeList($rootId, 9999, 0, 0, '', '') . $rootId;
			}
			$pageWhere = ' ISEC.page_id IN (' . implode(',', $idList) . ')';
		} else {
			// Disable everything... (select all)
			$pageWhere = ' 1=1';
		}
		$searchBoolean = '';
		if ($searchData['searchBoolean']) {
			$searchBoolean = ' IN BOOLEAN MODE';
		}
		// The search string is the only user-supplied part of the statement and is quoted by fullQuoteStr().
		$resource = $GLOBALS['TYPO3_DB']->exec_SELECTquery(
			'index_fulltext.*, ISEC.*, IP.*',
			'index_fulltext, index_section ISEC, index_phash IP' . $pageJoin,
			'MATCH (' . $searchData['fulltextIndex'] . ') AGAINST (' . $GLOBALS['TYPO3_DB']->fullQuoteStr($searchData['searchString'], 'index_fulltext') . $searchBoolean . ') ' .
				$this->pObj->mediaTypeWhere() . ' ' .
				$this->pObj->languageWhere() .
				$freeIndexUidClause . '
				AND index_fulltext.phash = IP.phash
				AND ISEC.phash = IP.phash
				AND ' . $pageWhere . $this->pObj->sectionTableWhere(),
			// GROUP BY
			'IP.phash,ISEC.phash,ISEC.phash_t3,ISEC.rl0,ISEC.rl1,ISEC.rl2,ISEC.page_id,ISEC.uniqid,IP.phash_grouping,IP.data_filename ,IP.data_page_id ,IP.data_page_reg1,IP.data_page_type,IP.data_page_mp,IP.gr_list,IP.item_type,IP.item_title,IP.item_description,IP.item_mtime,IP.tstamp,IP.item_size,IP.contentHash,IP.crdate,IP.parsetime,IP.sys_language_uid,IP.item_crdate,IP.cHashParams,IP.externalUrl,IP.recordUid,IP.freeIndexUid,IP.freeIndexSetId'
		);
		return $resource;
	}

}

?>