Index: typo3/sysext/indexed_search/class.indexer.php
===================================================================
--- typo3/sysext/indexed_search/class.indexer.php (revision 7383)
+++ typo3/sysext/indexed_search/class.indexer.php (working copy)
@@ -788,8 +788,8 @@
$qParts = parse_url($linkSource); // parse again due to new linkSource!
}
- if ($qParts['scheme']) {
- if ($this->indexerConfig['indexExternalURLs']) {
+ if (!$linkInfo['localPath'] && $qParts['scheme']) {
+ if ($this->indexerConfig['indexExternalURLs']) {
// Index external URL (http or otherwise)
$this->indexExternalUrl($linkSource);
}
@@ -859,18 +859,11 @@
switch (strtolower($firstTagName)) {
case 'a':
- $src = $params[0]['href'];
- if ($src) {
- // Check if a local path to that file has been set - useful if you are using a download script.
- $md5 = t3lib_div::shortMD5($src);
- if (is_array($indexLocalFiles=$GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'])) {
- $localPath = isset($indexLocalFiles[$md5]) ? $indexLocalFiles[$md5] : '';
- } else $localPath=false;
-
+ if ($params[0]['href'] && $params[0]['href']{0} != '#') {
$list[] = array(
'tag' => $v,
'href' => $params[0]['href'],
- 'localPath' => $localPath
+ 'localPath' => $this->createLocalPath($params[0]['href'])
);
}
break;
@@ -985,16 +978,152 @@
+	/**
+	 * Resolves a link target to the absolute path of a local file.
+	 *
+	 * The resolver methods listed in $pathFunctions are tried in order and the
+	 * first non-empty result wins.
+	 *
+	 * @param string $sourcePath Link target, for example the href of an anchor tag
+	 * @return string Absolute path to file if file is local, else empty string
+	 */
+	protected function createLocalPath($sourcePath) {
+		static $pathFunctions = array(
+			'createLocalPathFromT3vars',
+			'createLocalPathUsingAbsRefPrefix',
+			'createLocalPathUsingDomainURL',
+			'createLocalPathFromAbsoluteURL',
+			'createLocalPathFromRelativeURL'
+		);
+		foreach ($pathFunctions as $functionName) {
+			$localPath = $this->$functionName($sourcePath);
+			if ($localPath != '') {
+				return $localPath;
+			}
+		}
+		// No resolver matched. Return a real empty string instead of
+		// whatever "empty" value (possibly NULL) the last resolver produced,
+		// so that the documented return type always holds.
+		return '';
+	}
+	/**
+	 * Attempts to create a local file path from T3VARs. This is useful for
+	 * various download extensions that hide actual file name but still want the
+	 * file to be indexed.
+	 *
+	 * The registry $GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles']
+	 * maps t3lib_div::shortMD5() of the link target to the path of the file.
+	 * The registered path is used as it is.
+	 *
+	 * @param string $sourcePath Link target as found in the content
+	 * @return string Registered file path if it exists, else empty string
+	 */
+	protected function createLocalPathFromT3vars($sourcePath) {
+		$localPath = '';
+		$indexLocalFiles = isset($GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'])
+			? $GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles']
+			: NULL;
+		if (is_array($indexLocalFiles)) {
+			$md5 = t3lib_div::shortMD5($sourcePath);
+			// Note: not using self::isAllowedLocalFile here because this method
+			// is allowed to index files outside of the web site (for example,
+			// protected downloads). For the same reason PATH_site must not be
+			// prepended: the registered value already is the file path.
+			if (isset($indexLocalFiles[$md5]) && is_file($indexLocalFiles[$md5])) {
+				$localPath = $indexLocalFiles[$md5];
+			}
+		}
+		return $localPath;
+	}
+	/**
+	 * Attempts to create a local file path by stripping the site URL
+	 * (TYPO3_SITE_URL) from the beginning of the link target.
+	 *
+	 * @param string $sourcePath Link target as found in the content
+	 * @return string Absolute path to the file if the link starts with the site URL and points to an allowed local file, else empty string
+	 */
+	protected function createLocalPathUsingDomainURL($sourcePath) {
+		$localPath = '';
+		$baseURL = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
+		$baseURLLength = strlen($baseURL);
+		// An empty site URL would "match" every link, so it is ignored
+		// (same guard as for absRefPrefix).
+		if ($baseURLLength > 0 && substr($sourcePath, 0, $baseURLLength) == $baseURL) {
+			$sourcePath = substr($sourcePath, $baseURLLength);
+			$localPath = PATH_site . $sourcePath;
+			if (!self::isAllowedLocalFile($localPath)) {
+				$localPath = '';
+			}
+		}
+		return $localPath;
+	}
+	/**
+	 * Tries to map the link target to a local file by removing the configured
+	 * absRefPrefix from its beginning. A TSFE object is required; without it
+	 * (or without a matching, non-empty prefix) nothing is resolved.
+	 *
+	 * @param string $sourcePath Link target as found in the content
+	 * @return string Absolute path to the allowed local file, else empty string
+	 */
+	protected function createLocalPathUsingAbsRefPrefix($sourcePath) {
+		if (!($GLOBALS['TSFE'] instanceof tslib_fe)) {
+			return '';
+		}
+		$prefix = $GLOBALS['TSFE']->config['config']['absRefPrefix'];
+		$prefixLength = strlen($prefix);
+		if ($prefixLength <= 0 || substr($sourcePath, 0, $prefixLength) != $prefix) {
+			return '';
+		}
+		// The remainder after the prefix is taken relative to the site root
+		$candidate = PATH_site . substr($sourcePath, $prefixLength);
+		return self::isAllowedLocalFile($candidate) ? $candidate : '';
+	}
+	/**
+	 * Attempts to create a local file path from a link target that starts
+	 * with a slash. The part after the leading slash is taken relative to
+	 * PATH_site.
+	 *
+	 * @param string $sourcePath Link target as found in the content
+	 * @return string Absolute path to the allowed local file, else empty string
+	 */
+	protected function createLocalPathFromAbsoluteURL($sourcePath) {
+		$localPath = '';
+		// substr() instead of a string offset: safe for an empty string and
+		// independent of the offset syntax supported by the PHP version
+		if (substr($sourcePath, 0, 1) === '/') {
+			$localPath = PATH_site . substr($sourcePath, 1);
+			if (!self::isAllowedLocalFile($localPath)) {
+				$localPath = '';
+			}
+		}
+		return $localPath;
+	}
+	/**
+	 * Attempts to create a local file path from a relative URL. The URL is
+	 * taken relative to PATH_site.
+	 *
+	 * @param string $sourcePath Link target as found in the content
+	 * @return string Absolute path to the allowed local file, else empty string
+	 */
+	protected function createLocalPathFromRelativeURL($sourcePath) {
+		// Must be initialised: it is returned as is when the URL is not relative
+		$localPath = '';
+		if (self::isRelativeURL($sourcePath)) {
+			$localPath = PATH_site . $sourcePath;
+			if (!self::isAllowedLocalFile($localPath)) {
+				$localPath = '';
+			}
+		}
+		return $localPath;
+	}
+	/**
+	 * Checks if URL is relative, that is: it has no scheme and its path does
+	 * not start with a slash.
+	 *
+	 * @param string $url URL to check
+	 * @return boolean TRUE if the URL is relative, FALSE otherwise (including URLs that parse_url() cannot parse)
+	 */
+	static protected function isRelativeURL($url) {
+		// "@" because parse_url() raised a warning on malformed URLs in older PHP versions
+		$urlParts = @parse_url($url);
+		if (!is_array($urlParts)) {
+			// parse_url() returns FALSE on seriously malformed URLs
+			return FALSE;
+		}
+		// Both components are optional in the result of parse_url()
+		$scheme = isset($urlParts['scheme']) ? $urlParts['scheme'] : '';
+		$path = isset($urlParts['path']) ? $urlParts['path'] : '';
+		return ($scheme == '' && substr($path, 0, 1) !== '/');
+	}
+	/**
+	 * Tells whether the given path, after resolving back paths, is an existing
+	 * file located below PATH_site.
+	 *
+	 * @param string $filePath Path to check
+	 * @return boolean TRUE if the path is an existing file inside the web site
+	 */
+	static protected function isAllowedLocalFile($filePath) {
+		$resolvedPath = t3lib_div::resolveBackPath($filePath);
+		$sitePathLength = strlen(PATH_site);
+		return (is_file($resolvedPath) && substr($resolvedPath, 0, $sitePathLength) == PATH_site);
+	}
-
-
/******************************************
*
* Indexing; external files (PDF, DOC, etc)
Index: typo3/sysext/indexed_search/tests/tx_indexedsearch_indexer_testcase.php
===================================================================
--- typo3/sysext/indexed_search/tests/tx_indexedsearch_indexer_testcase.php (revision 0)
+++ typo3/sysext/indexed_search/tests/tx_indexedsearch_indexer_testcase.php (revision 0)
@@ -0,0 +1,159 @@
+<?php
+/**
+ * Test case for the tx_indexedsearch_indexer class.
+ *
+ * @package TYPO3
+ * @subpackage tx_indexedsearch
+ */
+class tx_indexedsearch_indexer_testcase extends tx_phpunit_testcase {
+
+	/**
+	 * Indexer instance
+	 *
+	 * @var tx_indexedsearch_indexer
+	 */
+	protected $indexer;
+
+	/**
+	 * Sets up the test
+	 *
+	 * @return void
+	 */
+	public function setUp() {
+		$this->indexer = t3lib_div::makeInstance('tx_indexedsearch_indexer');
+	}
+
+	/**
+	 * Explicitly cleans up the indexer object to prevent any memory leaks
+	 *
+	 * @return void
+	 */
+	public function tearDown() {
+		unset($this->indexer);
+	}
+
+	/**
+	 * Checks that non-existing files are not returned
+	 *
+	 * @return void
+	 */
+	public function testNonExistingLocalPath() {
+		// A random name makes sure that the linked file does not exist
+		$html = 'test <a href="' . md5(uniqid('')) . '">test</a> test';
+		$result = $this->indexer->extractHyperLinks($html);
+
+		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
+		$this->assertEquals('', $result[0]['localPath'], 'Local path is incorrect');
+	}
+
+	/**
+	 * Checks that using t3vars returns correct file
+	 *
+	 * @return void
+	 */
+	public function testLocalPathWithT3Vars() {
+		// NOTE(review): this link target is itself an existing file, so other
+		// resolvers can satisfy the assertion too. Consider a target that is
+		// not a file to test the T3_VAR lookup in isolation.
+		$file = 'index.php';
+		$html = 'test <a href="' . $file . '">test</a> test';
+		$savedValue = $GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'];
+		// The indexer looks entries up by t3lib_div::shortMD5() of the link target
+		$GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'] = array(
+			t3lib_div::shortMD5($file) => PATH_site . 'index.php'
+		);
+		$result = $this->indexer->extractHyperLinks($html);
+		$GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'] = $savedValue;
+
+		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
+		$this->assertEquals(PATH_site . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
+	}
+
+	/**
+	 * Tests that a link starting with the site URL is resolved to a local file
+	 *
+	 * @return void
+	 */
+	public function testLocalPathWithSiteURL() {
+		$baseURL = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
+		$html = 'test <a href="' . $baseURL . 'index.php">test</a> test';
+		$result = $this->indexer->extractHyperLinks($html);
+
+		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
+		$this->assertEquals(PATH_site . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
+	}
+
+	/**
+	 * Tests relative path
+	 *
+	 * @return void
+	 */
+	public function testRelativeLocalPath() {
+		$html = 'test <a href="index.php">test</a> test';
+		$result = $this->indexer->extractHyperLinks($html);
+
+		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
+		$this->assertEquals(PATH_site . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
+	}
+
+	/**
+	 * Tests absolute path.
+	 *
+	 * @return void
+	 */
+	public function testAbsoluteLocalPath() {
+		// Path of the typo3/ directory relative to the site root, with a leading slash
+		$path = substr(PATH_typo3, strlen(PATH_site) - 1);
+		$html = 'test <a href="' . $path . 'index.php">test</a> test';
+		$result = $this->indexer->extractHyperLinks($html);
+
+		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
+		$this->assertEquals(PATH_typo3 . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
+	}
+
+	/**
+	 * Tests that a path with the absRefPrefix returns correct result
+	 *
+	 * @return void
+	 */
+	public function testLocalPathWithAbsRefPrefix() {
+		// The indexer evaluates absRefPrefix only if a frontend object exists
+		if (!($GLOBALS['TSFE'] instanceof tslib_fe)) {
+			$this->markTestSkipped('This test requires TSFE');
+		}
+		$absRefPrefix = '/' . md5(uniqid(''));
+		$html = 'test <a href="' . $absRefPrefix . 'index.php">test</a> test';
+		$savedPrefix = $GLOBALS['TSFE']->config['config']['absRefPrefix'];
+		$GLOBALS['TSFE']->config['config']['absRefPrefix'] = $absRefPrefix;
+		$result = $this->indexer->extractHyperLinks($html);
+		$GLOBALS['TSFE']->config['config']['absRefPrefix'] = $savedPrefix;
+
+		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
+		$this->assertEquals(PATH_site . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
+	}
+}
+
+// XCLASS inclusion: if TYPO3_MODE is defined and a file is registered under
+// the key below in $TYPO3_CONF_VARS for the current mode, that file is
+// included once.
+// NOTE(review): the key names "ext/indexed_search/tests/class.tx_indexedsearch_indexer_testcase.php"
+// while this patch creates "typo3/sysext/indexed_search/tests/tx_indexedsearch_indexer_testcase.php"
+// - confirm that the key is the intended one.
+if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/tests/class.tx_indexedsearch_indexer_testcase.php']) {
+ include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/tests/class.tx_indexedsearch_indexer_testcase.php']);
+}
+
+?>
\ No newline at end of file