Project

General

Profile

Bug #22296 » 13858_final.diff

Administrator Admin, 2010-05-31 10:41

View differences:

ChangeLog (revision 7780)
2010-05-31 Dmitry Dulepov <dmitry.dulepov@gmail.com>
* Fixed bug #13858: IS cannot index files if absRefPrefix is set and indexExternalURLs is not (thanks to Christian Kuhn and Steffen Ritter for help!)
2010-05-31 Benjamin Mack <benni@typo3.org>
* Fixed bug #13138: Add hook for manipulating content in felogin extension (Thanks to Thomas Layh)
typo3/sysext/indexed_search/class.indexer.php (revision 7780)
$qParts = parse_url($linkSource); // parse again due to new linkSource!
}
if ($qParts['scheme']) {
if (!$linkInfo['localPath'] && $qParts['scheme']) {
if ($this->indexerConfig['indexExternalURLs']) {
// Index external URL (http or otherwise)
$this->indexExternalUrl($linkSource);
......
}
/**
* Extracts all links to external documents from content string.
* Extracts all links to external documents from the HTML content string
*
* @param string Content to analyse
* @return array Array of hyperlinks
* @param string $html
* @return array Array of hyperlinks (keys: tag, href, localPath (empty if not local))
* @see extractLinks()
*/
function extractHyperLinks($string) {
if (!is_object($this->htmlParser)) {
$this->htmlParser = t3lib_div::makeInstance('t3lib_parseHtml');
}
function extractHyperLinks($html) {
$htmlParser = t3lib_div::makeInstance('t3lib_parseHtml');
$htmlParts = $htmlParser->splitTags('a', $html);
$hyperLinksData = array();
foreach ($htmlParts as $index => $tagData) {
if (($index % 2) !== 0) {
$tagAttributes = $htmlParser->get_tag_attributes($tagData, TRUE);
$firstTagName = $htmlParser->getFirstTagName($tagData);
$parts = $this->htmlParser->splitTags('a',$string);
$list = array();
foreach ($parts as $k => $v) {
if ($k%2) {
$params = $this->htmlParser->get_tag_attributes($v,1);
$firstTagName = $this->htmlParser->getFirstTagName($v); // The 'name' of the first tag
switch (strtolower($firstTagName)) {
case 'a':
$src = $params[0]['href'];
if ($src) {
// Check if a local path to that file has been set - useful if you are using a download script.
$md5 = t3lib_div::shortMD5($src);
if (is_array($indexLocalFiles=$GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'])) {
$localPath = isset($indexLocalFiles[$md5]) ? $indexLocalFiles[$md5] : '';
} else $localPath=false;
$list[] = array(
'tag' => $v,
'href' => $params[0]['href'],
'localPath' => $localPath
if (strtolower($firstTagName) == 'a') {
if ($tagAttributes[0]['href'] && $tagAttributes[0]['href']{0} != '#') {
$hyperLinksData[] = array(
'tag' => $tagData,
'href' => $tagAttributes[0]['href'],
'localPath' => $this->createLocalPath($tagAttributes[0]['href'])
);
}
break;
}
}
}
return $list;
return $hyperLinksData;
}
/**
......
* @param string Content to analyze
* @return string The base href or an empty string if not found
*/
public function extractBaseHref($string) {
if (!is_object($this->htmlParser)) {
$this->htmlParser = t3lib_div::makeInstance('t3lib_parseHtml');
}
$parts = $this->htmlParser->splitTags('base', $string);
foreach ($parts as $key => $value) {
if ($key % 2) {
$params = $this->htmlParser->get_tag_attributes($value, 1);
$firstTagName = $this->htmlParser->getFirstTagName($value); // The 'name' of the first tag
switch (strtolower($firstTagName)) {
case 'base':
$href = $params[0]['href'];
public function extractBaseHref($html) {
$href = '';
$htmlParser = t3lib_div::makeInstance('t3lib_parseHtml');
$htmlParts = $htmlParser->splitTags('base', $html);
foreach ($htmlParts as $index => $tagData) {
if (($index % 2) !== 0) {
$tagAttributes = $htmlParser->get_tag_attributes($tagData, true);
$firstTagName = $htmlParser->getFirstTagName($tagData);
if (strtolower($firstTagName) == 'base') {
$href = $tagAttributes[0]['href'];
if ($href) {
// Return the first "base href" found (a single one should be present anyway)
return $href;
break;
}
}
}
}
return '';
return $href;
}
/******************************************
*
* Indexing; external URL
......
/**
 * Resolves a link target to the path of a local file, if there is one.
 *
 * A fixed list of resolver methods is tried in order; the first one that
 * returns a non-empty path wins.
 *
 * @param string $sourcePath Link target to resolve
 * @return string Absolute path to file if file is local, else empty string
 */
protected function createLocalPath($sourcePath) {
		// Resolver method names, in the order they are attempted
	static $resolvers = array(
		'createLocalPathFromT3vars',
		'createLocalPathUsingAbsRefPrefix',
		'createLocalPathUsingDomainURL',
		'createLocalPathFromAbsoluteURL',
		'createLocalPathFromRelativeURL'
	);

	$resolvedPath = '';
	foreach ($resolvers as $resolver) {
		$resolvedPath = $this->$resolver($sourcePath);
		if ($resolvedPath != '') {
				// First hit wins, remaining resolvers are not consulted
			return $resolvedPath;
		}
	}

	return $resolvedPath;
}
/**
 * Looks the link up in the "indexLocalFiles" list stored in T3_VAR. This is
 * useful for download extensions that hide the actual file name but still
 * want the file to be indexed. Entries are keyed by the short MD5 of the
 * link.
 *
 * @param string $sourcePath Link target to resolve
 * @return string File path taken from the list if it is an existing file, else empty string
 */
protected function createLocalPathFromT3vars($sourcePath) {
	$registeredFiles = $GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'];
	if (!is_array($registeredFiles)) {
		return '';
	}

	$lookupKey = t3lib_div::shortMD5($sourcePath);
		// Note: self::isAllowedLocalFile is intentionally not used here because
		// this method is allowed to index files outside of the web site (for
		// example, protected downloads)
	if (!isset($registeredFiles[$lookupKey]) || !is_file($registeredFiles[$lookupKey])) {
		return '';
	}

	return $registeredFiles[$lookupKey];
}
/**
 * Tries to map a link that starts with the site URL of the current request
 * (TYPO3_SITE_URL) to a file below PATH_site.
 *
 * @param string $sourcePath Link target to resolve
 * @return string Local file path, or empty string if the link does not match or the file is not allowed
 */
protected function createLocalPathUsingDomainURL($sourcePath) {
	$siteUrl = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
	$siteUrlLength = strlen($siteUrl);

	if (substr($sourcePath, 0, $siteUrlLength) != $siteUrl) {
		return '';
	}

		// Replace the site URL with the file system root of the site
	$candidate = PATH_site . substr($sourcePath, $siteUrlLength);

	return self::isAllowedLocalFile($candidate) ? $candidate : '';
}
/**
 * Tries to map a link that starts with the configured absRefPrefix to a
 * file below PATH_site. The prefix is read from TSFE, so nothing is
 * resolved when TSFE is not a tslib_fe instance or the prefix is empty.
 *
 * @param string $sourcePath Link target to resolve
 * @return string Local file path, or empty string if the link does not match or the file is not allowed
 */
protected function createLocalPathUsingAbsRefPrefix($sourcePath) {
	if (!($GLOBALS['TSFE'] instanceof tslib_fe)) {
		return '';
	}

	$prefix = $GLOBALS['TSFE']->config['config']['absRefPrefix'];
	$prefixLength = strlen($prefix);
	if ($prefixLength <= 0 || substr($sourcePath, 0, $prefixLength) != $prefix) {
		return '';
	}

		// Replace the prefix with the file system root of the site
	$candidate = PATH_site . substr($sourcePath, $prefixLength);

	return self::isAllowedLocalFile($candidate) ? $candidate : '';
}
/**
 * Attempts to create a local file path from an absolute URL without
 * scheme, i.e. a link that starts with a slash. The leading slash is
 * dropped and the remainder is looked up below PATH_site.
 *
 * @param string $sourcePath Link target to resolve
 * @return string Local file path, or empty string if the link does not start with "/" or the file is not allowed
 */
protected function createLocalPathFromAbsoluteURL($sourcePath) {
	$localPath = '';
		// substr() is used instead of the {0} string offset: the curly brace
		// offset syntax is deprecated in PHP 7.4 and removed in PHP 8, and
		// substr() does not raise a notice for an empty string
	if (substr($sourcePath, 0, 1) === '/') {
		$sourcePath = substr($sourcePath, 1);
		$localPath = PATH_site . $sourcePath;
		if (!self::isAllowedLocalFile($localPath)) {
			$localPath = '';
		}
	}
	return $localPath;
}
/**
 * Tries to map a relative link to a file below PATH_site.
 *
 * @param string $sourcePath Link target to resolve
 * @return string Local file path, or empty string if the link is not relative or the file is not allowed
 */
protected function createLocalPathFromRelativeURL($sourcePath) {
	if (!self::isRelativeURL($sourcePath)) {
		return '';
	}

		// The relative link is appended to PATH_site as is
	$candidate = PATH_site . $sourcePath;

	return self::isAllowedLocalFile($candidate) ? $candidate : '';
}
/**
 * Checks if URL is relative, i.e. it has no scheme and its path does not
 * start with a slash.
 *
 * @param string $url URL to check
 * @return boolean TRUE if the URL is relative
 */
static protected function isRelativeURL($url) {
	$urlParts = @parse_url($url);
	if (!is_array($urlParts)) {
			// parse_url() could not parse the value. Such values have always
			// been reported as relative; this is kept, but made explicit
			// instead of indexing into a non-array.
		return TRUE;
	}

		// Both keys are optional in the parse_url() result
	$scheme = isset($urlParts['scheme']) ? $urlParts['scheme'] : '';
	$path = isset($urlParts['path']) ? $urlParts['path'] : '';

		// substr() replaces the {0} string offset, which is deprecated in
		// PHP 7.4 and removed in PHP 8
	return ($scheme == '' && substr($path, 0, 1) !== '/');
}
/**
 * Checks if the path points to an existing file inside the web site. Back
 * path segments are resolved before the path is compared with PATH_site.
 *
 * @param string $filePath Path to check
 * @return boolean TRUE if the resolved path starts with PATH_site and is a file
 */
static protected function isAllowedLocalFile($filePath) {
	$resolvedPath = t3lib_div::resolveBackPath($filePath);
	$leadingPart = substr($resolvedPath, 0, strlen(PATH_site));

	return is_file($resolvedPath) && ($leadingPart == PATH_site);
}
/******************************************
*
* Indexing; external files (PDF, DOC, etc)
typo3/sysext/indexed_search/tests/tx_indexedsearch_indexer_testcase.php (revision 7780)
<?php
/***************************************************************
* Copyright notice
*
* (c) 2010 Dmitry Dulepov (dmitry.dulepov@gmail.com)
* All rights reserved
*
* This script is part of the Typo3 project. The Typo3 project is
* free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* The GNU General Public License can be found at
* http://www.gnu.org/copyleft/gpl.html.
*
* This script is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* This copyright notice MUST APPEAR in all copies of the script!
***************************************************************/
/**
* $Id$
*
*/
require_once(t3lib_extMgm::extPath('indexed_search', 'class.indexer.php'));
/**
 * This class contains unit tests for the indexer. The tests cover the
 * resolution of link targets to local file paths in extractHyperLinks() and
 * the extraction of the base href.
 *
 * @author Dmitry Dulepov <dmitry.dulepov@gmail.com>
 * @author Christian Kuhn <lolli@schwarzbu.ch>
 * @package TYPO3
 * @subpackage tx_indexedsearch
 */
class tx_indexedsearch_indexer_testcase extends tx_phpunit_testcase {

	/**
	 * Indexer instance
	 *
	 * @var tx_indexedsearch_indexer
	 */
	protected $indexer;

	/**
	 * A name of the temporary file. If set, the file is removed in tearDown().
	 *
	 * @var string
	 */
	protected $temporaryFileName = '';

	/**
	 * Sets up the test
	 *
	 * @return void
	 */
	public function setUp() {
		$this->indexer = t3lib_div::makeInstance('tx_indexedsearch_indexer');
	}

	/**
	 * Explicitly cleans up the indexer object to prevent any memory leaks
	 * and removes the temporary file if a test created one
	 *
	 * @return void
	 */
	public function tearDown() {
		unset($this->indexer);
		if ($this->temporaryFileName) {
			@unlink($this->temporaryFileName);
		}
	}

	/**
	 * Checks that non-existing files are not returned
	 *
	 * @return void
	 */
	public function testNonExistingLocalPath() {
		$html = 'test <a href="' . md5(uniqid('')) . '">test</a> test';
		$result = $this->indexer->extractHyperLinks($html);

		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
		$this->assertEquals('', $result[0]['localPath'], 'Local path is incorrect');
	}

	/**
	 * Checks that using t3vars returns correct file
	 *
	 * @return void
	 */
	public function testLocalPathWithT3Vars() {
		$this->temporaryFileName = tempnam(sys_get_temp_dir(), 't3unit-');
		$html = 'test <a href="testfile">test</a> test';

			// Register the temporary file for the link and restore the
			// previous value before asserting, so a failure cannot leak it
		$savedValue = $GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'];
		$GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'] = array(
			t3lib_div::shortMD5('testfile') => $this->temporaryFileName
		);
		$result = $this->indexer->extractHyperLinks($html);
		$GLOBALS['T3_VAR']['ext']['indexed_search']['indexLocalFiles'] = $savedValue;

		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
		$this->assertEquals($this->temporaryFileName, $result[0]['localPath'], 'Local path is incorrect');
	}

	/**
	 * Tests that a link starting with the site URL is resolved to a local path
	 *
	 * @return void
	 */
	public function testLocalPathWithSiteURL() {
		$baseURL = t3lib_div::getIndpEnv('TYPO3_SITE_URL');
		$html = 'test <a href="' . $baseURL . 'index.php">test</a> test';
		$result = $this->indexer->extractHyperLinks($html);

		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
		$this->assertEquals(PATH_site . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
	}

	/**
	 * Tests relative path
	 *
	 * @return void
	 */
	public function testRelativeLocalPath() {
		$html = 'test <a href="index.php">test</a> test';
		$result = $this->indexer->extractHyperLinks($html);

		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
		$this->assertEquals(PATH_site . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
	}

	/**
	 * Tests absolute path.
	 *
	 * @return void
	 */
	public function testAbsoluteLocalPath() {
			// Part of PATH_typo3 that follows PATH_site, including the
			// leading slash
		$path = substr(PATH_typo3, strlen(PATH_site) - 1);
		$html = 'test <a href="' . $path . 'index.php">test</a> test';
		$result = $this->indexer->extractHyperLinks($html);

		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
		$this->assertEquals(PATH_typo3 . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
	}

	/**
	 * Tests that a path with the absRefPrefix returns correct result
	 *
	 * @return void
	 */
	public function testLocalPathWithAbsRefPrefix() {
			// The prefix is read from TSFE by the code under test. Without a
			// real TSFE there is nothing to test, and writing the setting
			// below would only pollute $GLOBALS.
		if (!($GLOBALS['TSFE'] instanceof tslib_fe)) {
			$this->markTestSkipped('This test requires TSFE to be an instance of tslib_fe');
		}

		$absRefPrefix = '/' . md5(uniqid(''));
		$html = 'test <a href="' . $absRefPrefix . 'index.php">test</a> test';

		$savedPrefix = $GLOBALS['TSFE']->config['config']['absRefPrefix'];
		$GLOBALS['TSFE']->config['config']['absRefPrefix'] = $absRefPrefix;
		$result = $this->indexer->extractHyperLinks($html);
		$GLOBALS['TSFE']->config['config']['absRefPrefix'] = $savedPrefix;

		$this->assertEquals(1, count($result), 'Wrong number of parsed links');
		$this->assertEquals(PATH_site . 'index.php', $result[0]['localPath'], 'Local path is incorrect');
	}

	/**
	 * Checks that base HREF is extracted correctly
	 *
	 * @return void
	 */
	public function testExtractBaseHref() {
		$baseHref = 'http://example.com/';
		$html = '<html><head><Base Href="' . $baseHref . '" /></head></html>';
		$result = $this->indexer->extractBaseHref($html);

		$this->assertEquals($baseHref, $result, 'Incorrect base href was extracted');
	}
}
// XCLASS inclusion: if TYPO3_CONF_VARS registers an extending class file for
// this file in the current TYPO3_MODE, that file is included here.
// NOTE(review): the key below names "class.tx_indexedsearch_indexer_testcase.php"
// below "ext/", while this file appears to be stored as
// "typo3/sysext/indexed_search/tests/tx_indexedsearch_indexer_testcase.php" -
// confirm which key XCLASS registrations are expected to use.
if (defined('TYPO3_MODE') && $TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/tests/class.tx_indexedsearch_indexer_testcase.php']) {
include_once($TYPO3_CONF_VARS[TYPO3_MODE]['XCLASS']['ext/indexed_search/tests/class.tx_indexedsearch_indexer_testcase.php']);
}
?>
(6-6/6)