Task #24551 » getURL-typo3-sysext-indexed_search.diff
typo3/sysext/indexed_search/class.external_parser.php | ||
---|---|---|
$cmd = $this->app['pdftotext'] . ' -f ' . $low . ' -l ' . $high . ' -enc UTF-8 -q ' . escapeshellarg($absFile) . ' ' . $tempFileName;
|
||
t3lib_utility_Command::exec($cmd);
|
||
if (@is_file($tempFileName)) {
|
||
-$content = t3lib_div::getUrl($tempFileName);
|
||
+$content = t3lib_div::getURL($tempFileName);
|
||
unlink($tempFileName);
|
||
} else {
|
||
$this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:pdfToolsFailed'), $absFile), 2);
|
||
... | ... | |
break;
|
||
case 'txt':
|
||
case 'csv': // Raw text
|
||
-$content = t3lib_div::getUrl($absFile);
|
||
+$content = t3lib_div::getURL($absFile);
|
||
// TODO: Auto-registration of charset???? -> utf-8 (Current assuming western europe...)
|
||
$content = $this->pObj->convertHTMLToUtf8($content, 'iso-8859-1');
|
||
$contentArr = $this->pObj->splitRegularContent($content);
|
||
... | ... | |
break;
|
||
case 'html':
|
||
case 'htm':
|
||
-$fileContent = t3lib_div::getUrl($absFile);
|
||
+$fileContent = t3lib_div::getURL($absFile);
|
||
$fileContent = $this->pObj->convertHTMLToUtf8($fileContent);
|
||
$contentArr = $this->pObj->splitHTMLContent($fileContent);
|
||
break;
|
||
case 'xml': // PHP strip-tags()
|
||
-$fileContent = t3lib_div::getUrl($absFile);
|
||
+$fileContent = t3lib_div::getURL($absFile);
|
||
// Finding charset:
|
||
preg_match('/^[[:space:]]*<\?xml[^>]+encoding[[:space:]]*=[[:space:]]*["\'][[:space:]]*([[:alnum:]_-]+)[[:space:]]*["\']/i',substr($fileContent,0,200),$reg);
|