Bug #37795 » indexed_search_office2k7.patch
typo3conf/ext/cabag_patch/typo3_versions/4.5.16/class.ux_external_parser.php 2012-06-06 15:47:42.063007982 +0200 | ||
---|---|---|
* @package TYPO3
|
||
* @subpackage tx_indexedsearch
|
||
*/
|
||
class tx_indexed_search_extparse {
|
||
class ux_tx_indexed_search_extparse extends tx_indexed_search_extparse {
|
||
// This value is also overridden from config.
|
||
var $pdf_mode = -20; // Zero: the whole PDF file is indexed in one. Positive value: indicates the number of pages at a time, e.g. "5" would mean 1-5,6-10,.... Negative integer: indicates (abs value) the number of groups, e.g. "3" groups of 10 pages would be 1-4,5-8,9-10
|
||
... | ... | |
if ($indexerConfig['pdftools']) {
|
||
$pdfPath = rtrim($indexerConfig['pdftools'], '/').'/';
|
||
if ($safeModeEnabled || (@is_file($pdfPath . 'pdftotext' . $exe) && @is_file($pdfPath . 'pdfinfo' . $exe))) {
|
||
$this->app['pdfinfo'] = $pdfPath.'pdfinfo'.$exe;
|
||
$this->app['pdftotext'] = $pdfPath.'pdftotext'.$exe;
|
||
$this->app['pdfinfo'] = '"'.$pdfPath.'pdfinfo'.$exe.'"';
|
||
$this->app['pdftotext'] = '"'.$pdfPath.'pdftotext'.$exe.'"';
|
||
// PDF mode:
|
||
$this->pdf_mode = t3lib_div::intInRange($indexerConfig['pdf_mode'],-100,100);
|
||
$extOK = TRUE;
|
||
... | ... | |
if ($indexerConfig['catdoc']) {
|
||
$catdocPath = rtrim($indexerConfig['catdoc'], '/').'/';
|
||
if ($safeModeEnabled || @is_file($catdocPath . 'catdoc' . $exe)) {
|
||
$this->app['catdoc'] = $catdocPath.'catdoc'.$exe;
|
||
$this->app['catdoc'] = '"'.$catdocPath.'catdoc'.$exe.'"';
|
||
$extOK = TRUE;
|
||
} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:catdocNotFound'), $catdocPath), 3);
|
||
} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:catdocDisabled'), 1);
|
||
... | ... | |
if ($indexerConfig['ppthtml']) {
|
||
$ppthtmlPath = rtrim($indexerConfig['ppthtml'], '/').'/';
|
||
if ($safeModeEnabled || @is_file($ppthtmlPath . 'ppthtml' . $exe)) {
|
||
$this->app['ppthtml'] = $ppthtmlPath.'ppthtml'.$exe;
|
||
$this->app['ppthtml'] = '"'.$ppthtmlPath.'ppthtml'.$exe.'"';
|
||
$extOK = TRUE;
|
||
} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:ppthtmlNotFound'), $ppthtmlPath), 3);
|
||
} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:ppthtmlDisabled'), 1);
|
||
... | ... | |
if ($indexerConfig['xlhtml']) {
|
||
$xlhtmlPath = rtrim($indexerConfig['xlhtml'], '/').'/';
|
||
if ($safeModeEnabled || @is_file($xlhtmlPath . 'xlhtml' . $exe)) {
|
||
$this->app['xlhtml'] = $xlhtmlPath.'xlhtml'.$exe;
|
||
$this->app['xlhtml'] = '"'.$xlhtmlPath.'xlhtml'.$exe.'"';
|
||
$extOK = TRUE;
|
||
} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:xlhtmlNotFound'), $xlhtmlPath), 3);
|
||
} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:xlhtmlDisabled'), 1);
|
||
... | ... | |
if ($indexerConfig['unzip']) {
|
||
$unzipPath = rtrim($indexerConfig['unzip'], '/').'/';
|
||
if ($safeModeEnabled || @is_file($unzipPath . 'unzip' . $exe)) {
|
||
$this->app['unzip'] = $unzipPath.'unzip'.$exe;
|
||
$this->app['unzip'] = '"'.$unzipPath.'unzip'.$exe.'"';
|
||
$extOK = TRUE;
|
||
} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:unzipNotFound'), $unzipPath), 3);
|
||
} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:unzipDisabled'), 1);
|
||
break;
|
||
case 'docx': // Microsoft Word 2007/2008
|
||
case 'dotx':
|
||
case 'xlsx': // Microsoft Excel 2007/2008
|
||
case 'xltx':
|
||
case 'pptx': // Microsoft PowerPoint
|
||
case 'ppsx':
|
||
case 'potx':
|
||
if ($indexerConfig['unzip']) {
|
||
$unzipPath = rtrim($indexerConfig['unzip'], '/').'/';
|
||
if (ini_get('safe_mode') || @is_file($unzipPath.'unzip'.$exe)) {
|
||
$this->app['unzip'] = '"'.$unzipPath.'unzip'.$exe.'"';
|
||
$extOK = TRUE;
|
||
} else $this->pObj->log_setTSlogMessage("'unzip' tool for reading DOCX/XLSX/PPTX files was not found in path '".$unzipPath."unzip'",3);
|
||
} else $this->pObj->log_setTSlogMessage('unzip tool (docx/pptx/xlsx) disabled', 1);
|
||
break;
|
||
case 'rtf':
|
||
// Unrtf
|
||
if ($indexerConfig['unrtf']) {
|
||
$unrtfPath = rtrim($indexerConfig['unrtf'], '/').'/';
|
||
if ($safeModeEnabled || @is_file($unrtfPath . 'unrtf' . $exe)) {
|
||
$this->app['unrtf'] = $unrtfPath.'unrtf'.$exe;
|
||
$this->app['unrtf'] = '"'.$unrtfPath.'unrtf'.$exe.'"';
|
||
$extOK = TRUE;
|
||
} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:unrtfNotFound'), $unrtfPath), 3);
|
||
} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:unrtfDisabled'), 1);
|
||
... | ... | |
function softInit($extension) {
|
||
switch($extension) {
|
||
case 'pdf': // PDF
|
||
case 'doc': // MS Word files
|
||
case 'doc': // MS Word filescase 'docx':
|
||
case 'docx': // \
|
||
case 'dotx': // \
|
||
case 'pptx': // -- Microsoft Office 2007/2008
|
||
case 'ppsx': // /
|
||
case 'potx': // /
|
||
case 'xlsx': // /
|
||
case 'xltx':
|
||
case 'pps': // MS PowerPoint
|
||
case 'ppt': // MS PowerPoint
|
||
case 'xls': // MS Excel
|
||
... | ... | |
return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.PDF'), $extension);
|
||
}
|
||
break;
|
||
case 'dot':
|
||
case 'docx':
|
||
case 'doc':
|
||
// Catdoc
|
||
if ($indexerConfig['catdoc']) {
|
||
return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.DOC'), $extension);
|
||
}
|
||
break;
|
||
case 'pot': // MS PowerPoint
|
||
case 'pptx': // MS PowerPoint
|
||
case 'ppsx': // ...
|
||
case 'potx':
|
||
case 'pps': // MS PowerPoint(?)
|
||
case 'ppt': // MS PowerPoint
|
||
// ppthtml
|
||
... | ... | |
return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.PP'), $extension);
|
||
}
|
||
break;
|
||
case 'xlt': // MS Excel
|
||
case 'xlsx': // MS Excel
|
||
case 'xltx': // ...
|
||
case 'xls': // MS Excel
|
||
// Xlhtml
|
||
if ($indexerConfig['xlhtml']) {
|
||
... | ... | |
}
|
||
}
|
||
break;
|
||
case 'docx':
|
||
case 'dotx':
|
||
case 'xlsx':
|
||
case 'xltx':
|
||
case 'pptx':
|
||
case 'ppsx':
|
||
case 'potx':
|
||
if($this->app['unzip']) {
|
||
#$fb = FirePHP::getInstance(true);
|
||
// content
|
||
switch($ext) {
|
||
case 'docx':
|
||
case 'dotx':
|
||
$cmd = $this->app['unzip'] . ' -p '. escapeshellarg($absFile) . ' word/document.xml';
|
||
break;
|
||
case 'xlsx':
|
||
case 'xltx':
|
||
$cmd = $this->app['unzip'] . ' -p '. escapeshellarg($absFile) . ' xl/worksheets/sheet1.xml';
|
||
break;
|
||
case 'ppsx':
|
||
case 'pptx':
|
||
case 'potx':
|
||
$cmd = $this->app['unzip'] . ' -p '. escapeshellarg($absFile) . ' ppt/slides/slide1.xml';
|
||
break;
|
||
}
|
||
exec($cmd,$res);
|
||
$content_xml = implode(chr(10),$res);
|
||
#$fb->log($content_xml);
|
||
unset($res);
|
||
$utf8_content = trim(strip_tags(str_replace('<',' <',$content_xml)));
|
||
$contentArr = $this->pObj->splitRegularContent($utf8_content);
|
||
// Metainformations:
|
||
$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' docProps/core.xml';
|
||
exec($cmd,$res);
|
||
$core_xml = implode(chr(10),$res);
|
||
unset($res);
|
||
$coreContent = t3lib_div::xml2tree($core_xml);
|
||
$contentArr['title'] = basename($absFile);
|
||
$contentArr['title'] .= (string) ' '.$coreContent['cp:coreProperties'][0]['ch']['dc:title'][0]['values'][0];
|
||
$contentArr['description'] = (string) $coreContent['cp:coreProperties'][0]['ch']['dc:subject'][0]['values'][0];
|
||
$contentArr['description'] .= ' '.$coreContent['cp:coreProperties'][0]['ch']['dc:description'][0]['values'][0];
|
||
# $contentArr['description'] .= $coreContent['cp:coreProperties'][0]['ch']['dc:creator'][0]['values'][0];
|
||
$contentArr['keywords'] .= (string) $coreContent['cp:coreProperties'][0]['ch']['cp:keywords'][0]['values'][0];
|
||
}
|
||
break;
|
||
case 'rtf':
|
||
if ($this->app['unrtf']) {
|
||
$cmd = $this->app['unrtf'] . ' ' . escapeshellarg($absFile);
|
||
... | ... | |
/**
 * Returns the icon file reference for a file extension.
 *
 * Some extensions share the icon of a closely related format, so they are
 * first translated through an alias table. Each Office 2007 extension is
 * mapped to the icon name of its legacy counterpart; any extension without
 * an alias is used unchanged.
 *
 * @param	string		File extension, e.g. "pdf" or "docx".
 * @return	string		Reference of the form "EXT:indexed_search/pi/res/<name>.gif".
 */
function getIcon($extension) {
	// Extension => icon base name. Only exact matches are translated.
	$iconAliases = array(
		'htm' => 'html',
		'jpeg' => 'jpg',
		// Office 2007/2008 formats reuse the icons of the legacy formats.
		'docx' => 'doc',
		'dotx' => 'dot',
		'xlsx' => 'xls',
		'xltx' => 'xls',
		'pptx' => 'ppt',
		'ppsx' => 'pps',
		// 'potx' is handled like the other Office 2007 extensions elsewhere in
		// this class, so it must not fall through to a "potx.gif" reference.
		'potx' => 'pot',
	);

	// Guard against non-string input before using it as an array offset.
	if (is_string($extension) && isset($iconAliases[$extension])) {
		$extension = $iconAliases[$extension];
	}

	return 'EXT:indexed_search/pi/res/'.$extension.'.gif';
}
|
||
}
|
||
if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search/class.external_parser.php'])) {
|
||
include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search/class.external_parser.php']);
|
||
if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/cabag_patch/typo3_versions/'.TYPO3_version.'/class.ux_external_parser.php'])) {
|
||
include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/cabag_patch/typo3_versions/'.TYPO3_version.'/class.ux_external_parser.php']);
|
||
}
|
||
?>
|
||
?>
|