XCLASS patch for the TYPO3 4.5.16 indexed_search external parser (cabag_patch).

Turns tx_indexed_search_extparse into ux_tx_indexed_search_extparse and:
  * wraps the configured external tool paths (pdfinfo, pdftotext, catdoc,
    ppthtml, xlhtml, unzip, unrtf) in double quotes;
  * registers docx/dotx/xlsx/xltx/pptx/ppsx/potx, which are read with unzip;
  * extracts text from word/document.xml, xl/worksheets/sheet1.xml or
    ppt/slides/slide1.xml (only that single worksheet/slide entry is read) and
    title, subject, description and keywords from docProps/core.xml;
  * lists dot/docx, pot/pptx/ppsx/potx and xlt/xlsx/xltx in the media-title switch;
  * maps the new extensions to existing icons in getIcon().

NOTE(review): indentation and blank context lines of this patch were
reconstructed; apply with "patch -l" and verify against the pristine file.

--- typo3/sysext/indexed_search/class.external_parser.php	2012-05-22 12:00:14.000000000 +0200
+++ typo3conf/ext/cabag_patch/typo3_versions/4.5.16/class.ux_external_parser.php	2012-06-06 15:47:42.063007982 +0200
@@ -72,7 +72,7 @@
  * @package TYPO3
  * @subpackage tx_indexedsearch
  */
-class tx_indexed_search_extparse {
+class ux_tx_indexed_search_extparse extends tx_indexed_search_extparse {
 
 		// This value is also overridden from config.
 	var $pdf_mode = -20; // zero: whole PDF file is indexed in one. positive value: Indicates number of pages at a time, eg. "5" would means 1-5,6-10,.... Negative integer would indicate (abs value) number of groups. Eg "3" groups of 10 pages would be 1-4,5-8,9-10
@@ -125,8 +125,8 @@
 				if ($indexerConfig['pdftools']) {
 					$pdfPath = rtrim($indexerConfig['pdftools'], '/').'/';
 					if ($safeModeEnabled || (@is_file($pdfPath . 'pdftotext' . $exe) && @is_file($pdfPath . 'pdfinfo' . $exe))) {
-						$this->app['pdfinfo'] = $pdfPath.'pdfinfo'.$exe;
-						$this->app['pdftotext'] = $pdfPath.'pdftotext'.$exe;
+						$this->app['pdfinfo'] = '"'.$pdfPath.'pdfinfo'.$exe.'"';
+						$this->app['pdftotext'] = '"'.$pdfPath.'pdftotext'.$exe.'"';
 							// PDF mode:
 						$this->pdf_mode = t3lib_div::intInRange($indexerConfig['pdf_mode'],-100,100);
 						$extOK = TRUE;
@@ -138,7 +138,7 @@
 				if ($indexerConfig['catdoc']) {
 					$catdocPath = rtrim($indexerConfig['catdoc'], '/').'/';
 					if ($safeModeEnabled || @is_file($catdocPath . 'catdoc' . $exe)) {
-						$this->app['catdoc'] = $catdocPath.'catdoc'.$exe;
+						$this->app['catdoc'] = '"'.$catdocPath.'catdoc'.$exe.'"';
 						$extOK = TRUE;
 					} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:catdocNotFound'), $catdocPath), 3);
 				} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:catdocDisabled'), 1);
@@ -149,7 +149,7 @@
 				if ($indexerConfig['ppthtml']) {
 					$ppthtmlPath = rtrim($indexerConfig['ppthtml'], '/').'/';
 					if ($safeModeEnabled || @is_file($ppthtmlPath . 'ppthtml' . $exe)) {
-						$this->app['ppthtml'] = $ppthtmlPath.'ppthtml'.$exe;
+						$this->app['ppthtml'] = '"'.$ppthtmlPath.'ppthtml'.$exe.'"';
 						$extOK = TRUE;
 					} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:ppthtmlNotFound'), $ppthtmlPath), 3);
 				} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:ppthtmlDisabled'), 1);
@@ -159,7 +159,7 @@
 				if ($indexerConfig['xlhtml']) {
 					$xlhtmlPath = rtrim($indexerConfig['xlhtml'], '/').'/';
 					if ($safeModeEnabled || @is_file($xlhtmlPath . 'xlhtml' . $exe)) {
-						$this->app['xlhtml'] = $xlhtmlPath.'xlhtml'.$exe;
+						$this->app['xlhtml'] = '"'.$xlhtmlPath.'xlhtml'.$exe.'"';
 						$extOK = TRUE;
 					} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:xlhtmlNotFound'), $xlhtmlPath), 3);
 				} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:xlhtmlDisabled'), 1);
@@ -173,17 +173,32 @@
 				if ($indexerConfig['unzip']) {
 					$unzipPath = rtrim($indexerConfig['unzip'], '/').'/';
 					if ($safeModeEnabled || @is_file($unzipPath . 'unzip' . $exe)) {
-						$this->app['unzip'] = $unzipPath.'unzip'.$exe;
+						$this->app['unzip'] = '"'.$unzipPath.'unzip'.$exe.'"';
 						$extOK = TRUE;
 					} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:unzipNotFound'), $unzipPath), 3);
 				} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:unzipDisabled'), 1);
 			break;
+			case 'docx': // Microsoft Word 2007/2008
+			case 'dotx':
+			case 'xlsx': // Microsoft Excel 2007/2008
+			case 'xltx':
+			case 'pptx': // Microsoft PowerPoint
+			case 'ppsx':
+			case 'potx':
+				if ($indexerConfig['unzip']) {
+					$unzipPath = rtrim($indexerConfig['unzip'], '/').'/';
+					if ($safeModeEnabled || @is_file($unzipPath.'unzip'.$exe)) {
+						$this->app['unzip'] = '"'.$unzipPath.'unzip'.$exe.'"';
+						$extOK = TRUE;
+					} else $this->pObj->log_setTSlogMessage("'unzip' tool for reading DOCX/XLSX/PPTX files was not found in path '".$unzipPath."unzip'",3);
+				} else $this->pObj->log_setTSlogMessage('unzip tool (docx/pptx/xlsx) disabled', 1);
+			break;
 			case 'rtf':
 					// Catdoc
 				if ($indexerConfig['unrtf']) {
 					$unrtfPath = rtrim($indexerConfig['unrtf'], '/').'/';
 					if ($safeModeEnabled || @is_file($unrtfPath . 'unrtf' . $exe)) {
-						$this->app['unrtf'] = $unrtfPath.'unrtf'.$exe;
+						$this->app['unrtf'] = '"'.$unrtfPath.'unrtf'.$exe.'"';
 						$extOK = TRUE;
 					} else $this->pObj->log_setTSlogMessage(sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:unrtfNotFound'), $unrtfPath), 3);
 				} else $this->pObj->log_setTSlogMessage($this->sL('LLL:EXT:indexed_search/locallang.xml:unrtfDisabled'), 1);
@@ -224,7 +239,14 @@
 	function softInit($extension) {
 		switch($extension) {
 			case 'pdf': // PDF
 			case 'doc': // MS Word files
+			case 'docx': // \
+			case 'dotx': // \
+			case 'pptx': // -- Microsoft Office 2007/2008
+			case 'ppsx': // /
+			case 'potx': // /
+			case 'xlsx': // /
+			case 'xltx':
 			case 'pps': // MS PowerPoint
 			case 'ppt': // MS PowerPoint
 			case 'xls': // MS Excel
@@ -273,12 +295,18 @@
 					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.PDF'), $extension);
 				}
 			break;
+			case 'dot':
+			case 'docx':
 			case 'doc':
 					// Catdoc
 				if ($indexerConfig['catdoc']) {
 					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.DOC'), $extension);
 				}
 			break;
+			case 'pot': // MS PowerPoint
+			case 'pptx': // MS PowerPoint
+			case 'ppsx': // ...
+			case 'potx':
 			case 'pps': // MS PowerPoint(?)
 			case 'ppt': // MS PowerPoint
 					// ppthtml
@@ -286,6 +314,9 @@
 					return sprintf($this->sL('LLL:EXT:indexed_search/locallang.xml:extension.PP'), $extension);
 				}
 			break;
+			case 'xlt': // MS Excel
+			case 'xlsx': // MS Excel
+			case 'xltx': // ...
 			case 'xls': // MS Excel
 					// Xlhtml
 				if ($indexerConfig['xlhtml']) {
@@ -506,6 +537,57 @@
 					}
 				}
 			break;
+			case 'docx':
+			case 'dotx':
+			case 'xlsx':
+			case 'xltx':
+			case 'pptx':
+			case 'ppsx':
+			case 'potx':
+				if($this->app['unzip']) {
+					#$fb = FirePHP::getInstance(true);
+
+						// content
+					switch($ext) {
+						case 'docx':
+						case 'dotx':
+							$cmd = $this->app['unzip'] . ' -p '. escapeshellarg($absFile) . ' word/document.xml';
+						break;
+						case 'xlsx':
+						case 'xltx':
+							$cmd = $this->app['unzip'] . ' -p '. escapeshellarg($absFile) . ' xl/worksheets/sheet1.xml';
+						break;
+						case 'ppsx':
+						case 'pptx':
+						case 'potx':
+							$cmd = $this->app['unzip'] . ' -p '. escapeshellarg($absFile) . ' ppt/slides/slide1.xml';
+						break;
+					}
+					exec($cmd,$res);
+					$content_xml = implode(chr(10),$res);
+					#$fb->log($content_xml);
+					unset($res);
+
+					$utf8_content = trim(strip_tags(str_replace('<',' <',$content_xml)));
+					$contentArr = $this->pObj->splitRegularContent($utf8_content);
+
+						// Metainformations:
+					$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' docProps/core.xml';
+					exec($cmd,$res);
+					$core_xml = implode(chr(10),$res);
+					unset($res);
+
+					$coreContent = t3lib_div::xml2tree($core_xml);
+					$contentArr['title'] = basename($absFile);
+					$contentArr['title'] .= (string) ' '.$coreContent['cp:coreProperties'][0]['ch']['dc:title'][0]['values'][0];
+					$contentArr['description'] = (string) $coreContent['cp:coreProperties'][0]['ch']['dc:subject'][0]['values'][0];
+					$contentArr['description'] .= ' '.$coreContent['cp:coreProperties'][0]['ch']['dc:description'][0]['values'][0];
+					# $contentArr['description'] .= $coreContent['cp:coreProperties'][0]['ch']['dc:creator'][0]['values'][0];
+
+					$contentArr['keywords'] .= (string) $coreContent['cp:coreProperties'][0]['ch']['cp:keywords'][0]['values'][0];
+
+				}
+			break;
 			case 'rtf':
 				if ($this->app['unrtf']) {
 					$cmd = $this->app['unrtf'] . ' ' . escapeshellarg($absFile);
@@ -668,12 +750,21 @@
 	function getIcon($extension) {
 		if ($extension=='htm') $extension = 'html';
 		if ($extension=='jpeg') $extension = 'jpg';
+		/** Added by st on 26.5.2011 */
+		if ($extension == 'docx') $extension = 'doc';
+		if ($extension == 'dotx') $extension = 'dot';
+		if ($extension == 'xlsx') $extension = 'xls';
+		if ($extension == 'xltx') $extension = 'xls';
+		if ($extension == 'pptx') $extension = 'ppt';
+		if ($extension == 'ppsx') $extension = 'pps';
+		if ($extension == 'potx') $extension = 'ppt'; // PowerPoint template: reuse the icon used for pptx
+		/** End */
 		return 'EXT:indexed_search/pi/res/'.$extension.'.gif';
 	}
 }
 
-if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search/class.external_parser.php'])) {
-	include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/indexed_search/class.external_parser.php']);
+if (defined('TYPO3_MODE') && isset($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/cabag_patch/typo3_versions/'.TYPO3_version.'/class.ux_external_parser.php'])) {
+	include_once($GLOBALS['TYPO3_CONF_VARS'][TYPO3_MODE]['XCLASS']['ext/cabag_patch/typo3_versions/'.TYPO3_version.'/class.ux_external_parser.php']);
 }
 
-?>
\ No newline at end of file
+?>