--- a/Classes/FileContentParser.php 2022-12-13 10:53:21
+++ b/Classes/FileContentParser.php 2023-01-12 13:59:16
@@ -548,6 +548,7 @@
             case 'xltx':
                 if ($this->app['unzip']) {
                     $this->setLocaleForServerFileSystem();
+                    $utf8_content = null;
                     switch ($ext) {
                         case 'docx':
                         case 'dotx':
@@ -557,22 +558,23 @@
                         case 'ppsx':
                         case 'pptx':
                         case 'potx':
-                            // Read slide1.xml:
-                            $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' ppt/slides/slide1.xml';
+                            $utf8_content = $this->extractPptxContent($absFile);
                             break;
                         case 'xlsx':
                         case 'xltx':
-                            // Read sheet1.xml:
-                            $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/worksheets/sheet1.xml';
+                            // Read sharedStrings.xml:
+                            $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/sharedStrings.xml';
                             break;
                         default:
                             $cmd = '';
                             break;
                     }
-                    CommandUtility::exec($cmd, $res);
-                    $content_xml = implode(LF, $res);
-                    unset($res);
-                    $utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
+                    if ($utf8_content === null) {
+                        CommandUtility::exec($cmd, $res);
+                        $content_xml = implode(LF, $res);
+                        unset($res);
+                        $utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
+                    }
                     $contentArr = $this->pObj->splitRegularContent($utf8_content);
                     // Make sure the title doesn't expose the absolute path!
                     $contentArr['title'] = PathUtility::basename($absFile);
@@ -703,6 +705,39 @@
             $contentArr['title'] = str_replace('_', ' ', PathUtility::basename($absFile));
         }
         return $contentArr;
+    }
+
+    /**
+     * Extracts the plain text of every slide in a PowerPoint Open XML file (pptx, ppsx, potx).
+     *
+     * Lists the archive with "unzip -l", then pipes each ppt/slides/slideN.xml entry through
+     * "unzip -p" and strips the XML markup.
+     *
+     * @param string $absFile Absolute filename of file (must exist and be validated OK before calling function)
+     * @return string Text of all slides joined with line feeds, in the order unzip lists them
+     */
+    protected function extractPptxContent($absFile)
+    {
+        // Extract the list of slides:
+        $cmd = $this->app['unzip'] . ' -l ' . escapeshellarg($absFile);
+        CommandUtility::exec($cmd, $res);
+
+        $buffer = [];
+        foreach ($res as $line) {
+            // The dot is escaped so that only real "slideN.xml" entries match; an unescaped dot would also
+            // accept names such as "slide1;xml" and let a crafted archive smuggle shell metacharacters.
+            if (preg_match('#\s+(ppt/slides/slide\d+\.xml)$#', $line, $matches)) {
+                $slideFile = $matches[1];
+                // Extract the content of the slide (the entry name comes from the archive, so escape it):
+                $cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' ' . escapeshellarg($slideFile);
+                CommandUtility::exec($cmd, $xml);
+                $content_xml = implode(LF, $xml);
+                unset($xml);
+                $buffer[] = trim(strip_tags(str_replace('<', ' <', $content_xml)));
+            }
+        }
+
+        return trim(implode(LF, $buffer));
     }
 
     /**