Project

General

Profile

Bug #99527 » G9-content-in-pptx-xlsx.patch

Xavier Perseguers, 2023-01-12 14:24

View differences:

Classes/FileContentParser.php 2022-12-13 10:53:21 → Classes/FileContentParser.php 2023-01-12 13:59:16
case 'xltx':
if ($this->app['unzip']) {
$this->setLocaleForServerFileSystem();
$utf8_content = null;
switch ($ext) {
case 'docx':
case 'dotx':
......
case 'ppsx':
case 'pptx':
case 'potx':
// Read slide1.xml:
$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' ppt/slides/slide1.xml';
$utf8_content = $this->extractPptxContent($absFile);
break;
case 'xlsx':
case 'xltx':
// Read sheet1.xml:
$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/worksheets/sheet1.xml';
// Read sharedStrings.xml:
$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/sharedStrings.xml';
break;
default:
$cmd = '';
break;
}
CommandUtility::exec($cmd, $res);
$content_xml = implode(LF, $res);
unset($res);
$utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
if ($utf8_content === null) {
CommandUtility::exec($cmd, $res);
$content_xml = implode(LF, $res);
unset($res);
$utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
}
$contentArr = $this->pObj->splitRegularContent($utf8_content);
// Make sure the title doesn't expose the absolute path!
$contentArr['title'] = PathUtility::basename($absFile);
......
$contentArr['title'] = str_replace('_', ' ', PathUtility::basename($absFile));
}
return $contentArr;
}
/**
 * Extracts the plain text of all slides contained in a PowerPoint OOXML archive.
 *
 * The archive is first listed with "unzip -l"; every listed entry named
 * ppt/slides/slide<N>.xml is then dumped with "unzip -p", its XML tags are
 * stripped, and the resulting text is collected. Slides are processed in the
 * order in which the archive listing reports them.
 *
 * @param string $absFile Absolute filename of file (must exist and be validated OK before calling function)
 * @return string Text of all matching slides joined with LF and trimmed; empty string if no slide entry was listed
 */
protected function extractPptxContent($absFile)
{
    $unzip = $this->app['unzip'];
    $archive = escapeshellarg($absFile);

    // Extract the list of slides. The output variable is passed by reference,
    // so it is initialised explicitly instead of relying on implicit creation.
    $listing = [];
    CommandUtility::exec($unzip . ' -l ' . $archive, $listing);

    $buffer = [];
    // The cast keeps the loop safe should the command leave a non-array value behind.
    foreach ((array)$listing as $line) {
        // The dot is escaped so that only entries really ending in ".xml" match
        // (an unescaped dot would accept any character at that position).
        if (!preg_match('#\s+(ppt/slides/slide\d+\.xml)$#', $line, $matches)) {
            continue;
        }

        // Extract the content of the slide. The entry name is escaped like the
        // archive name so every shell argument is quoted consistently, and the
        // output array starts empty on each iteration.
        // NOTE(review): presumably CommandUtility::exec() wraps PHP's exec(), which appends to a non-empty array - confirm.
        $xml = [];
        CommandUtility::exec($unzip . ' -p ' . $archive . ' ' . escapeshellarg($matches[1]), $xml);
        $content_xml = implode(LF, (array)$xml);
        unset($xml);

        // Put a space before each tag so that the text of adjacent elements
        // does not get glued together once the tags are removed.
        $buffer[] = trim(strip_tags(str_replace('<', ' <', $content_xml)));
    }

    return trim(implode(LF, $buffer));
}
/**
    (1-1/1)