Bug #99527 » G9-content-in-pptx-xlsx.patch
Classes/FileContentParser.php 2022-12-13 10:53:21 → Classes/FileContentParser.php 2023-01-12 13:59:16 | ||
---|---|---|
case 'xltx':
|
||
if ($this->app['unzip']) {
|
||
$this->setLocaleForServerFileSystem();
|
||
$utf8_content = null;
|
||
switch ($ext) {
|
||
case 'docx':
|
||
case 'dotx':
|
||
... | ... | |
case 'ppsx':
|
||
case 'pptx':
|
||
case 'potx':
|
||
// Read slide1.xml:
|
||
$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' ppt/slides/slide1.xml';
|
||
$utf8_content = $this->extractPptxContent($absFile);
|
||
break;
|
||
case 'xlsx':
|
||
case 'xltx':
|
||
// Read sheet1.xml:
|
||
$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/worksheets/sheet1.xml';
|
||
// Read sharedStrings.xml:
|
||
$cmd = $this->app['unzip'] . ' -p ' . escapeshellarg($absFile) . ' xl/sharedStrings.xml';
|
||
break;
|
||
default:
|
||
$cmd = '';
|
||
break;
|
||
}
|
||
CommandUtility::exec($cmd, $res);
|
||
$content_xml = implode(LF, $res);
|
||
unset($res);
|
||
$utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
|
||
if ($utf8_content === null) {
|
||
CommandUtility::exec($cmd, $res);
|
||
$content_xml = implode(LF, $res);
|
||
unset($res);
|
||
$utf8_content = trim(strip_tags(str_replace('<', ' <', $content_xml)));
|
||
}
|
||
$contentArr = $this->pObj->splitRegularContent($utf8_content);
|
||
// Make sure the title doesn't expose the absolute path!
|
||
$contentArr['title'] = PathUtility::basename($absFile);
|
||
... | ... | |
$contentArr['title'] = str_replace('_', ' ', PathUtility::basename($absFile));
|
||
}
|
||
return $contentArr;
|
||
}
|
||
/**
 * Extracts the plain text of all slides of a PowerPoint archive.
 *
 * The archive members are listed with "unzip -l"; every member named
 * ppt/slides/slide<number>.xml is then written to stdout with "unzip -p",
 * stripped of its XML tags and collected. Slides are processed in the order
 * in which the archive listing reports them.
 *
 * @param string $absFile Absolute filename of file (must exist and be validated OK before calling function)
 * @return string Tag-stripped text of all slides joined with LF; empty string if the listing contains no slide
 */
protected function extractPptxContent($absFile)
{
    $quotedFile = escapeshellarg($absFile);

    // Extract the list of slides:
    $listing = [];
    CommandUtility::exec($this->app['unzip'] . ' -l ' . $quotedFile, $listing);

    $buffer = [];
    foreach ($listing as $line) {
        // The dot is escaped on purpose: member names come from the (untrusted) archive,
        // and an unescaped dot would accept names like "slide1;xml" or "slide1>xml".
        if (!preg_match('#\s+(ppt/slides/slide\d+\.xml)$#', $line, $matches)) {
            continue;
        }

        // Extract the content of the slide. The member name is quoted as well so that
        // nothing read from the archive listing is ever interpreted by the shell.
        $slideXml = [];
        $cmd = $this->app['unzip'] . ' -p ' . $quotedFile . ' ' . escapeshellarg($matches[1]);
        CommandUtility::exec($cmd, $slideXml);

        // Put a space in front of every tag so that words of adjacent elements
        // do not get glued together once the tags are stripped.
        $content_xml = implode(LF, $slideXml);
        $buffer[] = trim(strip_tags(str_replace('<', ' <', $content_xml)));
    }

    return trim(implode(LF, $buffer));
}
|
||
/**
|