Bug #18535 » bug_7984_v5_typo3_src-trunk.diff
typo3/sysext/cms/tslib/class.tslib_content.php (working copy) | ||
---|---|---|
if ($conf['bytes']){$content=t3lib_div::formatSize($content, $conf['bytes.']['labels']);}
|
||
if ($conf['substring']){$content=$this->substring($content,$conf['substring']);}
|
||
if ($conf['removeBadHTML']) {$content = $this->removeBadHTML($content, $conf['removeBadHTML.']);}
|
||
if ($conf['cropHTML']){$content=$this->cropHTML($content, $conf['cropHTML']);}
|
||
if ($conf['stripHtml']){$content = strip_tags($content);}
|
||
if ($conf['crop']){$content=$this->crop($content, $conf['crop']);}
|
||
if ($conf['rawUrlEncode']){$content = rawurlencode($content);}
|
||
... | ... | |
}
|
||
/**
 * Implements the stdWrap property "cropHTML", a modified "substr" function which limits a string
 * to a certain number of characters (counted from either the start or the end of the string) and
 * applies a pre/postfix if the string really was cropped.
 *
 * Compared to stdWrap.crop it respects HTML tags and entities: characters inside tags are not
 * counted, an entity is treated as one character, and tags left open by the crop are closed again.
 *
 * The text length is measured with $GLOBALS['TSFE']->csConvObj->strlen() using
 * $GLOBALS['TSFE']->renderCharset.
 *
 * @param	string		$content: The string to perform the operation on
 * @param	string		$options: The parameters split by "|": First parameter is the max number of chars of the string. Negative value means cropping from end of string. Second parameter (optional) is the pre/postfix string to apply if cropping occurs. Third parameter (optional) is a boolean value. If set then crop will be applied at nearest space.
 * @return	string		The processed input value.
 * @access private
 * @see stdWrap()
 */
function cropHTML($content, $options) {
	$options = explode('|', $options);
	$chars = intval($options[0]);
	$absChars = abs($chars);
		// The pre/postfix is optional; without this guard a configuration like "20" (no "|") reads an undefined offset.
	$replacementForEllipsis = isset($options[1]) ? trim($options[1]) : '';
	$crop2space = isset($options[2]) ? trim($options[2]) : 0;

		// Split $content into an array (even items in the array are outside the tags, odd numbers are tag-blocks).
	$tags= 'a|b|blockquote|body|div|em|font|form|h1|h2|h3|h4|h5|h6|i|li|map|ol|option|p|pre|sub|sup|select|span|strong|table|thead|tbody|tfoot|td|textarea|tr|u|ul|br|hr|img|input|area|link';
		// TODO We should not crop inside <script> tags.
	$tagsRegEx = "
		(
			(?:
				<!--.*?-->					# a comment
			)
			|
			</?(?:". $tags . ")+			# opening tag ('<tag') or closing tag ('</tag')
			(?:
				(?:
					\s+\w+					# EITHER spaces, followed by word characters (attribute names)
					(?:
						\s*=?\s*			# equals
						(?>
							\".*?\"			# attribute values in double-quotes
							|
							'.*?'			# attribute values in single-quotes
							|
							[^'\">\s]+		# plain attribute values
						)
					)?
				)+\s*
				|							# OR only spaces
				\s*
			)
			/?>								# closing the tag with '>' or '/>'
		)";
		// The single capturing group plus PREG_SPLIT_DELIM_CAPTURE keeps the tags in the result at the odd offsets.
	$splittedContent = preg_split('%' . $tagsRegEx . '%xs', $content, -1, PREG_SPLIT_DELIM_CAPTURE);

		// Reverse array if we are cropping from right.
	if ($chars < 0) {
		$splittedContent = array_reverse($splittedContent);
	}

		// Crop the text (chars of tag-blocks are not counted).
	$strLen = 0;
	$croppedOffset = NULL; // This is the offset of the content item which was cropped.
		// The number of items does not change inside the loops below, so it is counted only once.
	$itemCount = count($splittedContent);
	for ($offset = 0; $offset < $itemCount; $offset++) {
		if ($offset%2 === 0) {
				// html_entity_decode() supports the most often used charsets, but not all TYPO3 supports. This only may cause
				// problems if you use entities AND an unsupported charset. Entities will then be counted as several single characters.
			$thisStrLen = $GLOBALS['TSFE']->csConvObj->strlen($GLOBALS['TSFE']->renderCharset, html_entity_decode($splittedContent[$offset],ENT_COMPAT,$GLOBALS['TSFE']->renderCharset));
			if (($strLen + $thisStrLen > $absChars)) {
				$croppedOffset = $offset;
					// Number of characters which still fit into this text item.
				$cropPosition = $absChars - $strLen;
				if ($crop2space) {
					$cropRegEx = $chars < 0 ? '#(?<=\s).{0,' . $cropPosition . '}$#ui' : '#^.{0,' . $cropPosition . '}(?=\s)#ui';
				} else {
						// The snippets "&[^&\s;]{2,7};" in the RegEx below represents entities.
					$cropRegEx = $chars < 0 ? '#(.(?![^&\s]{2,7};)|(&[^&\s;]{2,7};)){0,' . $cropPosition . '}$#ui' : '#^(.(?![^&\s]{2,7};)|(&[^&\s;]{2,7};)){0,' . $cropPosition . '}#ui';
				}
					// If the expression does not match, the item is left untouched.
				if (preg_match($cropRegEx, $splittedContent[$offset], $croppedMatch)) {
					$splittedContent[$offset] = $croppedMatch[0];
				}
				break;
			} else {
				$strLen += $thisStrLen;
			}
		}
	}

		// Close cropped tags.
	$closingTags = array();
	if ($croppedOffset !== NULL) {
		$openingTagRegEx = '#^<(\w+)(?:\s|>)#u';
		$closingTagRegEx = '#^</(\w+)(?:\s|>)#u';
			// When cropping from the end the array is reversed, so the roles of opening and closing tags are swapped.
		$leadingTagRegEx = $chars < 0 ? $closingTagRegEx : $openingTagRegEx;
		$trailingTagRegEx = $chars < 0 ? $openingTagRegEx : $closingTagRegEx;

			// Walk backwards over the tag-blocks (odd offsets) in front of the cropped text item.
		for ($offset = $croppedOffset - 1; $offset >= 0; $offset = $offset - 2) {
			if (preg_match('&/>$&', $splittedContent[$offset])) {
					// Ignore empty element tags (e.g. <br />).
				continue;
			}
			preg_match($leadingTagRegEx, $splittedContent[$offset], $matches);
			$tagName = isset($matches[1]) ? $matches[1] : NULL;
			if ($tagName !== NULL) {
					// Seek for the closing (or opening) tag.
				for ($seekingOffset = $offset + 2; $seekingOffset < $itemCount; $seekingOffset = $seekingOffset + 2) {
					preg_match($trailingTagRegEx, $splittedContent[$seekingOffset], $matches);
					$seekingTagName = isset($matches[1]) ? $matches[1] : NULL;
					if ($tagName === $seekingTagName) { // We found a matching tag.
							// Add closing tag only if it occurs after the cropped content item.
						if ($seekingOffset > $croppedOffset) {
							$closingTags[] = $splittedContent[$seekingOffset];
						}
						break;
					}
				}
			}
		}
			// Drop the cropped items of the content array. The $closingTags will be added later on again.
		array_splice($splittedContent, $croppedOffset + 1);
	}
		// Append the pre/postfix (only if something was cropped) followed by the collected closing tags.
	$splittedContent = array_merge($splittedContent, array($croppedOffset !== NULL ? $replacementForEllipsis : ''), $closingTags);

		// Reverse array once again if we are cropping from the end.
	if ($chars < 0) {
		$splittedContent = array_reverse($splittedContent);
	}

	return implode('', $splittedContent);
}
|
||
/**
|
||
* Function for removing malicious HTML code when you want to provide some HTML code user-editable.
|
||
* The purpose is to avoid XSS attacks and the code will be continuously modified to remove such code.
|
||
* For a complete reference with javascript-on-events, see http://www.wdvl.com/Authoring/JavaScript/Events/events_target.html
|
- « Previous
- 1
- …
- 6
- 7
- 8
- Next »