Project

General

Profile

Bug #18535 » bug_7984_v5_typo3_src-trunk.diff

Administrator Admin, 2009-08-27 11:31

View differences:

typo3/sysext/cms/tslib/class.tslib_content.php (working copy)
if ($conf['bytes']){$content=t3lib_div::formatSize($content, $conf['bytes.']['labels']);}
if ($conf['substring']){$content=$this->substring($content,$conf['substring']);}
if ($conf['removeBadHTML']) {$content = $this->removeBadHTML($content, $conf['removeBadHTML.']);}
if ($conf['cropHTML']){$content=$this->cropHTML($content, $conf['cropHTML']);}
if ($conf['stripHtml']){$content = strip_tags($content);}
if ($conf['crop']){$content=$this->crop($content, $conf['crop']);}
if ($conf['rawUrlEncode']){$content = rawurlencode($content);}
......
}
/**
 * Implements the stdWrap property "cropHTML" which is a modified "substr" function allowing to limit a string length
 * to a certain number of chars (from either start or end of string) and having a pre/postfix applied if the string
 * really was cropped.
 *
 * Compared to stdWrap.crop it respects HTML tags and entities: characters inside tag-blocks are not counted,
 * an entity (e.g. "&amp;") is counted and cropped as one single character, and tags which were opened before
 * the crop position get their matching closing tag re-appended after the pre/postfix.
 *
 * @param string The string to perform the operation on
 * @param string The parameters separated by "|": First parameter is the max number of chars of the string. Negative value means cropping from end of string. Second parameter (optional) is the pre/postfix string to apply if cropping occurs. Third parameter (optional) is a boolean value. If set then crop will be applied at nearest space; if the affected text item contains no whitespace within the limit, that item is left uncropped.
 * @return string The processed input value.
 * @access private
 * @see stdWrap()
 */
function cropHTML($content, $options) {
    $options = explode('|', $options);
    $chars = intval($options[0]);
    $absChars = abs($chars);
    // Only the first part is mandatory; guard the optional ones to avoid "undefined offset" notices.
    $replacementForEllipsis = isset($options[1]) ? trim($options[1]) : '';
    $crop2space = isset($options[2]) ? trim($options[2]) : 0;

    // Split $content into an array (even items in the array are outside the tags, odd numbers are tag-blocks).
    $tags = 'a|b|blockquote|body|div|em|font|form|h1|h2|h3|h4|h5|h6|i|li|map|ol|option|p|pre|sub|sup|select|span|strong|table|thead|tbody|tfoot|td|textarea|tr|u|ul|br|hr|img|input|area|link';
    // TODO We should not crop inside <script> tags.
    // The pattern is used with the "x" modifier, so whitespace and "# ..." comments inside it are ignored.
    $tagsRegEx = "
        (
            (?:
                <!--.*?--> # a comment
            )
            |
            </?(?:". $tags . ")+ # opening tag ('<tag') or closing tag ('</tag')
            (?:
                (?:
                    \s+\w+ # EITHER spaces, followed by word characters (attribute names)
                    (?:
                        \s*=?\s* # equals
                        (?>
                            \".*?\" # attribute values in double-quotes
                            |
                            '.*?' # attribute values in single-quotes
                            |
                            [^'\">\s]+ # plain attribute values
                        )
                    )?
                )+\s*
                | # OR only spaces
                \s*
            )
            /?> # closing the tag with '>' or '/>'
        )";
    $splittedContent = preg_split('%' . $tagsRegEx . '%xs', $content, -1, PREG_SPLIT_DELIM_CAPTURE);

    // Reverse array if we are cropping from right, so the same front-to-back logic can be used below.
    if ($chars < 0) {
        $splittedContent = array_reverse($splittedContent);
    }

    // Crop the text (chars of tag-blocks are not counted).
    $renderCharset = $GLOBALS['TSFE']->renderCharset;
    $strLen = 0;
    $croppedOffset = NULL; // This is the offset of the content item which was cropped.
    $itemCount = count($splittedContent);
    // Only the even items are text; the odd ones are tag-blocks.
    for ($offset = 0; $offset < $itemCount; $offset += 2) {
        // html_entity_decode() supports the most often used charsets, but not all TYPO3 supports. This only may cause
        // problems if you use entities AND an unsupported charset. Entities will then be counted as several single characters.
        $thisStrLen = $GLOBALS['TSFE']->csConvObj->strlen($renderCharset, html_entity_decode($splittedContent[$offset], ENT_COMPAT, $renderCharset));
        if ($strLen + $thisStrLen <= $absChars) {
            $strLen += $thisStrLen;
            continue;
        }

        $croppedOffset = $offset;
        $cropPosition = $absChars - $strLen;
        // One "character" is either a complete entity ("&", 2 to 8 non-space characters, ";" - e.g. "&thetasym;")
        // or any single character. The entity alternative comes first so that an entity is never split up, and
        // ordinary text like "abc;" does not stop the match. The "s" modifier lets "." match line breaks as well.
        $entityOrChar = '(?:&[^&\s;]{2,8};|.)';
        if ($crop2space) {
            $cropRegEx = $chars < 0
                ? '#(?<=\s)' . $entityOrChar . '{0,' . $cropPosition . '}$#us'
                : '#^' . $entityOrChar . '{0,' . $cropPosition . '}(?=\s)#us';
        } else {
            $cropRegEx = $chars < 0
                ? '#' . $entityOrChar . '{0,' . $cropPosition . '}$#us'
                : '#^' . $entityOrChar . '{0,' . $cropPosition . '}#us';
        }
        // If the pattern does not match (e.g. no whitespace found in crop2space mode, or the item is not valid
        // UTF-8 as required by the "u" modifier) the item is kept as it is.
        if (preg_match($cropRegEx, $splittedContent[$offset], $croppedMatch)) {
            $splittedContent[$offset] = $croppedMatch[0];
        }
        break;
    }

    // Close cropped tags.
    $closingTags = array();
    if ($croppedOffset !== NULL) {
        // Pure ASCII patterns: no "u" modifier, so they also work on tag-blocks which are not valid UTF-8.
        $openingTagRegEx = '#^<(\w+)(?:\s|>)#';
        $closingTagRegEx = '#^</(\w+)(?:\s|>)#';
        // In the reversed array (cropping from the end) the roles of opening and closing tags are swapped.
        $enteringTagRegEx = $chars < 0 ? $closingTagRegEx : $openingTagRegEx;
        $leavingTagRegEx = $chars < 0 ? $openingTagRegEx : $closingTagRegEx;

        // Walk backwards over all tag-blocks in front of the cropped item.
        for ($offset = $croppedOffset - 1; $offset >= 0; $offset -= 2) {
            if (substr($splittedContent[$offset], -2) === '/>') {
                // Ignore empty element tags (e.g. <br />).
                continue;
            }
            if (!preg_match($enteringTagRegEx, $splittedContent[$offset], $matches)) {
                continue;
            }
            $tagName = $matches[1];
            // Seek for the closing (or opening) tag.
            for ($seekingOffset = $offset + 2; $seekingOffset < $itemCount; $seekingOffset += 2) {
                if (preg_match($leavingTagRegEx, $splittedContent[$seekingOffset], $matches) && $matches[1] === $tagName) {
                    // We found a matching tag.
                    // Add closing tag only if it occurs after the cropped content item.
                    if ($seekingOffset > $croppedOffset) {
                        $closingTags[] = $splittedContent[$seekingOffset];
                    }
                    break;
                }
            }
        }
        // Drop the cropped items of the content array. The $closingTags will be added later on again.
        array_splice($splittedContent, $croppedOffset + 1);
    }
    $splittedContent = array_merge($splittedContent, array($croppedOffset !== NULL ? $replacementForEllipsis : ''), $closingTags);

    // Reverse array once again if we are cropping from the end.
    if ($chars < 0) {
        $splittedContent = array_reverse($splittedContent);
    }

    return implode('', $splittedContent);
}
/**
* Function for removing malicious HTML code when you want to provide some HTML code user-editable.
* The purpose is to avoid XSS attacks and the code will be continuously modified to remove such code.
* For a complete reference with javascript-on-events, see http://www.wdvl.com/Authoring/JavaScript/Events/events_target.html
(8-8/8)