preg_t3lib_parsehtml.diff - TYPO3 Core - TYPO3 Forge

Bug #15121 » preg_t3lib_parsehtml.diff

Administrator Admin, 2005-10-27 00:46

      * @package TYPO3
      * @subpackage t3lib
      */
     class t3lib_parsehtml {
     class t3lib_parsehtml	{
     	var $caseShift_cache=array();
-...
     	 * @return	string
     	 */
     	function getSubpart($content, $marker)	{
     			// Returns the text found between the first two occurrences of $marker in $content.
     			// If the markers are wrapped in HTML comments (<!-- ###MARKER### begin -->), the
     			// remainder of the opening comment and the start of the closing comment are stripped.
     			// Returns an empty string if the marker is empty or does not occur twice.
     		$content = (string)$content;
     		$marker = (string)$marker;

     			// An empty marker cannot delimit anything (and strpos() rejects/behaves oddly with an empty needle)
     		if ($marker === '')	{ return ''; }

     		$start = strpos($content, $marker);
     		if ($start === false)	{ return ''; }
     		$start += strlen($marker);

     		$stop = strpos($content, $marker, $start);
     			// No closing marker: nothing is returned
     		if ($stop === false)	{ return ''; }

     		$subpart = (string)substr($content, $start, $stop - $start);

     			// Group 1: tail of the opening comment, group 2: the payload, group 3: head of the closing comment.
     			// The D modifier makes "$" match only at the very end, so a trailing newline stays in the payload.
     		$matches = array();
     		if (preg_match('/^([^<>]*-->)?(.*?)(<!--[^<>]*)?$/sD', $subpart, $matches) === 1)	{
     			return $matches[2];
     		}
     			// PCRE failure (e.g. backtrack limit): fall back to the unstripped subpart
     		return $subpart;
     	}
     	/**
-...
     	 */
     	function substituteSubpart($content,$marker,$subpartContent,$recursive=1,$keepMarker=0)	{
     			// Replaces the subpart delimited by two occurrences of $marker in $content.
     			// - $subpartContent as string: the subpart is replaced by that string.
     			// - $subpartContent as array: the existing subpart is wrapped in [0] and [1].
     			// - $recursive: further marker pairs after the first one are processed as well.
     			// - $keepMarker: the markers (and the HTML comments around them) stay in the output;
     			//   otherwise the markers and their surrounding comment fragments are removed.
     			// $content is returned unchanged if the marker is empty or does not occur twice.
     		$marker = (string)$marker;
     			// Guard: an empty marker would match at every offset and recurse endlessly
     		if ($marker === '')	{ return $content; }

     		$start = strpos($content, $marker);
     		if ($start === false)	{ return $content; }
     		$startAM = $start + strlen($marker);		// offset just After the opening Marker
     		$stop = strpos($content, $marker, $startAM);
     		if ($stop === false)	{ return $content; }
     		$stopAM = $stop + strlen($marker);		// offset just After the closing Marker

     		$before = (string)substr($content, 0, $start);
     		$between = (string)substr($content, $startAM, $stop - $startAM);
     		$after = (string)substr($content, $stopAM);

     			// Handle the remaining marker pairs first; the comment tail of this pair is stripped from the result below
     		if ($recursive)	{
     			$after = $this->substituteSubpart($after, $marker, $subpartContent, $recursive, $keepMarker);
     		}

     			// Group 1: tail of the opening comment, group 2: payload, group 3: head of the closing comment.
     			// D modifier: "$" only matches at the very end, so a trailing newline stays in the payload.
     		$wrapPattern = '/^([^<>]*-->)?(.*?)(<!--[^<>]*)?$/sD';
     		$matches = array();

     		if ($keepMarker)	{
     			if (preg_match($wrapPattern, $between, $matches) === 1)	{
     				$before .= $marker.$matches[1];
     				$between = $matches[2];
     					// Group 3 is absent from $matches when the closing marker is not inside a comment
     				$after = (isset($matches[3]) ? $matches[3] : '').$marker.$after;
     			} else	{
     					// Pattern could not be applied: keep both markers around the untouched subpart
     				$before .= $marker;
     				$after = $marker.$after;
     			}
     		} else	{
     				// Drop the "<!-- " that opens the comment holding the first marker
     			if (preg_match('/^(.*)<!--[^<>]*$/sD', $before, $matches) === 1)	{
     				$before = $matches[1];
     			}
     				// The old subpart is only reused when wrapping, so only then strip its comment fragments
     			if (is_array($subpartContent) && preg_match($wrapPattern, $between, $matches) === 1)	{
     				$between = $matches[2];
     			}
     				// Drop the " -->" that closes the comment holding the second marker
     			if (preg_match('/^[^<>]*-->(.*)$/sD', $after, $matches) === 1)	{
     				$after = $matches[1];
     			}
     		}

     		if (is_array($subpartContent))	{
     			$between = $subpartContent[0].$between.$subpartContent[1];
     		} else	{
     			$between = $subpartContent;
     		}

     		return $before.$between.$after;
     	}
     	// *******************************************'
     	// COPY FROM class.tslib_content.php: / END
     	// *******************************************'
-...
     	 */
     	function splitIntoBlock($tag,$content,$eliminateExtraEndTags=0)	{
     		$tags=array_unique(t3lib_div::trimExplode(',',$tag,1));
     		$regexStr = '</?('.implode('|',$tags).')(>|[[:space:]][^>]*>)';
     		$regexStr = '/\<\/?('.implode('|', $tags).')(\s*\>|\s[^\>]*\>)/si';
     		$parts = spliti($regexStr,$content);
     		$parts = preg_split($regexStr, $content);
     		$newParts=array();
     		$pointer=strlen($parts[0]);
-...
     	 */
     	function splitTags($tag,$content)	{
     		$tags = t3lib_div::trimExplode(',',$tag,1);
     		$regexStr = '<('.implode('|',$tags).')(>|\/>|[[:space:]][^>]*>)';
     		$parts = spliti($regexStr,$content);
     		$regexStr = '/\<('.implode('|', $tags).')(\s[^>]*)?\/?>/si';
     		$parts = preg_split($regexStr, $content);
     		$pointer = strlen($parts[0]);
     		$newParts = array();
-...
     	 * @see splitIntoBlock(), splitTags()
     	 */
     	function getAllParts($parts,$tag_parts=1,$include_tag=1)	{
     		reset($parts);
     		$newParts=array();
     		while(list($k,$v)=each($parts))	{
     		foreach ($parts as $k => $v)	{
     			if (($k+($tag_parts?0:1))%2)	{
     				if (!$include_tag)	$v=$this->removeFirstAndLastTag($v);
     				$newParts[]=$v;
-...
     	/**
     	 * Removes the first and last tag in the string.
     	 * Anything before the first tag and after the last tag is removed as well.
     	 *
     	 * @param	string		String to process
     	 * @return	string		Content between the end of the first tag and the start of the last tag; empty string if no such span exists
     	 */
     	function removeFirstAndLastTag($str)	{
     		$str = (string)$str;
     			// End of first tag:
     		$start = strpos($str, '>');
     			// Begin of last tag:
     		$end = strrpos($str, '<');
     			// Without a closing bracket followed by a later opening bracket there is nothing between two tags.
     			// (Unchecked, "false" would be used as offset 0 and the length could become negative.)
     		if ($start === false || $end === false || $end <= $start)	{
     			return '';
     		}
     		return (string)substr($str, $start + 1, $end - $start - 1);
     	}
     	/**
-...
     	 */
     	function getFirstTag($str)	{
     			// Returns everything from the start of $str up to and including the first ">".
     			// Nothing is skipped, so the tag should be the first thing in the string.
     		$str = (string)$str;
     		$tagEnd = strpos($str, '>');
     			// No closing bracket: return the whole string (as the strcspn()-based version did),
     			// instead of letting "false + 1" cut the string down to its first character.
     		if ($tagEnd === false)	{
     			return $str;
     		}
     		return substr($str, 0, $tagEnd + 1);
     	}
     	/**
-...
     	 * @see getFirstTag()
     	 */
     	function getFirstTagName($str,$preserveCase=FALSE)	{
     			// Returns the name of the tag that $str starts with (leading whitespace is allowed).
     			// The name is everything between "<" and the first whitespace or ">", so an end tag
     			// yields a name with a leading slash. Uppercased unless $preserveCase is set.
     			// Returns an empty string if $str does not start with a tag.
     		$matches = array();
     		if (preg_match('/^\s*<([^\s>]+)(\s|>)/', (string)$str, $matches) !== 1)	{
     			return '';
     		}
     		return $preserveCase ? $matches[1] : strtoupper($matches[1]);
     	}
     	/**
-...
     	function get_tag_attributes($tag,$deHSC=0)	{
     		list($components,$metaC) = $this->split_tag_attributes($tag);
     		$name = '';	 // attribute name is stored here
     		$valuemode = '';
     		$valuemode = false;
     		$attributes = array();
     		$attributesMeta = array();
     		if (is_array($components))	{
     			while (list($key,$val) = each ($components))	{
     			foreach ($components as $key => $val)	{
     				if ($val != '=')	{	// Only if $name is set (if there is an attribute, that waits for a value), that valuemode is enabled. This ensures that the attribute is assigned it's value
     					if ($valuemode)	{
     						if ($name)	{
-...
     							$name = '';
+    						}
     					} else {
     						if ($namekey = ereg_replace('[^a-zA-Z0-9_:-]','',$val))	{
     						if ($namekey = preg_replace('/[^[:alnum:]_\:\-]/','',$val))	{
     							$name = strtolower($namekey);
     							$attributesMeta[$name]=array();
     							$attributesMeta[$name]['origTag']=$namekey;
     							$attributes[$name] = '';
+    						}
+    					}
     					$valuemode = '';
     					$valuemode = false;
     				} else {
     					$valuemode = 'on';
     					$valuemode = true;
+    				}
+    			}
     			if (is_array($attributes))	reset($attributes);
     			return array($attributes,$attributesMeta);
+    		}
+    	}
-...
     	 * @see t3lib_div::split_tag_attributes()
     	 */
     	function split_tag_attributes($tag)	{
     			// Splits the attribute part of a tag into a flat token list.
     			// $tag may be a full tag ("<a href=x>") or just its attribute string ("href=x").
     			// Returns array($value, $metaValue): $value holds attribute names, "=" signs and
     			// attribute values in order of appearance (quotes removed); $metaValue holds, at the
     			// same index, the quote character that surrounded the token ('' if unquoted).
     		$tag_tmp = trim((string)$tag);
     			// Remove the leading "<tagname" if present. The name ends at whitespace or ">",
     			// so an attribute-less tag like "<br>" leaves nothing behind.
     		$tag_tmp = trim((string)preg_replace('/^<[^\s>]*/', '', $tag_tmp));
     			// Remove a ">" at the end of the string
     		if (substr($tag_tmp, -1) === '>')	{
     			$tag_tmp = rtrim((string)substr($tag_tmp, 0, -1));
     		}

     		$value = array();
     		$metaValue = array();
     		$matches = array();
     			// Tokens: "double quoted", 'single quoted', a bare word, or a single "="
     		if (preg_match_all('/"[^"]*"|\'[^\']*\'|[^\s"\'=]+|=/', $tag_tmp, $matches) > 0)	{
     			foreach ($matches[0] as $part)	{
     				$firstChar = substr($part, 0, 1);
     				if ($firstChar === '"' || $firstChar === "'")	{
     					$metaValue[] = $firstChar;
     					$value[] = (string)substr($part, 1, -1);
     				} else	{
     					$metaValue[] = '';
     					$value[] = $part;
     				}
     			}
     		}
     		return array($value, $metaValue);
     	}
-...
     			// Block tags, must have endings...
     		$blockTags = explode(',',$blockTags);
     		foreach($blockTags as $tagName)	{
     			$countBegin = count(split('<'.$tagName.'[^[:alnum:]]',$content))-1;
     			$countEnd = count(split('<\/'.$tagName.'[^[:alnum:]]',$content))-1;
     			$countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
     			$countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
     			$analyzedOutput['blocks'][$tagName]=array($countBegin,$countEnd,$countBegin-$countEnd);
     			if ($countBegin)	$analyzedOutput['counts'][$tagName]=$countBegin;
     			if ($countBegin-$countEnd)	{
-...
     			// Solo tags, must NOT have endings...
     		$soloTags = explode(',',$soloTags);
     		foreach($soloTags as $tagName)	{
     			$countBegin = count(split('<'.$tagName.'[^[:alnum:]]',$content))-1;
     			$countEnd = count(split('<\/'.$tagName.'[^[:alnum:]]',$content))-1;
     			$countBegin = count(preg_split('/\<'.$tagName.'(\s|\>)/s',$content))-1;
     			$countEnd = count(preg_split('/\<\/'.$tagName.'(\s|\>)/s',$content))-1;
     			$analyzedOutput['solo'][$tagName]=array($countBegin,$countEnd);
     			if ($countBegin)	$analyzedOutput['counts'][$tagName]=$countBegin;
     			if ($countEnd)	{
-...
     		while(list(,$tok)=each($tokArr))	{
     			$firstChar = substr($tok,0,1);
     #			if (strcmp(trim($firstChar),''))	{		// It is a tag...
     			if (ereg('[[:alnum:]\/]',$firstChar))	{		// It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
     				$tagEnd = strcspn($tok,'>');
     				if (strlen($tok)!=$tagEnd)	{	// If there is and end-bracket...
     			if (preg_match('/[[:alnum:]\/]/',$firstChar)==1)	{		// It is a tag... (first char is a-z0-9 or /) (fixed 19/01 2004). This also avoids triggering on <?xml..> and <!DOCTYPE..>
     				$tagEnd = strpos($tok,'>');
     				if ($tagEnd)	{	// If there is and end-bracket...	tagEnd can't be 0 as the first character can't be a >
     					$endTag = $firstChar=='/' ? 1 : 0;
     					$tagContent = substr($tok,$endTag,$tagEnd-$endTag);
     					$tagParts = split('[[:space:]]',$tagContent,2);
     					$tagParts = preg_split('/\s+/s',$tagContent,2);
     					$tagName = strtolower($tagParts[0]);
     					if (isset($tags[$tagName]))	{
     						if (is_array($tags[$tagName]))	{	// If there is processing to do for the tag:
-...
     										$tagAttrib = $this->get_tag_attributes($tagParts[1]);
     										$tagParts[1]='';
     										$newTagAttrib = array();
     										$tList = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
     										while(list(,$allowTag)=each($tList))	{
     										if (!($tList = $tags[$tagName]['_allowedAttribs']))	{
     												// Explode the allowed-attributes list only once per tag and keep the result for later tokens
     											$tList = $tags[$tagName]['_allowedAttribs'] = t3lib_div::trimExplode(',',strtolower($tags[$tagName]['allowedAttribs']),1);
+    										}
     										foreach ($tList as $allowTag)	{
     											if (isset($tagAttrib[0][$allowTag]))	$newTagAttrib[$allowTag]=$tagAttrib[0][$allowTag];
+    										}
     										$tagParts[1]=$this->compileTagAttribs($newTagAttrib,$tagAttrib[1]);
-...
+    		}
     			// Unsetting tags:
     		reset($tagRegister);
     		while(list($tag,$positions)=each($tagRegister))	{
     			reset($positions);
     			while(list(,$pKey)=each($positions))	{
     		foreach ($tagRegister as $tag => $positions)	{
     			foreach ($positions as $pKey)	{
     				unset($newContent[$pKey]);
+    			}
+    		}
-...
     	function prefixResourcePath($main_prefix,$content,$alternatives=array(),$suffix='')	{
     		$parts = $this->splitTags('embed,td,table,body,img,input,form,link,script,a',$content);
     		foreach($parts as $k => $v)	{
     		foreach ($parts as $k => $v)	{
     			if ($k%2)	{
     				$params = $this->get_tag_attributes($v,1);
     				$tagEnd = substr($v,-2)=='/>' ? ' />' : '>';	// Detect tag-ending so that it is re-applied correctly.
-...
     					break;
+    				}
     				if ($somethingDone)	{
     					$tagParts = split('[[:space:]]',$v,2);
     					$tagParts = preg_split('/\s+/s',$v,2);
     					$tagParts[1]=$this->compileTagAttribs($params[0],$params[1]);
     					$parts[$k] = '<'.trim(strtolower($firstTagName).' '.$tagParts[1]).
     									$tagEnd;
     					$parts[$k] = '<'.trim(strtolower($firstTagName).' '.$tagParts[1]).$tagEnd;
+    				}
+    			}
+    		}
-...
     	 */
     	function cleanFontTags($value,$keepFace=0,$keepSize=0,$keepColor=0)	{
     		$fontSplit = $this->splitIntoBlock('font',$value);	// ,1 ?? - could probably be more stable if splitTags() was used since this depends on end-tags being properly set!
     		reset($fontSplit);
     		while(list($k,$v)=each($fontSplit))	{
     		foreach ($fontSplit as $k => $v)	{
     			if ($k%2)	{	// font:
     				$attribArray=$this->get_tag_attributes_classic($this->getFirstTag($v));
     				$newAttribs=array();
-...
     	function mapTags($value,$tags=array(),$ltChar='<',$ltChar2='<')	{
     			// Renames tags in $value: for every $from => $to pair in $tags, opening and closing
     			// "$from" tags (matched case-insensitively) become "$to" tags. $ltChar is the character
     			// that opens a tag in the input, $ltChar2 the one written to the output.
     			// Attributes of opening tags are kept; anything inside a closing tag is dropped.
     			// $ltChar and tag names are matched literally (they are quoted for the regex).
     		$open = preg_quote((string)$ltChar, '/');
     			// "\" and "$" are special in preg_replace() replacement strings, so escape them
     		$newOpen = addcslashes((string)$ltChar2, '\\$');

     		foreach ($tags as $from => $to)	{
     			$name = preg_quote((string)$from, '/');
     			$newName = addcslashes((string)$to, '\\$');

     				// <from>
     			$value = preg_replace('/'.$open.$name.'>/i', $newOpen.$newName.'>', $value);
     				// <from attr...>   (${1} rather than $1, so a following digit cannot extend the group number)
     			$value = preg_replace('/'.$open.$name.'\s([^>]*)>/i', $newOpen.$newName.' ${1}>', $value);
     				// </from> - the name must end here, so "</body>" is not taken for "</b>"
     			$value = preg_replace('/'.$open.'\/'.$name.'(?:\s[^>]*)?>/i', $newOpen.'/'.$newName.'>', $value);
     		}
     		return $value;
     	}
-...
     				if (strlen($tok)!=$tagEnd)	{
     					$endTag = $firstChar=='/' ? 1 : 0;
     					$tagContent = substr($tok,$endTag,$tagEnd-$endTag);
     					$tagParts = split('[[:space:]]',$tagContent,2);
     					$tagParts = preg_split('/\s+/s',$tagContent,2);
     					$tagName = strtolower($tagParts[0]);
     					if (!strcmp($tagList,'') || in_array($tagName,$tagsArray))	{
     						$contentParts[$k] = '<'.$subparts[0].'>'.$subparts[1];
-...
     		$tags=t3lib_div::trimExplode(',',$tagList,1);
     		$forthArr=array();
     		$backArr=array();
     		while(list(,$theTag)=each($tags))	{
     		foreach ($tags as $theTag)	{
     			$forthArr[$theTag]=md5($theTag);
     			$backArr[md5($theTag)]=$theTag;
+    		}
     			$value = $this->mapTags($value,$forthArr,'<','_');
     			$value=strip_tags($value);
     			$value = $this->mapTags($value,$backArr,'_','<');
     		$value = $this->mapTags($value,$forthArr,'<','_');
     		$value=strip_tags($value);
     		$value = $this->mapTags($value,$backArr,'_','<');
     		return $value;
+    	}
-...
     	 * Internal function for case shifting of a string or whole array
+    	 *
     	 * @param	mixed		Input string/array
     	 * @param	boolean		If $str is a string AND this boolean is true, the string is returned in uppercase
     	 * @param	boolean		If $str is a string AND this boolean(caseSensitive) is false, the string is returned in uppercase
     	 * @param	string		Key string used for internal caching of the results. Could be an MD5 hash of the serialized version of the input $str if that is an array.
     	 * @return	string		Output string, processed
     	 * @access private
     	 */
     	function caseShift($str,$flag,$cacheKey='')	{
     			// Returns $str (string or array of strings) in uppercase unless $flag is true,
     			// in which case it is returned unchanged. For arrays, the result is stored in
     			// $this->caseShift_cache under $cacheKey (plus the flag) when a key is given.
     		$cacheKey = (string)$cacheKey;
     			// Only cache when the caller supplied a key. Appending the flag to an empty key
     			// would create the key "1", which all unrelated arrays would then share.
     		$useCache = ($cacheKey !== '');
     		if ($useCache)	{
     			$cacheKey .= $flag ? '1' : '0';
     		}

     		if (!is_array($str))	{
     			return $flag ? $str : strtoupper($str);
     		}

     		if ($useCache && isset($this->caseShift_cache[$cacheKey]))	{
     			return $this->caseShift_cache[$cacheKey];
     		}

     		if (!$flag)	{
     			foreach ($str as $k => $v)	{
     				$str[$k] = strtoupper($v);
     			}
     		}
     		if ($useCache)	{
     			$this->caseShift_cache[$cacheKey] = $str;
     		}
     		return $str;
     	}
-...
     	 */
     	function compileTagAttribs($tagAttrib,$meta=array(), $xhtmlClean=0)	{
     		$accu=array();
     		reset($tagAttrib);
     		while(list($k,$v)=each($tagAttrib))	{
     		foreach ($tagAttrib as $k =>$v)	{
     			if ($xhtmlClean)	{
     				$attr=strtolower($k);
     				if (strcmp($v,'') || isset($meta[$k]['dashType']))	{
-...
     	function indentLines($content, $number=1, $indentChar="\t")	{
     		$preTab = str_pad('', $number*strlen($indentChar), $indentChar);
     		$lines = explode(chr(10),str_replace(chr(13),'',$content));
     		while(list($k,$v) = each($lines))	{
     		foreach ($lines as $k => $v)	{
     			$lines[$k] = $preTab.$v;
+    		}
     		return implode(chr(10), $lines);
-...
+    			}
     			reset($TSconfig['tags.']);
     			while(list($key,$tagC)=each($TSconfig['tags.']))	{
     			foreach ($TSconfig['tags.'] as $key => $tagC)	{
     				if (is_array($tagC) && $key==strtolower($key))	{
     					$key=substr($key,0,-1);
     					if (!is_array($keepTags[$key]))	$keepTags[$key]=array();
-...
     		if ($conf['xhtml'])	{
     			if ($endTag)	{	// Endtags are just set lowercase right away
     				$value = strtolower($value);
     			} elseif (substr($value,0,2)!='<!') {	// ... and comments are ignored.
     			} elseif (substr($value,0,4)!='<!--') {	// ... and comments are ignored.
     				$inValue = substr($value,1,(substr($value,-2)=='/>'?-2:-1));	// Finding inner value with out < >
     				list($tagName,$tagP)=split('[[:space:]]',$inValue,2);	// Separate attributes and tagname
     				list($tagName,$tagP)=preg_split('/\s+/s',$inValue,2);	// Separate attributes and tagname
     				$tagName = strtolower($tagName);
     					// Process attributes

« Previous
1
2
Next »

(1-1/2)

Project

General

Profile

TYPO3 Core

Bug #15121 » preg_t3lib_parsehtml.diff