Your IP : 172.28.240.42


Current Path : /var/www/html/clients/rips/lib/
Upload File :
Current File : /var/www/html/clients/rips/lib/tokenizer.php

<?php
/** 

RIPS - A static source code analyser for vulnerabilities in PHP scripts 
	by Johannes Dahse (johannes.dahse@rub.de)
			
			
Copyright (C) 2012 Johannes Dahse

This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>.

**/

/**
 * Turns PHP source code into a normalised token list.
 *
 * tokenize() runs token_get_all() and then rewrites the resulting list in
 * four passes (see the individual methods). Tokens keep the token_get_all()
 * shape: either a single-character string or array(id, text, line).
 * array_reconstruct_tokens() additionally stores array keys at index 3 of
 * T_VARIABLE tokens.
 *
 * External names used but not defined in this file: Tokens::$T_IGNORE,
 * Tokens::$T_ASSIGNMENT, Tokens::$T_OPERATOR, MAX_ARRAY_KEYS and addError().
 */
class Tokenizer
{	
	// file name that is handed to addError() together with every problem report
	public $filename;
	// current token list; every pass below reads and rewrites it in place
	public $tokens;

	/**
	 * @param string $filename name of the file the code belongs to (only used for error reports)
	 */
	public function __construct($filename)
	{
		$this->filename = $filename;
	}

	/**
	 * Main entry point: tokenize $code and run all rewrite passes.
	 *
	 * The pass order matters: prepare_tokens() removes the ignorable tokens the
	 * later passes do not expect, and array_reconstruct_tokens() runs before
	 * fix_tokens()/fix_ternary() so these already see the collapsed array accesses.
	 *
	 * @param string $code PHP source code
	 * @return array the rewritten token list (also kept in $this->tokens)
	 */
	public function tokenize($code)
	{
		$this->tokens = token_get_all($code);			
		$this->prepare_tokens();
		$this->array_reconstruct_tokens();
		$this->fix_tokens();	
		$this->fix_ternary();
		return $this->tokens;
	}
	
	/**
	 * Adds braces around a run of tokens.
	 *
	 * The new list is: tokens before offset $start, '{', $between tokens taken
	 * from offset $start, '}', tokens from offset $end onwards. Every caller in
	 * this class passes $end = $start + $between, so no token is lost or repeated.
	 * array_merge() re-indexes the list, i.e. the offsets are positions.
	 *
	 * @param int $start   offset of the first token to wrap
	 * @param int $between number of tokens to wrap
	 * @param int $end     offset of the first token behind the wrapped run
	 */
	public function wrapbraces($start, $between, $end)
	{
		$this->tokens = array_merge(
			array_slice($this->tokens, 0, $start), array('{'), 
			array_slice($this->tokens, $start, $between), array('}'),
			array_slice($this->tokens, $end)
		);	
	}

	/**
	 * First pass: delete all tokens to ignore while scanning (mostly whitespaces)
	 * and rewrite some special tokens:
	 *  - token ids listed in Tokens::$T_IGNORE are removed
	 *  - T_CLOSE_TAG becomes ';'
	 *  - the text of T_OPEN_TAG_WITH_ECHO becomes 'echo'
	 *  - '@' is removed
	 *  - $array{index} becomes $array[index]
	 */
	public function prepare_tokens()
	{	
		// $max is taken once: unset() leaves holes but does not shift the keys
		for($i=0, $max=count($this->tokens); $i<$max; $i++)
		{
			if( is_array($this->tokens[$i]) ) 
			{
				if( in_array($this->tokens[$i][0], Tokens::$T_IGNORE) )
					unset($this->tokens[$i]);
				else if( $this->tokens[$i][0] === T_CLOSE_TAG )
					$this->tokens[$i] = ';';	
				else if( $this->tokens[$i][0] === T_OPEN_TAG_WITH_ECHO )
					$this->tokens[$i][1] = 'echo';
			} 
			// @ (depress errors) disturbs connected token handling
			else if($this->tokens[$i] === '@') 
			{
				unset($this->tokens[$i]);
			}	
			// rewrite $array{index} to $array[index]
			// (the isset() fails if the token directly before '{' was removed above,
			// e.g. whitespace, so only a directly attached '{' is rewritten)
			else if( $this->tokens[$i] === '{'
			&& isset($this->tokens[$i-1]) && ((is_array($this->tokens[$i-1]) && $this->tokens[$i-1][0] === T_VARIABLE)
			|| $this->tokens[$i-1] === ']') )
			{
				// look for the closing brace first; tokens behind $i are still untouched
				$f=1;
				while(isset($this->tokens[$i+$f]) && $this->tokens[$i+$f] !== '}')
				{
					$f++;
				}
				
				if(isset($this->tokens[$i+$f]))
				{
					$this->tokens[$i] = '[';
					$this->tokens[$i+$f] = ']';
				}
				else
				{
					// no closing brace: report it and leave the tokens as they are
					// instead of producing an unbalanced '[' or a stray ']'
					$prev = $this->tokens[$i-1];
					// the previous token may be the string ']', so take the line
					// number from the nearest real token in front of the brace
					$line_nr = 0;
					for($b=$i-1; $b>=0 && !$line_nr; $b--)
					{
						if(isset($this->tokens[$b]) && is_array($this->tokens[$b]))
							$line_nr = $this->tokens[$b][2];
					}
					addError('Could not find closing brace of '.(is_array($prev) ? $prev[1] : $prev).'{}.', array_slice($this->tokens, $i-1, 2), $line_nr, $this->filename);
				}
			}	
		}
		
		// rearranged key index of tokens
		$this->tokens = array_values($this->tokens);
	}	
		
	/**
	 * Third pass: some tokenchains need to be fixed to scan correctly later.
	 *
	 *  - `cmd` becomes backticks(cmd)
	 *  - bodies of if/elseif/for/foreach/while/else/case/default without braces
	 *    (including the alternate "if(): ... endif;" syntax and
	 *    "switch(): ... endswitch;") are wrapped in { }
	 *  - names of declared and called functions are lowercased
	 *  - do { } while(); is reordered to while() { }
	 *
	 * The token list grows while it is walked (wrapbraces() inserts tokens),
	 * therefore the loop bound is recounted on every iteration.
	 */
	public function fix_tokens()
	{	
		for($i=0; $i<($max=count($this->tokens)); $i++)
		{
		// convert `backticks` to backticks()
			if( $this->tokens[$i] === '`' )
			{		
				$f=1;
				// reset for every backtick expression, otherwise the line number
				// of an earlier expression would be reused
				$line_nr = 0;
				while( isset($this->tokens[$i+$f]) && $this->tokens[$i+$f] !== '`' && $this->tokens[$i+$f] !== ';' )
				{	
					// get line_nr of any near token
					if( is_array($this->tokens[$i+$f]) )
						$line_nr = $this->tokens[$i+$f][2];

					$f++;
				}
				
				if( !isset($this->tokens[$i+$f]) || $this->tokens[$i+$f] !== '`' )
				{
					// unterminated: report only, do not rewrite a token that is not a backtick
					$error_line = (isset($this->tokens[$i+1]) && is_array($this->tokens[$i+1])) ? $this->tokens[$i+1][2] : $line_nr;
					addError('Could not find closing backtick `.', array_slice($this->tokens, $i, 5), $error_line, $this->filename);
				}
				// empty backticks carry no line number and are left alone
				else if(!empty($line_nr))
				{ 
					$this->tokens[$i+$f] = ')';
					$this->tokens[$i] = array(T_STRING, 'backticks', $line_nr);
				
					// add element backticks() to array 			
					$this->tokens = array_merge(
						array_slice($this->tokens, 0, $i+1), array('('), 
						array_slice($this->tokens, $i+1)
					);	
				}

			}
		// real token
			else if( is_array($this->tokens[$i]) )
			{	
			// rebuild if-clauses, for(), foreach(), while() without { }
				if ( ($this->tokens[$i][0] === T_IF || $this->tokens[$i][0] === T_ELSEIF || $this->tokens[$i][0] === T_FOR 
				|| $this->tokens[$i][0] === T_FOREACH || $this->tokens[$i][0] === T_WHILE) && $this->tokens[$i+1] === '(' )
				{		
					// skip condition in ( )
					// afterwards $i+$f is the first token behind the closing parenthesis
					$f=2;
					$braceopen = 1;
					while($braceopen !== 0 ) 
					{
						if($this->tokens[$i+$f] === '(')
							$braceopen++;
						else if($this->tokens[$i+$f] === ')')
							$braceopen--;
						$f++;

						if(!isset($this->tokens[$i+$f]))
						{
							addError('Could not find closing parenthesis of '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
							break;	
						}
					}	

					// alternate syntax while(): endwhile;
					if($this->tokens[$i+$f] === ':')
					{
						switch($this->tokens[$i][0])
						{
							case T_IF:
							case T_ELSEIF: $endtoken = T_ENDIF; break;
							case T_FOR: $endtoken = T_ENDFOR; break;
							case T_FOREACH: $endtoken = T_ENDFOREACH; break;
							case T_WHILE: $endtoken = T_ENDWHILE; break;
							default: $endtoken = ';';
						}
					
						// $c = distance from the ':' to the first matching end token
						$c=1;
						while( $this->tokens[$i+$f+$c][0] !== $endtoken)
						{
							$c++;
							if(!isset($this->tokens[$i+$f+$c]))
							{
								addError('Could not find end'.$this->tokens[$i][1].'; of alternate '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, $f+1), $this->tokens[$i][2], $this->filename);
								break;	
							}
						}
						// the wrapped run starts behind the ':' and includes the
						// end token plus the token following it
						$this->wrapbraces($i+$f+1, $c+1, $i+$f+$c+2);
					}
					// if body not in { (and not a do ... while();) wrap next instruction in braces
					else if($this->tokens[$i+$f] !== '{' && $this->tokens[$i+$f] !== ';')
					{
						// the instruction ends at the next ';' ($c<$max stops a runaway search)
						$c=1;
						while($this->tokens[$i+$f+$c] !== ';' && $c<$max)
						{
							$c++;
						}
						$this->wrapbraces($i+$f, $c+1, $i+$f+$c+1);
					}
				} 
			// rebuild else without { }	
				else if( $this->tokens[$i][0] === T_ELSE 
				&& $this->tokens[$i+1][0] !== T_IF
				&& $this->tokens[$i+1] !== '{')
				{	
					// wrap everything from behind the else up to and including the next ';'
					$f=2;
					while( $this->tokens[$i+$f] !== ';' && $f<$max)
					{		
						$f++;
					}
					$this->wrapbraces($i+1, $f, $i+$f+1);
				}
			// rebuild switch (): endswitch;		
				else if( $this->tokens[$i][0] === T_SWITCH && $this->tokens[$i+1] === '(')
				{
					// skip the switch expression; afterwards $i+$c is the first
					// token behind its closing parenthesis
					$newbraceopen = 1;
					$c=2;
					while( $newbraceopen !== 0 )
					{
						// watch function calls in function call
						if( $this->tokens[$i + $c] === '(' )
						{
							$newbraceopen++;
						}
						else if( $this->tokens[$i + $c] === ')' )
						{
							$newbraceopen--;
						}					
						else if(!isset($this->tokens[$i+$c]) || $this->tokens[$i + $c] === ';')
						{
							addError('Could not find closing parenthesis of switch-statement.', array_slice($this->tokens, $i, 10), $this->tokens[$i][2], $this->filename);
							break;	
						}
						$c++;
					}
					// switch(): ... endswitch;
					if($this->tokens[$i + $c] === ':')
					{
						// $f = distance from the ':' to the endswitch token
						$f=1;
						while( $this->tokens[$i+$c+$f][0] !== T_ENDSWITCH)
						{
							$f++;
							if(!isset($this->tokens[$i+$c+$f]))
							{
								addError('Could not find endswitch; of alternate switch-statement.', array_slice($this->tokens, $i, $c+1), $this->tokens[$i][2], $this->filename);
								break;	
							}
						}
						$this->wrapbraces($i+$c+1, $f+1, $i+$c+$f+2);
					}
				}
			// rebuild switch case: without { }	
				else if( $this->tokens[$i][0] === T_CASE )
				{
					// $e = distance from the case token to the ':' (or ';') ending its label
					$e=1;
					while($this->tokens[$i+$e] !== ':' && $this->tokens[$i+$e] !== ';')
					{
						$e++;
						
						if(!isset($this->tokens[$i+$e]))
						{
							addError('Could not find : or ; after '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
							break;	
						}
					}
					$f=$e+1;
					// only wrap if the case has a body of its own, i.e. it is not
					// already in braces and not directly followed by another label
					if(($this->tokens[$i+$e] === ':' || $this->tokens[$i+$e] === ';')
					&& $this->tokens[$i+$f] !== '{' 
					&& $this->tokens[$i+$f][0] !== T_CASE && $this->tokens[$i+$f][0] !== T_DEFAULT)
					{
						// the body ends at the first break/case/default/endswitch or '}'
						// that is not nested inside a { } block of the body
						$newbraceopen = 0;
						while($newbraceopen || (isset($this->tokens[$i+$f]) && $this->tokens[$i+$f] !== '}' 
						&& !(is_array($this->tokens[$i+$f]) 
						&& ($this->tokens[$i+$f][0] === T_BREAK || $this->tokens[$i+$f][0] === T_CASE 
						|| $this->tokens[$i+$f][0] === T_DEFAULT || $this->tokens[$i+$f][0] === T_ENDSWITCH) ) ))
						{		
							if($this->tokens[$i+$f] === '{')
								$newbraceopen++;
							else if($this->tokens[$i+$f] === '}')	
								$newbraceopen--;
							$f++;
							
							if(!isset($this->tokens[$i+$f]))
							{
								addError('Could not find ending of '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, $e+5), $this->tokens[$i][2], $this->filename);
								break;	
							}
						}
						// a terminating break is pulled into the braces
						if($this->tokens[$i+$f][0] === T_BREAK)
						{
							if($this->tokens[$i+$f+1] === ';')
								$this->wrapbraces($i+$e+1, $f-$e+1, $i+$f+2);
							// break 3;	
							else
								$this->wrapbraces($i+$e+1, $f-$e+2, $i+$f+3);
						}	
						else
						{
							$this->wrapbraces($i+$e+1, $f-$e-1, $i+$f);
						}	
						$i++;
					}
				}
			// rebuild switch default: without { }	
				else if( $this->tokens[$i][0] === T_DEFAULT
				&& $this->tokens[$i+2] !== '{' )
				{
					// $i+1 is expected to be the ':' of the label; the body runs up to
					// the first ';' or '}' that is not nested inside a { } block
					$f=2;
					$newbraceopen = 0;
					while( $this->tokens[$i+$f] !== ';' && $this->tokens[$i+$f] !== '}' || $newbraceopen )
					{		
						if($this->tokens[$i+$f] === '{')
							$newbraceopen++;
						else if($this->tokens[$i+$f] === '}')	
							$newbraceopen--;
						$f++;
						
						if(!isset($this->tokens[$i+$f]))
						{
							addError('Could not find ending of '.$this->tokens[$i][1].'-statement.', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
							break;	
						}
					}
					$this->wrapbraces($i+2, $f-1, $i+$f+1);
				}
			// lowercase all function names because PHP doesn't care	
				else if( $this->tokens[$i][0] === T_FUNCTION )
				{
					// the name is not always the next token: closures have no name
					// ("function(") and by-reference functions put an '&' first.
					// Writing to index 1 of such a one-character string token would
					// corrupt it or raise an error, so only real tokens are touched.
					$n = 1;
					if( isset($this->tokens[$i+1]) && ($this->tokens[$i+1] === '&'
					|| (is_array($this->tokens[$i+1]) && $this->tokens[$i+1][1] === '&')) )
					{
						$n = 2;
					}
					if( isset($this->tokens[$i+$n]) && is_array($this->tokens[$i+$n]) )
					{
						$this->tokens[$i+$n][1] = strtolower($this->tokens[$i+$n][1]);
					}
				}	
			// lowercase the names of called functions as well
				else if( $this->tokens[$i][0] === T_STRING && isset($this->tokens[$i+1]) && $this->tokens[$i+1] === '(')
				{
					$this->tokens[$i][1] = strtolower($this->tokens[$i][1]);
				}	
			// switch a do while with a while (the difference in loop rounds doesnt matter
			// and we need the condition to be parsed before the loop tokens)
				else if( $this->tokens[$i][0] === T_DO )
				{
					$f=2;
					$otherDOs = 0;
					// f = T_WHILE token position relative to i
					// (nested do-whiles are skipped by counting their DO/WHILE tokens)
					while( $this->tokens[$i+$f][0] !== T_WHILE || $otherDOs )
					{		
						if($this->tokens[$i+$f][0] === T_DO)
							$otherDOs++;
						else if($this->tokens[$i+$f][0] === T_WHILE)
							$otherDOs--;
						$f++;
						
						if(!isset($this->tokens[$i+$f]))
						{
							addError('Could not find WHILE of DO-WHILE-statement.', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
							break;	
						}
					}
					
					// rebuild do while without {} (should never happen but we want to be sure)
					if($this->tokens[$i+1] !== '{')
					{
						$this->wrapbraces($i+1, $f-1, $i+$f);
						// by adding braces we added two new tokens
						$f+=2;
					}

					$d=1;
					// d = END of T_WHILE condition relative to i
					while( $this->tokens[$i+$f+$d] !== ';' && $d<$max )
					{
						$d++;
					}
					
					// reorder tokens and replace DO WHILE with WHILE
					$this->tokens = array_merge(
						array_slice($this->tokens, 0, $i), // before DO 
						array_slice($this->tokens, $i+$f, $d), // WHILE condition
						array_slice($this->tokens, $i+1, $f-1), // DO WHILE loop tokens
						array_slice($this->tokens, $i+$f+$d+1, count($this->tokens)) // rest of tokens without while condition
					);	
				}
			}	
		}
		// return tokens with rearranged key index
		$this->tokens = array_values($this->tokens);
	}
	
	/**
	 * Second pass: rewrite $arrays[] to $variables and save keys in $tokens[$i][3].
	 *
	 * For every T_VARIABLE directly followed by '[' the bracket tokens are
	 * removed from the list and the keys are stored at index 3 of the variable
	 * token, one entry per dimension:
	 *  - a key that is a single T_CONSTANT_ENCAPSED_STRING, T_LNUMBER,
	 *    T_NUM_STRING or T_STRING token is stored as its text without quotes
	 *  - any other key is stored as the list of its tokens (brackets nested
	 *    inside the key are dropped)
	 * At most MAX_ARRAY_KEYS + 1 dimensions are collected per variable.
	 */
	public function array_reconstruct_tokens()
	{	
		for($i=0,$max=count($this->tokens); $i<$max; $i++)
		{
			// check for arrays
			if( is_array($this->tokens[$i]) && $this->tokens[$i][0] === T_VARIABLE 
			&& isset($this->tokens[$i+1]) && $this->tokens[$i+1] === '[' )
			{	
				$this->tokens[$i][3] = array();
				$has_more_keys = true;
				$index = -1;
				// c = position of the first key token relative to i
				$c=2;
				
				// loop until no more index found: array[1][2][3]
				while($has_more_keys && $index < MAX_ARRAY_KEYS)
				{
					$index++;
					// save constant index as constant
					if(($this->tokens[$i+$c][0] === T_CONSTANT_ENCAPSED_STRING || $this->tokens[$i+$c][0] === T_LNUMBER || $this->tokens[$i+$c][0] === T_NUM_STRING || $this->tokens[$i+$c][0] === T_STRING) && $this->tokens[$i+$c+1] === ']')
					{ 		
						unset($this->tokens[$i+$c-1]);
						$this->tokens[$i][3][$index] = str_replace(array('"', "'"), '', $this->tokens[$i+$c][1]);
						unset($this->tokens[$i+$c]);
						unset($this->tokens[$i+$c+1]);
						$c+=2;
					// save tokens of non-constant index as token-array for backtrace later	
					} else
					{
						$this->tokens[$i][3][$index] = array();
						$newbraceopen = 1;
						unset($this->tokens[$i+$c-1]);
						while($newbraceopen !== 0)
						{	
							if( $this->tokens[$i+$c] === '[' )
							{
								$newbraceopen++;
							}
							else if( $this->tokens[$i+$c] === ']' )
							{
								$newbraceopen--;
							}
							else
							{
								$this->tokens[$i][3][$index][] = $this->tokens[$i+$c];
							}	
							unset($this->tokens[$i+$c]);
							$c++;
							
							if(!isset($this->tokens[$i+$c]))
							{
								addError('Could not find closing bracket of '.$this->tokens[$i][1].'[].', array_slice($this->tokens, $i, 5), $this->tokens[$i][2], $this->filename);
								break;	
							}
						}
						unset($this->tokens[$i+$c-1]);
					}
					// c now points behind the closing bracket: another '[' starts the next dimension
					if($this->tokens[$i+$c] !== '[')
						$has_more_keys = false;
					$c++;	
				}	
				
				// $i+$c-1 is the first token that was not consumed. The loop adds one
				// itself, so advance to the token in front of it; otherwise a variable
				// directly following the array access ("$a[x]$b[y]") would be skipped.
				$i+=$c-2;
			}
		}
	
		// return tokens with rearranged key index
		$this->tokens = array_values($this->tokens);		
	}
	
	/**
	 * Fourth pass: handle ternary operator (remove condition, only values should
	 * be handled during trace).
	 * problem: tainting in the condition is not actual tainting the line -> remove condition
	 *
	 * For every '?' the '?' itself is removed together with what is recognised
	 * as its condition:
	 *  - a parenthesised condition, plus a directly preceding '!', T_STRING,
	 *    T_EMPTY or T_ISSET token
	 *  - "operand operator operand", where the operator id is listed in
	 *    Tokens::$T_ASSIGNMENT or Tokens::$T_OPERATOR
	 *  - a single T_VARIABLE
	 * Any other condition is left in the list. The ':' is not touched.
	 */
	public function fix_ternary()
	{
		for($i=0,$max=count($this->tokens); $i<$max; $i++)
		{
			if( $this->tokens[$i] === '?' )
			{
				unset($this->tokens[$i]);
				// condition in brackets: fine, delete condition
				if($this->tokens[$i-1] === ')')
				{
					unset($this->tokens[$i-1]);
					// delete tokens till ( 
					// (walking backwards, so ')' opens and '(' closes a level)
					$newbraceopen = 1;
					$f = 2;
					while( $newbraceopen !== 0 && $this->tokens[$i - $f] !== ';')
					{
						if( $this->tokens[$i - $f] === '(' )
						{
							$newbraceopen--;
						}
						else if( $this->tokens[$i - $f] === ')' )
						{
							$newbraceopen++;
						}
						unset($this->tokens[$i - $f]);	
						$f++;
						
						if(($i-$f)<0)
						{
							addError('Could not find opening parenthesis in ternary operator (1).', array_slice($this->tokens, $i-5, 10), $this->tokens[$i+1][2], $this->filename);
							break;	
						}
					}

					//delete token before, if T_STRING
					if($this->tokens[$i-$f] === '!' || (is_array($this->tokens[$i-$f]) 
					&& ($this->tokens[$i-$f][0] === T_STRING || $this->tokens[$i-$f][0] === T_EMPTY || $this->tokens[$i-$f][0] === T_ISSET)))
					{
						unset($this->tokens[$i-$f]);
					}
					
				}
				// condition is a check or assignment
				else if(in_array($this->tokens[$i-2][0], Tokens::$T_ASSIGNMENT) || in_array($this->tokens[$i-2][0], Tokens::$T_OPERATOR) )
				{
					// remove both operands
					// ($i-1 is the right operand, $i-2 the operator; the left
					// operand at $i-3 is removed at the end of this branch)
					unset($this->tokens[$i-1]);
					unset($this->tokens[$i-2]);
					// if operand is in braces
					if($this->tokens[$i-3] === ')')
					{
						// delete tokens till ( 
						$newbraceopen = 1;
						$f = 4;
						while( $newbraceopen !== 0 )
						{
							if( $this->tokens[$i - $f] === '(' )
							{
								$newbraceopen--;
							}
							else if( $this->tokens[$i - $f] === ')' )
							{
								$newbraceopen++;
							}
							unset($this->tokens[$i - $f]);	
							$f++;
							
							if(($i-$f)<0 || $this->tokens[$i - $f] === ';')
							{
								addError('Could not find opening parenthesis in ternary operator (2).', array_slice($this->tokens, $i-8, 6), $this->tokens[$i+1][2], $this->filename);
								break;	
							}
						}

						//delete token before, if T_STRING
						if(is_array($this->tokens[$i-$f]) 
						&& ($this->tokens[$i-$f][0] === T_STRING || $this->tokens[$i-$f][0] === T_EMPTY || $this->tokens[$i-$f][0] === T_ISSET))
						{
							unset($this->tokens[$i-$f]);
						}
					}

					unset($this->tokens[$i-3]);
					
				}
				// condition is a single variable, delete
				else if(is_array($this->tokens[$i-1]) && $this->tokens[$i-1][0] === T_VARIABLE)
				{
					unset($this->tokens[$i-1]);
				}
			}	
		}
		// return tokens with rearranged key index
		$this->tokens = array_values($this->tokens);	
	}
}	
	
?>