836 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			836 lines
		
	
	
		
			26 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| 
 | |
| class HTMLPurifier_LexerTest extends HTMLPurifier_Harness
 | |
| {
 | |
| 
 | |
|     protected $_has_pear = false;
 | |
| 
 | |
|     public function __construct()
 | |
|     {
 | |
|         parent::__construct();
 | |
|         if ($GLOBALS['HTMLPurifierTest']['PH5P']) {
 | |
|             require_once 'HTMLPurifier/Lexer/PH5P.php';
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // HTMLPurifier_Lexer::create() --------------------------------------------
 | |
| 
 | |
|     public function test_create()
 | |
|     {
 | |
|         $this->config->set('Core.MaintainLineNumbers', true);
 | |
|         $lexer = HTMLPurifier_Lexer::create($this->config);
 | |
|         $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
 | |
|     }
 | |
| 
 | |
|     public function test_create_objectLexerImpl()
 | |
|     {
 | |
|         $this->config->set('Core.LexerImpl', new HTMLPurifier_Lexer_DirectLex());
 | |
|         $lexer = HTMLPurifier_Lexer::create($this->config);
 | |
|         $this->assertIsA($lexer, 'HTMLPurifier_Lexer_DirectLex');
 | |
|     }
 | |
| 
 | |
|     public function test_create_unknownLexer()
 | |
|     {
 | |
|         $this->config->set('Core.LexerImpl', 'AsdfAsdf');
 | |
|         $this->expectException(new HTMLPurifier_Exception('Cannot instantiate unrecognized Lexer type AsdfAsdf'));
 | |
|         HTMLPurifier_Lexer::create($this->config);
 | |
|     }
 | |
| 
 | |
|     public function test_create_incompatibleLexer()
 | |
|     {
 | |
|         $this->config->set('Core.LexerImpl', 'DOMLex');
 | |
|         $this->config->set('Core.MaintainLineNumbers', true);
 | |
|         $this->expectException(new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'));
 | |
|         HTMLPurifier_Lexer::create($this->config);
 | |
|     }
 | |
| 
 | |
|     // HTMLPurifier_Lexer->parseData() -----------------------------------------
 | |
| 
 | |
|     public function assertParseData($input, $expect = true)
 | |
|     {
 | |
|         if ($expect === true) $expect = $input;
 | |
|         $lexer = new HTMLPurifier_Lexer();
 | |
|         $this->assertIdentical($expect, $lexer->parseData($input));
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_plainText()
 | |
|     {
 | |
|         $this->assertParseData('asdf');
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_ampersandEntity()
 | |
|     {
 | |
|         $this->assertParseData('&', '&');
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_quotEntity()
 | |
|     {
 | |
|         $this->assertParseData('"', '"');
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_aposNumericEntity()
 | |
|     {
 | |
|         $this->assertParseData(''', "'");
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_aposCompactNumericEntity()
 | |
|     {
 | |
|         $this->assertParseData(''', "'");
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_adjacentAmpersandEntities()
 | |
|     {
 | |
|         $this->assertParseData('&&&', '&&&');
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_trailingUnescapedAmpersand()
 | |
|     {
 | |
|         $this->assertParseData('&&', '&&');
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_internalUnescapedAmpersand()
 | |
|     {
 | |
|         $this->assertParseData('Procter & Gamble');
 | |
|     }
 | |
| 
 | |
|     public function test_parseData_improperEntityFaultToleranceTest()
 | |
|     {
 | |
|         $this->assertParseData('-');
 | |
|     }
 | |
| 
 | |
|     // HTMLPurifier_Lexer->extractBody() ---------------------------------------
 | |
| 
 | |
|     public function assertExtractBody($text, $extract = true)
 | |
|     {
 | |
|         $lexer = new HTMLPurifier_Lexer();
 | |
|         $result = $lexer->extractBody($text);
 | |
|         if ($extract === true) $extract = $text;
 | |
|         $this->assertIdentical($extract, $result);
 | |
|     }
 | |
| 
 | |
|     public function test_extractBody_noBodyTags()
 | |
|     {
 | |
|         $this->assertExtractBody('<b>Bold</b>');
 | |
|     }
 | |
| 
 | |
|     public function test_extractBody_lowercaseBodyTags()
 | |
|     {
 | |
|         $this->assertExtractBody('<html><body><b>Bold</b></body></html>', '<b>Bold</b>');
 | |
|     }
 | |
| 
 | |
|     public function test_extractBody_uppercaseBodyTags()
 | |
|     {
 | |
|         $this->assertExtractBody('<HTML><BODY><B>Bold</B></BODY></HTML>', '<B>Bold</B>');
 | |
|     }
 | |
| 
 | |
|     public function test_extractBody_realisticUseCase()
 | |
|     {
 | |
|         $this->assertExtractBody(
 | |
| '<?xml version="1.0"
 | |
| <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 | |
|     "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 | |
| <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
 | |
|    <head>
 | |
|       <title>xyz</title>
 | |
|    </head>
 | |
|    <body>
 | |
|       <form method="post" action="whatever1">
 | |
|          <div>
 | |
|             <input type="text" name="username" />
 | |
|             <input type="text" name="password" />
 | |
|             <input type="submit" />
 | |
|          </div>
 | |
|       </form>
 | |
|    </body>
 | |
| </html>',
 | |
|     '
 | |
|       <form method="post" action="whatever1">
 | |
|          <div>
 | |
|             <input type="text" name="username" />
 | |
|             <input type="text" name="password" />
 | |
|             <input type="submit" />
 | |
|          </div>
 | |
|       </form>
 | |
|    ');
 | |
|     }
 | |
| 
 | |
|     public function test_extractBody_bodyWithAttributes()
 | |
|     {
 | |
|         $this->assertExtractBody('<html><body bgcolor="#F00"><b>Bold</b></body></html>', '<b>Bold</b>');
 | |
|     }
 | |
| 
 | |
|     public function test_extractBody_preserveUnclosedBody()
 | |
|     {
 | |
|         $this->assertExtractBody('<body>asdf'); // not closed, don't accept
 | |
|     }
 | |
| 
 | |
|     public function test_extractBody_useLastBody()
 | |
|     {
 | |
|         $this->assertExtractBody('<body>foo</body>bar</body>', 'foo</body>bar');
 | |
|     }
 | |
| 
 | |
|     // HTMLPurifier_Lexer->tokenizeHTML() --------------------------------------
 | |
| 
 | |
|     public function assertTokenization($input, $expect, $alt_expect = array())
 | |
|     {
 | |
|         $lexers = array();
 | |
|         $lexers['DirectLex']  = new HTMLPurifier_Lexer_DirectLex();
 | |
|         if (class_exists('DOMDocument')) {
 | |
|             $lexers['DOMLex'] = new HTMLPurifier_Lexer_DOMLex();
 | |
|             $lexers['PH5P']   = new HTMLPurifier_Lexer_PH5P();
 | |
|         }
 | |
|         foreach ($lexers as $name => $lexer) {
 | |
|             $result = $lexer->tokenizeHTML($input, $this->config, $this->context);
 | |
|             if (isset($alt_expect[$name])) {
 | |
|                 if ($alt_expect[$name] === false) continue;
 | |
|                 $t_expect = $alt_expect[$name];
 | |
|                 $this->assertIdentical($result, $alt_expect[$name], "$name: %s");
 | |
|             } else {
 | |
|                 $t_expect = $expect;
 | |
|                 $this->assertIdentical($result, $expect, "$name: %s");
 | |
|             }
 | |
|             if ($t_expect != $result) {
 | |
|                 printTokens($result);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_emptyInput()
 | |
|     {
 | |
|         $this->assertTokenization('', array());
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_plainText()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             'This is regular text.',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text('This is regular text.')
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_textAndTags()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             'This is <b>bold</b> text',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text('This is '),
 | |
|                 new HTMLPurifier_Token_Start('b', array()),
 | |
|                 new HTMLPurifier_Token_Text('bold'),
 | |
|                 new HTMLPurifier_Token_End('b'),
 | |
|                 new HTMLPurifier_Token_Text(' text'),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_normalizeCase()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<DIV>Totally rad dude. <b>asdf</b></div>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('DIV', array()),
 | |
|                 new HTMLPurifier_Token_Text('Totally rad dude. '),
 | |
|                 new HTMLPurifier_Token_Start('b', array()),
 | |
|                 new HTMLPurifier_Token_Text('asdf'),
 | |
|                 new HTMLPurifier_Token_End('b'),
 | |
|                 new HTMLPurifier_Token_End('div'),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_notWellFormed()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<asdf></asdf><d></d><poOloka><poolasdf><ds></asdf></ASDF>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('asdf'),
 | |
|                 new HTMLPurifier_Token_End('asdf'),
 | |
|                 new HTMLPurifier_Token_Start('d'),
 | |
|                 new HTMLPurifier_Token_End('d'),
 | |
|                 new HTMLPurifier_Token_Start('poOloka'),
 | |
|                 new HTMLPurifier_Token_Start('poolasdf'),
 | |
|                 new HTMLPurifier_Token_Start('ds'),
 | |
|                 new HTMLPurifier_Token_End('asdf'),
 | |
|                 new HTMLPurifier_Token_End('ASDF'),
 | |
|             ),
 | |
|             array(
 | |
|                 'DOMLex' => $alt = array(
 | |
|                     new HTMLPurifier_Token_Empty('asdf'),
 | |
|                     new HTMLPurifier_Token_Empty('d'),
 | |
|                     new HTMLPurifier_Token_Start('pooloka'),
 | |
|                     new HTMLPurifier_Token_Start('poolasdf'),
 | |
|                     new HTMLPurifier_Token_Empty('ds'),
 | |
|                     new HTMLPurifier_Token_End('poolasdf'),
 | |
|                     new HTMLPurifier_Token_End('pooloka'),
 | |
|                 ),
 | |
|                 // 20140831: Weird, but whatever...
 | |
|                 'PH5P' => array(new HTMLPurifier_Token_Empty('asdf')),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_whitespaceInTag()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<a'."\t".'href="foobar.php"'."\n".'title="foo!">Link to <b id="asdf">foobar</b></a>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('a',array('href'=>'foobar.php','title'=>'foo!')),
 | |
|                 new HTMLPurifier_Token_Text('Link to '),
 | |
|                 new HTMLPurifier_Token_Start('b',array('id'=>'asdf')),
 | |
|                 new HTMLPurifier_Token_Text('foobar'),
 | |
|                 new HTMLPurifier_Token_End('b'),
 | |
|                 new HTMLPurifier_Token_End('a'),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_singleAttribute()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<br style="&" />',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Empty('br', array('style' => '&'))
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_emptyTag()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<br />',
 | |
|             array( new HTMLPurifier_Token_Empty('br') )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_comment()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<!-- Comment -->',
 | |
|             array( new HTMLPurifier_Token_Comment(' Comment ') )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_malformedComment()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<!-- not so well formed --->',
 | |
|             array( new HTMLPurifier_Token_Comment(' not so well formed -') )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_unterminatedTag()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<a href=""',
 | |
|             array( new HTMLPurifier_Token_Text('<a href=""') ),
 | |
|             array(
 | |
|                 // I like our behavior better, but it's non-standard
 | |
|                 'DOMLex'   => array( new HTMLPurifier_Token_Empty('a', array('href'=>'')) ),
 | |
|                 'PH5P' => false, // total barfing, grabs scaffolding too
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_specialEntities()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<b>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text('<b>')
 | |
|             ),
 | |
|             array(
 | |
|                 // some parsers will separate entities out
 | |
|                 'PH5P' => array(
 | |
|                     new HTMLPurifier_Token_Text('<'),
 | |
|                     new HTMLPurifier_Token_Text('b'),
 | |
|                     new HTMLPurifier_Token_Text('>'),
 | |
|                 ),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_earlyQuote()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<a "=>',
 | |
|             array( new HTMLPurifier_Token_Empty('a') ),
 | |
|             array(
 | |
|                 // we barf on this input
 | |
|                 'DirectLex' => array(
 | |
|                     new HTMLPurifier_Token_Start('a', array('"' => ''))
 | |
|                 ),
 | |
|                 'PH5P' => false, // behavior varies; handle this personally
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_earlyQuote_PH5P()
 | |
|     {
 | |
|         if (!class_exists('DOMDocument')) return;
 | |
|         $lexer = new HTMLPurifier_Lexer_PH5P();
 | |
|         $result = $lexer->tokenizeHTML('<a "=>', $this->config, $this->context);
 | |
|         if ($this->context->get('PH5PError', true)) {
 | |
|             $this->assertIdentical(array(
 | |
|                 new HTMLPurifier_Token_Start('a', array('"' => ''))
 | |
|             ), $result);
 | |
|         } else {
 | |
|             $this->assertIdentical(array(
 | |
|                 new HTMLPurifier_Token_Empty('a', array('"' => ''))
 | |
|             ), $result);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_unescapedQuote()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '"',
 | |
|             array( new HTMLPurifier_Token_Text('"') )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_escapedQuote()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '"',
 | |
|             array( new HTMLPurifier_Token_Text('"') )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_cdata()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<![CDATA[You <b>can't</b> get me!]]>',
 | |
|             array( new HTMLPurifier_Token_Text('You <b>can't</b> get me!') ),
 | |
|             array(
 | |
|                 'PH5P' =>  array(
 | |
|                     new HTMLPurifier_Token_Text('You '),
 | |
|                     new HTMLPurifier_Token_Text('<'),
 | |
|                     new HTMLPurifier_Token_Text('b'),
 | |
|                     new HTMLPurifier_Token_Text('>'),
 | |
|                     new HTMLPurifier_Token_Text('can'),
 | |
|                     new HTMLPurifier_Token_Text('&'),
 | |
|                     new HTMLPurifier_Token_Text('#39;t'),
 | |
|                     new HTMLPurifier_Token_Text('<'),
 | |
|                     new HTMLPurifier_Token_Text('/b'),
 | |
|                     new HTMLPurifier_Token_Text('>'),
 | |
|                     new HTMLPurifier_Token_Text(' get me!'),
 | |
|                 ),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_characterEntity()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             'θ',
 | |
|             array( new HTMLPurifier_Token_Text("\xCE\xB8") )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_characterEntityInCDATA()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<![CDATA[→]]>',
 | |
|             array( new HTMLPurifier_Token_Text("→") ),
 | |
|             array(
 | |
|                 'PH5P' => array(
 | |
|                     new HTMLPurifier_Token_Text('&'),
 | |
|                     new HTMLPurifier_Token_Text('rarr;'),
 | |
|                 ),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_entityInAttribute()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<a href="index.php?title=foo&id=bar">Link</a>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('a',array('href' => 'index.php?title=foo&id=bar')),
 | |
|                 new HTMLPurifier_Token_Text('Link'),
 | |
|                 new HTMLPurifier_Token_End('a'),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_preserveUTF8()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             "\xCE\xB8",
 | |
|             array( new HTMLPurifier_Token_Text("\xCE\xB8") )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_specialEntityInAttribute()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<br test="x < 6" />',
 | |
|             array( new HTMLPurifier_Token_Empty('br', array('test' => 'x < 6')) )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_emoticonProtection()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<b>Whoa! <3 That\'s not good >.></b>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('b'),
 | |
|                 new HTMLPurifier_Token_Text('Whoa! '),
 | |
|                 new HTMLPurifier_Token_Text('<'),
 | |
|                 new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
 | |
|                 new HTMLPurifier_Token_End('b')
 | |
|             ),
 | |
|             array(
 | |
|                 // text is absorbed together
 | |
|                 'DOMLex' => array(
 | |
|                     new HTMLPurifier_Token_Start('b'),
 | |
|                     new HTMLPurifier_Token_Text('Whoa! <3 That\'s not good >.>'),
 | |
|                     new HTMLPurifier_Token_End('b'),
 | |
|                 ),
 | |
|                 'PH5P' => array( // interesting grouping
 | |
|                     new HTMLPurifier_Token_Start('b'),
 | |
|                     new HTMLPurifier_Token_Text('Whoa! '),
 | |
|                     new HTMLPurifier_Token_Text('<'),
 | |
|                     new HTMLPurifier_Token_Text('3 That\'s not good >.>'),
 | |
|                     new HTMLPurifier_Token_End('b'),
 | |
|                 ),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_commentWithFunkyChars()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<!-- This >< comment --><br />',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Comment(' This >< comment '),
 | |
|                 new HTMLPurifier_Token_Empty('br'),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_unterminatedComment()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<!-- This >< comment',
 | |
|             array( new HTMLPurifier_Token_Comment(' This >< comment') ),
 | |
|             array(
 | |
|                 'DOMLex'   => false,
 | |
|                 'PH5P'     => false,
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_scriptCDATAContents()
 | |
|     {
 | |
|         $this->config->set('HTML.Trusted', true);
 | |
|         $this->assertTokenization(
 | |
|             'Foo: <script>alert("<foo>");</script>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text('Foo: '),
 | |
|                 new HTMLPurifier_Token_Start('script'),
 | |
|                 new HTMLPurifier_Token_Text('alert("<foo>");'),
 | |
|                 new HTMLPurifier_Token_End('script'),
 | |
|             ),
 | |
|             array(
 | |
|                 // PH5P, for some reason, bubbles the script to <head>
 | |
|                 'PH5P' => false,
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_entitiesInComment()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<!-- This comment < < & -->',
 | |
|             array( new HTMLPurifier_Token_Comment(' This comment < < & ') )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_attributeWithSpecialCharacters()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<a href="><>">',
 | |
|             array( new HTMLPurifier_Token_Empty('a', array('href' => '><>')) ),
 | |
|             array(
 | |
|                 'DirectLex' => array(
 | |
|                     new HTMLPurifier_Token_Start('a', array('href' => '')),
 | |
|                     new HTMLPurifier_Token_Text('<'),
 | |
|                     new HTMLPurifier_Token_Text('">'),
 | |
|                 )
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_emptyTagWithSlashInAttribute()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<param name="src" value="http://example.com/video.wmv" />',
 | |
|             array( new HTMLPurifier_Token_Empty('param', array('name' => 'src', 'value' => 'http://example.com/video.wmv')) )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_style()
 | |
|     {
 | |
|         $extra = array(
 | |
|                 // PH5P doesn't seem to like style tags
 | |
|                 'PH5P' => false,
 | |
|                 // DirectLex defers to RemoveForeignElements for textification
 | |
|                 'DirectLex' => array(
 | |
|                     new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
 | |
|                     new HTMLPurifier_Token_Comment("\ndiv {}\n"),
 | |
|                     new HTMLPurifier_Token_End('style'),
 | |
|                 ),
 | |
|             );
 | |
|         if (!defined('LIBXML_VERSION')) {
 | |
|             // LIBXML_VERSION is missing in early versions of PHP
 | |
|             // prior to 1.30 of php-src/ext/libxml/libxml.c (version-wise,
 | |
|             // this translates to 5.0.x. In such cases, punt the test entirely.
 | |
|             return;
 | |
|         } elseif (LIBXML_VERSION < 20628) {
 | |
|             // libxml's behavior is wrong prior to this version, so make
 | |
|             // appropriate accomodations
 | |
|             $extra['DOMLex'] = $extra['DirectLex'];
 | |
|         }
 | |
|         $this->assertTokenization(
 | |
| '<style type="text/css"><!--
 | |
| div {}
 | |
| --></style>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('style', array('type' => 'text/css')),
 | |
|                 new HTMLPurifier_Token_Text("\ndiv {}\n"),
 | |
|                 new HTMLPurifier_Token_End('style'),
 | |
|             ),
 | |
|             $extra
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_tagWithAtSignAndExtraGt()
 | |
|     {
 | |
|         $alt_expect = array(
 | |
|             // Technically this is invalid, but it won't be a
 | |
|             // problem with invalid element removal; also, this
 | |
|             // mimics Mozilla's parsing of the tag.
 | |
|             new HTMLPurifier_Token_Start('a@'),
 | |
|             new HTMLPurifier_Token_Text('>'),
 | |
|         );
 | |
|         $this->assertTokenization(
 | |
|             '<a@>>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('a'),
 | |
|                 new HTMLPurifier_Token_Text('>'),
 | |
|                 new HTMLPurifier_Token_End('a'),
 | |
|             ),
 | |
|             array(
 | |
|                 'DirectLex' => $alt_expect,
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_emoticonHeart()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<br /><3<br />',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Empty('br'),
 | |
|                 new HTMLPurifier_Token_Text('<'),
 | |
|                 new HTMLPurifier_Token_Text('3'),
 | |
|                 new HTMLPurifier_Token_Empty('br'),
 | |
|             ),
 | |
|             array(
 | |
|                 'DOMLex' => array(
 | |
|                     new HTMLPurifier_Token_Empty('br'),
 | |
|                     new HTMLPurifier_Token_Text('<3'),
 | |
|                     new HTMLPurifier_Token_Empty('br'),
 | |
|                 ),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_emoticonShiftyEyes()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<b><<</b>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('b'),
 | |
|                 new HTMLPurifier_Token_Text('<'),
 | |
|                 new HTMLPurifier_Token_Text('<'),
 | |
|                 new HTMLPurifier_Token_End('b'),
 | |
|             ),
 | |
|             array(
 | |
|                 'DOMLex' => array(
 | |
|                     new HTMLPurifier_Token_Start('b'),
 | |
|                     new HTMLPurifier_Token_Text('<<'),
 | |
|                     new HTMLPurifier_Token_End('b'),
 | |
|                 ),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_eon1996()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '< <b>test</b>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text('<'),
 | |
|                 new HTMLPurifier_Token_Text(' '),
 | |
|                 new HTMLPurifier_Token_Start('b'),
 | |
|                 new HTMLPurifier_Token_Text('test'),
 | |
|                 new HTMLPurifier_Token_End('b'),
 | |
|             ),
 | |
|             array(
 | |
|                 'DOMLex' => array(
 | |
|                     new HTMLPurifier_Token_Text('< '),
 | |
|                     new HTMLPurifier_Token_Start('b'),
 | |
|                     new HTMLPurifier_Token_Text('test'),
 | |
|                     new HTMLPurifier_Token_End('b'),
 | |
|                 ),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_bodyInCDATA()
 | |
|     {
 | |
|         $alt_tokens = array(
 | |
|             new HTMLPurifier_Token_Text('<'),
 | |
|             new HTMLPurifier_Token_Text('body'),
 | |
|             new HTMLPurifier_Token_Text('>'),
 | |
|             new HTMLPurifier_Token_Text('Foo'),
 | |
|             new HTMLPurifier_Token_Text('<'),
 | |
|             new HTMLPurifier_Token_Text('/body'),
 | |
|             new HTMLPurifier_Token_Text('>'),
 | |
|         );
 | |
|         $this->assertTokenization(
 | |
|             '<![CDATA[<body>Foo</body>]]>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text('<body>Foo</body>'),
 | |
|             ),
 | |
|             array(
 | |
|                 'PH5P' => $alt_tokens,
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<a><img /></a>',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Start('a'),
 | |
|                 new HTMLPurifier_Token_Empty('img'),
 | |
|                 new HTMLPurifier_Token_End('a'),
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_ignoreIECondComment()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<!--[if IE]>foo<a>bar<!-- baz --><![endif]-->',
 | |
|             array()
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_removeProcessingInstruction()
 | |
|     {
 | |
|         $this->config->set('Core.RemoveProcessingInstructions', true);
 | |
|         $this->assertTokenization(
 | |
|             '<?xml blah blah ?>',
 | |
|             array()
 | |
|         );
 | |
|     }
 | |
| 
 | |
|    public function test_tokenizeHTML_removeNewline()
 | |
|    {
 | |
|         $this->config->set('Core.NormalizeNewlines', true);
 | |
|         $this->assertTokenization(
 | |
|             "plain\rtext\r\n",
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text("plain\ntext\n")
 | |
|             )
 | |
|         );
 | |
|    }
 | |
| 
 | |
|    public function test_tokenizeHTML_noRemoveNewline()
 | |
|    {
 | |
|         $this->config->set('Core.NormalizeNewlines', false);
 | |
|         $this->assertTokenization(
 | |
|             "plain\rtext\r\n",
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text("plain\rtext\r\n")
 | |
|             )
 | |
|         );
 | |
|      }
 | |
| 
 | |
|     public function test_tokenizeHTML_conditionalCommentUngreedy()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '<!--[if gte mso 9]>a<![endif]-->b<!--[if gte mso 9]>c<![endif]-->',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Text("b")
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_imgTag()
 | |
|     {
 | |
|         $start = array(
 | |
|                         new HTMLPurifier_Token_Start('img',
 | |
|                             array(
 | |
|                                 'src' => 'img_11775.jpg',
 | |
|                                 'alt' => '[Img #11775]',
 | |
|                                 'id' => 'EMBEDDED_IMG_11775',
 | |
|                             )
 | |
|                         )
 | |
|                     );
 | |
|         $this->assertTokenization(
 | |
|             '<img src="img_11775.jpg" alt="[Img #11775]" id="EMBEDDED_IMG_11775" >',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_Empty('img',
 | |
|                     array(
 | |
|                         'src' => 'img_11775.jpg',
 | |
|                         'alt' => '[Img #11775]',
 | |
|                         'id' => 'EMBEDDED_IMG_11775',
 | |
|                     )
 | |
|                 )
 | |
|             ),
 | |
|             array(
 | |
|                 'DirectLex' => $start,
 | |
|                 )
 | |
|         );
 | |
|     }
 | |
| 
 | |
|     public function test_tokenizeHTML_prematureDivClose()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             '</div>dontdie',
 | |
|             array(
 | |
|                 new HTMLPurifier_Token_End('div'),
 | |
|                 new HTMLPurifier_Token_Text('dontdie')
 | |
|             ),
 | |
|             array(
 | |
|                 'DOMLex' => $alt = array(new HTMLPurifier_Token_Text('dontdie')),
 | |
|                 'PH5P' => $alt
 | |
|             )
 | |
|         );
 | |
|     }
 | |
| 
 | |
| 
 | |
|     /*
 | |
| 
 | |
|     public function test_tokenizeHTML_()
 | |
|     {
 | |
|         $this->assertTokenization(
 | |
|             ,
 | |
|             array(
 | |
| 
 | |
|             )
 | |
|         );
 | |
|     }
 | |
|     */
 | |
| 
 | |
| }
 | |
| 
 | |
| // vim: et sw=4 sts=4
 | 
