This commit is contained in:
Manish Verma
2016-12-13 18:18:25 +05:30
parent fc98add11c
commit 2d8e640e9b
2314 changed files with 97798 additions and 75664 deletions

View File

@@ -940,54 +940,29 @@ class Crawler implements \Countable, \IteratorAggregate
{
$expressions = array();
$unionPattern = '/\|(?![^\[]*\])/';
// An expression which will never match to replace expressions which cannot match in the crawler
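// We cannot simply drop such expressions: each one may carry a preserved opening parenthesis prefix
// and is later joined into a union with ' | ', so removing it could leave the result unbalanced or empty.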
$nonMatchingExpression = 'a[name() = "b"]';
$xpathLen = strlen($xpath);
$openedBrackets = 0;
$startPosition = strspn($xpath, " \t\n\r\0\x0B");
// Split any unions into individual expressions.
foreach (preg_split($unionPattern, $xpath) as $expression) {
$expression = trim($expression);
$parenthesis = '';
for ($i = $startPosition; $i <= $xpathLen; ++$i) {
$i += strcspn($xpath, '"\'[]|', $i);
if ($i < $xpathLen) {
switch ($xpath[$i]) {
case '"':
case "'":
if (false === $i = strpos($xpath, $xpath[$i], $i + 1)) {
return $xpath; // The XPath expression is invalid
}
continue 2;
case '[':
++$openedBrackets;
continue 2;
case ']':
--$openedBrackets;
continue 2;
}
// If the union is inside parentheses, we need to preserve the opening parentheses and apply
// the change only to the expression inside them.
if (preg_match('/^[\(\s*]+/', $expression, $matches)) {
$parenthesis = $matches[0];
$expression = substr($expression, strlen($parenthesis));
}
if ($openedBrackets) {
continue;
}
if ($startPosition < $xpathLen && '(' === $xpath[$startPosition]) {
// If the union is inside parentheses, we need to preserve the opening parentheses and apply
// the change only to the expression inside them.
$j = 1 + strspn($xpath, "( \t\n\r\0\x0B", $startPosition + 1);
$parenthesis = substr($xpath, $startPosition, $j);
$startPosition += $j;
} else {
$parenthesis = '';
}
$expression = rtrim(substr($xpath, $startPosition, $i - $startPosition));
if (0 === strpos($expression, 'self::*/')) {
$expression = './'.substr($expression, 8);
}
// add prefix before absolute element selector
if ('' === $expression) {
if (empty($expression)) {
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, '//')) {
$expression = 'descendant-or-self::'.substr($expression, 2);
@@ -1000,7 +975,7 @@ class Crawler implements \Countable, \IteratorAggregate
} elseif ('/' === $expression[0] || '.' === $expression[0] || 0 === strpos($expression, 'self::')) {
$expression = $nonMatchingExpression;
} elseif (0 === strpos($expression, 'descendant::')) {
$expression = 'descendant-or-self::'.substr($expression, 12);
$expression = 'descendant-or-self::'.substr($expression, strlen('descendant::'));
} elseif (preg_match('/^(ancestor|ancestor-or-self|attribute|following|following-sibling|namespace|parent|preceding|preceding-sibling)::/', $expression)) {
// the fake root has no parent, preceding or following nodes and also no attributes (even no namespace attributes)
$expression = $nonMatchingExpression;
@@ -1008,16 +983,9 @@ class Crawler implements \Countable, \IteratorAggregate
$expression = 'self::'.$expression;
}
$expressions[] = $parenthesis.$expression;
if ($i === $xpathLen) {
return implode(' | ', $expressions);
}
$i += strspn($xpath, " \t\n\r\0\x0B", $i + 1);
$startPosition = $i + 1;
}
return $xpath; // The XPath expression is invalid
return implode(' | ', $expressions);
}
/**

View File

@@ -430,7 +430,6 @@ EOF
$this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
$this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
$this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
$this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
}
public function testFilterXPath()
@@ -597,7 +596,7 @@ EOF
$this->assertCount(0, $crawler->filterXPath('self::a'), 'The fake root node has no "real" element name');
$this->assertCount(0, $crawler->filterXPath('self::a/img'), 'The fake root node has no "real" element name');
$this->assertCount(10, $crawler->filterXPath('self::*/a'));
$this->assertCount(9, $crawler->filterXPath('self::*/a'));
}
public function testFilter()
@@ -1080,8 +1079,6 @@ HTML;
<a href="?get=param">GetLink</a>
<a href="/example">Klausi|Claudiu</a>
<form action="foo" id="FooFormId">
<input type="text" value="TextValue" name="TextName" />
<input type="submit" value="FooValue" name="FooName" id="FooId" />