package and dependencies
This commit is contained in:
225
vendor/symfony/dom-crawler/Crawler.php
vendored
225
vendor/symfony/dom-crawler/Crawler.php
vendored
@@ -30,62 +30,44 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
/**
|
||||
* The default namespace prefix to be used with XPath and CSS expressions.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
private $defaultNamespacePrefix = 'default';
|
||||
private string $defaultNamespacePrefix = 'default';
|
||||
|
||||
/**
|
||||
* A map of manually registered namespaces.
|
||||
*
|
||||
* @var array<string, string>
|
||||
*/
|
||||
private $namespaces = [];
|
||||
private array $namespaces = [];
|
||||
|
||||
/**
|
||||
* A map of cached namespaces.
|
||||
*
|
||||
* @var \ArrayObject
|
||||
*/
|
||||
private $cachedNamespaces;
|
||||
private \ArrayObject $cachedNamespaces;
|
||||
|
||||
/**
|
||||
* The base href value.
|
||||
*
|
||||
* @var string|null
|
||||
*/
|
||||
private $baseHref;
|
||||
|
||||
/**
|
||||
* @var \DOMDocument|null
|
||||
*/
|
||||
private $document;
|
||||
private ?string $baseHref;
|
||||
private ?\DOMDocument $document = null;
|
||||
|
||||
/**
|
||||
* @var list<\DOMNode>
|
||||
*/
|
||||
private $nodes = [];
|
||||
private array $nodes = [];
|
||||
|
||||
/**
|
||||
* Whether the Crawler contains HTML or XML content (used when converting CSS to XPath).
|
||||
*
|
||||
* @var bool
|
||||
*/
|
||||
private $isHtml = true;
|
||||
private bool $isHtml = true;
|
||||
|
||||
/**
|
||||
* @var HTML5|null
|
||||
*/
|
||||
private $html5Parser;
|
||||
private HTML5 $html5Parser;
|
||||
|
||||
/**
|
||||
* @param \DOMNodeList|\DOMNode|\DOMNode[]|string|null $node A Node to use as the base for the crawling
|
||||
*/
|
||||
public function __construct($node = null, string $uri = null, string $baseHref = null)
|
||||
public function __construct(\DOMNodeList|\DOMNode|array|string $node = null, string $uri = null, string $baseHref = null)
|
||||
{
|
||||
$this->uri = $uri;
|
||||
$this->baseHref = $baseHref ?: $uri;
|
||||
$this->html5Parser = class_exists(HTML5::class) ? new HTML5(['disable_html_ns' => true]) : null;
|
||||
$this->html5Parser = new HTML5(['disable_html_ns' => true]);
|
||||
$this->cachedNamespaces = new \ArrayObject();
|
||||
|
||||
$this->add($node);
|
||||
@@ -93,20 +75,16 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
/**
|
||||
* Returns the current URI.
|
||||
*
|
||||
* @return string|null
|
||||
*/
|
||||
public function getUri()
|
||||
public function getUri(): ?string
|
||||
{
|
||||
return $this->uri;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns base href.
|
||||
*
|
||||
* @return string|null
|
||||
*/
|
||||
public function getBaseHref()
|
||||
public function getBaseHref(): ?string
|
||||
{
|
||||
return $this->baseHref;
|
||||
}
|
||||
@@ -131,7 +109,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*
|
||||
* @throws \InvalidArgumentException when node is not the expected type
|
||||
*/
|
||||
public function add($node)
|
||||
public function add(\DOMNodeList|\DOMNode|array|string|null $node)
|
||||
{
|
||||
if ($node instanceof \DOMNodeList) {
|
||||
$this->addNodeList($node);
|
||||
@@ -230,14 +208,11 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
public function addXmlContent(string $content, string $charset = 'UTF-8', int $options = \LIBXML_NONET)
|
||||
{
|
||||
// remove the default namespace if it's the only namespace to make XPath expressions simpler
|
||||
if (!preg_match('/xmlns:/', $content)) {
|
||||
if (!str_contains($content, 'xmlns:')) {
|
||||
$content = str_replace('xmlns', 'ns', $content);
|
||||
}
|
||||
|
||||
$internalErrors = libxml_use_internal_errors(true);
|
||||
if (\LIBXML_VERSION < 20900) {
|
||||
$disableEntities = libxml_disable_entity_loader(true);
|
||||
}
|
||||
|
||||
$dom = new \DOMDocument('1.0', $charset);
|
||||
$dom->validateOnParse = true;
|
||||
@@ -247,9 +222,6 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
}
|
||||
|
||||
libxml_use_internal_errors($internalErrors);
|
||||
if (\LIBXML_VERSION < 20900) {
|
||||
libxml_disable_entity_loader($disableEntities);
|
||||
}
|
||||
|
||||
$this->addDocument($dom);
|
||||
|
||||
@@ -309,9 +281,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
throw new \InvalidArgumentException('Attaching DOM nodes from multiple documents in the same crawler is forbidden.');
|
||||
}
|
||||
|
||||
if (null === $this->document) {
|
||||
$this->document = $node->ownerDocument;
|
||||
}
|
||||
$this->document ??= $node->ownerDocument;
|
||||
|
||||
// Don't add duplicate nodes in the Crawler
|
||||
if (\in_array($node, $this->nodes, true)) {
|
||||
@@ -323,10 +293,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
/**
|
||||
* Returns a node given its position in the node list.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function eq(int $position)
|
||||
public function eq(int $position): static
|
||||
{
|
||||
if (isset($this->nodes[$position])) {
|
||||
return $this->createSubCrawler($this->nodes[$position]);
|
||||
@@ -351,7 +319,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*
|
||||
* @return array An array of values returned by the anonymous function
|
||||
*/
|
||||
public function each(\Closure $closure)
|
||||
public function each(\Closure $closure): array
|
||||
{
|
||||
$data = [];
|
||||
foreach ($this->nodes as $i => $node) {
|
||||
@@ -363,10 +331,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
/**
|
||||
* Slices the list of nodes by $offset and $length.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function slice(int $offset = 0, int $length = null)
|
||||
public function slice(int $offset = 0, int $length = null): static
|
||||
{
|
||||
return $this->createSubCrawler(\array_slice($this->nodes, $offset, $length));
|
||||
}
|
||||
@@ -377,10 +343,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
* To remove a node from the list, the anonymous function must return false.
|
||||
*
|
||||
* @param \Closure $closure An anonymous function
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function reduce(\Closure $closure)
|
||||
public function reduce(\Closure $closure): static
|
||||
{
|
||||
$nodes = [];
|
||||
foreach ($this->nodes as $i => $node) {
|
||||
@@ -394,20 +358,16 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
/**
|
||||
* Returns the first node of the current selection.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function first()
|
||||
public function first(): static
|
||||
{
|
||||
return $this->eq(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the last node of the current selection.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function last()
|
||||
public function last(): static
|
||||
{
|
||||
return $this->eq(\count($this->nodes) - 1);
|
||||
}
|
||||
@@ -415,11 +375,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns the siblings nodes of the current selection.
|
||||
*
|
||||
* @return static
|
||||
*
|
||||
* @throws \InvalidArgumentException When current node is empty
|
||||
*/
|
||||
public function siblings()
|
||||
public function siblings(): static
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -470,11 +428,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns the next siblings nodes of the current selection.
|
||||
*
|
||||
* @return static
|
||||
*
|
||||
* @throws \InvalidArgumentException When current node is empty
|
||||
*/
|
||||
public function nextAll()
|
||||
public function nextAll(): static
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -486,11 +442,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns the previous sibling nodes of the current selection.
|
||||
*
|
||||
* @return static
|
||||
*
|
||||
* @throws \InvalidArgumentException
|
||||
*/
|
||||
public function previousAll()
|
||||
public function previousAll(): static
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -499,28 +453,12 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
return $this->createSubCrawler($this->sibling($this->getNode(0), 'previousSibling'));
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the parent nodes of the current selection.
|
||||
*
|
||||
* @return static
|
||||
*
|
||||
* @throws \InvalidArgumentException When current node is empty
|
||||
*/
|
||||
public function parents()
|
||||
{
|
||||
trigger_deprecation('symfony/dom-crawler', '5.3', 'The %s() method is deprecated, use ancestors() instead.', __METHOD__);
|
||||
|
||||
return $this->ancestors();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the ancestors of the current selection.
|
||||
*
|
||||
* @return static
|
||||
*
|
||||
* @throws \InvalidArgumentException When the current node is empty
|
||||
*/
|
||||
public function ancestors()
|
||||
public function ancestors(): static
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -541,12 +479,10 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns the children nodes of the current selection.
|
||||
*
|
||||
* @return static
|
||||
*
|
||||
* @throws \InvalidArgumentException When current node is empty
|
||||
* @throws \RuntimeException If the CssSelector Component is not available and $selector is provided
|
||||
*/
|
||||
public function children(string $selector = null)
|
||||
public function children(string $selector = null): static
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -567,11 +503,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns the attribute value of the first node of the list.
|
||||
*
|
||||
* @return string|null
|
||||
*
|
||||
* @throws \InvalidArgumentException When current node is empty
|
||||
*/
|
||||
public function attr(string $attribute)
|
||||
public function attr(string $attribute): ?string
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -585,11 +519,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns the node name of the first node of the list.
|
||||
*
|
||||
* @return string
|
||||
*
|
||||
* @throws \InvalidArgumentException When current node is empty
|
||||
*/
|
||||
public function nodeName()
|
||||
public function nodeName(): string
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -606,11 +538,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
* @param string|null $default When not null: the value to return when the current node is empty
|
||||
* @param bool $normalizeWhitespace Whether whitespaces should be trimmed and normalized to single spaces
|
||||
*
|
||||
* @return string
|
||||
*
|
||||
* @throws \InvalidArgumentException When current node is empty
|
||||
*/
|
||||
public function text(string $default = null, bool $normalizeWhitespace = true)
|
||||
public function text(string $default = null, bool $normalizeWhitespace = true): string
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
if (null !== $default) {
|
||||
@@ -642,11 +572,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*
|
||||
* @param string|null $default When not null: the value to return when the current node is empty
|
||||
*
|
||||
* @return string
|
||||
*
|
||||
* @throws \InvalidArgumentException When current node is empty
|
||||
*/
|
||||
public function html(string $default = null)
|
||||
public function html(string $default = null): string
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
if (null !== $default) {
|
||||
@@ -659,7 +587,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
$node = $this->getNode(0);
|
||||
$owner = $node->ownerDocument;
|
||||
|
||||
if (null !== $this->html5Parser && '<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
|
||||
if ('<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
|
||||
$owner = $this->html5Parser;
|
||||
}
|
||||
|
||||
@@ -680,7 +608,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
$node = $this->getNode(0);
|
||||
$owner = $node->ownerDocument;
|
||||
|
||||
if (null !== $this->html5Parser && '<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
|
||||
if ('<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
|
||||
$owner = $this->html5Parser;
|
||||
}
|
||||
|
||||
@@ -692,10 +620,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*
|
||||
* Since an XPath expression might evaluate to either a simple type or a \DOMNodeList,
|
||||
* this method will return either an array of simple types or a new Crawler instance.
|
||||
*
|
||||
* @return array|Crawler
|
||||
*/
|
||||
public function evaluate(string $xpath)
|
||||
public function evaluate(string $xpath): array|Crawler
|
||||
{
|
||||
if (null === $this->document) {
|
||||
throw new \LogicException('Cannot evaluate the expression on an uninitialized crawler.');
|
||||
@@ -723,10 +649,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
* Example:
|
||||
*
|
||||
* $crawler->filter('h1 a')->extract(['_text', 'href']);
|
||||
*
|
||||
* @return array
|
||||
*/
|
||||
public function extract(array $attributes)
|
||||
public function extract(array $attributes): array
|
||||
{
|
||||
$count = \count($attributes);
|
||||
|
||||
@@ -756,10 +680,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
* is considered as a fake parent of the elements inside it.
|
||||
* This means that a child selector "div" or "./div" will match only
|
||||
* the div elements of the current crawler, not their children.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function filterXPath(string $xpath)
|
||||
public function filterXPath(string $xpath): static
|
||||
{
|
||||
$xpath = $this->relativize($xpath);
|
||||
|
||||
@@ -776,11 +698,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*
|
||||
* This method only works if you have installed the CssSelector Symfony Component.
|
||||
*
|
||||
* @return static
|
||||
*
|
||||
* @throws \RuntimeException if the CssSelector Component is not available
|
||||
*/
|
||||
public function filter(string $selector)
|
||||
public function filter(string $selector): static
|
||||
{
|
||||
$converter = $this->createCssSelectorConverter();
|
||||
|
||||
@@ -790,10 +710,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
/**
|
||||
* Selects links by name or alt value for clickable images.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function selectLink(string $value)
|
||||
public function selectLink(string $value): static
|
||||
{
|
||||
return $this->filterRelativeXPath(
|
||||
sprintf('descendant-or-self::a[contains(concat(\' \', normalize-space(string(.)), \' \'), %1$s) or ./img[contains(concat(\' \', normalize-space(string(@alt)), \' \'), %1$s)]]', static::xpathLiteral(' '.$value.' '))
|
||||
@@ -802,10 +720,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
/**
|
||||
* Selects images by alt value.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function selectImage(string $value)
|
||||
public function selectImage(string $value): static
|
||||
{
|
||||
$xpath = sprintf('descendant-or-self::img[contains(normalize-space(string(@alt)), %s)]', static::xpathLiteral($value));
|
||||
|
||||
@@ -814,10 +730,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
/**
|
||||
* Selects a button by name or alt value for images.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
public function selectButton(string $value)
|
||||
public function selectButton(string $value): static
|
||||
{
|
||||
return $this->filterRelativeXPath(
|
||||
sprintf('descendant-or-self::input[((contains(%1$s, "submit") or contains(%1$s, "button")) and contains(concat(\' \', normalize-space(string(@value)), \' \'), %2$s)) or (contains(%1$s, "image") and contains(concat(\' \', normalize-space(string(@alt)), \' \'), %2$s)) or @id=%3$s or @name=%3$s] | descendant-or-self::button[contains(concat(\' \', normalize-space(string(.)), \' \'), %2$s) or @id=%3$s or @name=%3$s]', 'translate(@type, "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "abcdefghijklmnopqrstuvwxyz")', static::xpathLiteral(' '.$value.' '), static::xpathLiteral($value))
|
||||
@@ -827,11 +741,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns a Link object for the first node in the list.
|
||||
*
|
||||
* @return Link
|
||||
*
|
||||
* @throws \InvalidArgumentException If the current node list is empty or the selected node is not instance of DOMElement
|
||||
*/
|
||||
public function link(string $method = 'get')
|
||||
public function link(string $method = 'get'): Link
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -853,7 +765,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*
|
||||
* @throws \InvalidArgumentException If the current node list contains non-DOMElement instances
|
||||
*/
|
||||
public function links()
|
||||
public function links(): array
|
||||
{
|
||||
$links = [];
|
||||
foreach ($this->nodes as $node) {
|
||||
@@ -870,11 +782,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns an Image object for the first node in the list.
|
||||
*
|
||||
* @return Image
|
||||
*
|
||||
* @throws \InvalidArgumentException If the current node list is empty
|
||||
*/
|
||||
public function image()
|
||||
public function image(): Image
|
||||
{
|
||||
if (!\count($this)) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -894,7 +804,7 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*
|
||||
* @return Image[]
|
||||
*/
|
||||
public function images()
|
||||
public function images(): array
|
||||
{
|
||||
$images = [];
|
||||
foreach ($this as $node) {
|
||||
@@ -911,11 +821,9 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* Returns a Form object for the first node in the list.
|
||||
*
|
||||
* @return Form
|
||||
*
|
||||
* @throws \InvalidArgumentException If the current node list is empty or the selected node is not instance of DOMElement
|
||||
*/
|
||||
public function form(array $values = null, string $method = null)
|
||||
public function form(array $values = null, string $method = null): Form
|
||||
{
|
||||
if (!$this->nodes) {
|
||||
throw new \InvalidArgumentException('The current node list is empty.');
|
||||
@@ -964,10 +872,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
*
|
||||
* echo Crawler::xpathLiteral('a\'b"c');
|
||||
* //prints concat('a', "'", 'b"c')
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public static function xpathLiteral(string $s)
|
||||
public static function xpathLiteral(string $s): string
|
||||
{
|
||||
if (!str_contains($s, "'")) {
|
||||
return sprintf("'%s'", $s);
|
||||
@@ -997,10 +903,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
* Filters the list of nodes with an XPath expression.
|
||||
*
|
||||
* The XPath expression should already be processed to apply it in the context of each node.
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
private function filterRelativeXPath(string $xpath): object
|
||||
private function filterRelativeXPath(string $xpath): static
|
||||
{
|
||||
$crawler = $this->createSubCrawler(null);
|
||||
if (null === $this->document) {
|
||||
@@ -1106,19 +1010,12 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
return $xpath; // The XPath expression is invalid
|
||||
}
|
||||
|
||||
/**
|
||||
* @return \DOMNode|null
|
||||
*/
|
||||
public function getNode(int $position)
|
||||
public function getNode(int $position): ?\DOMNode
|
||||
{
|
||||
return $this->nodes[$position] ?? null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return int
|
||||
*/
|
||||
#[\ReturnTypeWillChange]
|
||||
public function count()
|
||||
public function count(): int
|
||||
{
|
||||
return \count($this->nodes);
|
||||
}
|
||||
@@ -1126,16 +1023,12 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
/**
|
||||
* @return \ArrayIterator<int, \DOMNode>
|
||||
*/
|
||||
#[\ReturnTypeWillChange]
|
||||
public function getIterator()
|
||||
public function getIterator(): \ArrayIterator
|
||||
{
|
||||
return new \ArrayIterator($this->nodes);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return array
|
||||
*/
|
||||
protected function sibling(\DOMNode $node, string $siblingDir = 'nextSibling')
|
||||
protected function sibling(\DOMNode $node, string $siblingDir = 'nextSibling'): array
|
||||
{
|
||||
$nodes = [];
|
||||
|
||||
@@ -1159,9 +1052,6 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
$htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
|
||||
|
||||
$internalErrors = libxml_use_internal_errors(true);
|
||||
if (\LIBXML_VERSION < 20900) {
|
||||
$disableEntities = libxml_disable_entity_loader(true);
|
||||
}
|
||||
|
||||
$dom = new \DOMDocument('1.0', $charset);
|
||||
$dom->validateOnParse = true;
|
||||
@@ -1171,9 +1061,6 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
}
|
||||
|
||||
libxml_use_internal_errors($internalErrors);
|
||||
if (\LIBXML_VERSION < 20900) {
|
||||
libxml_disable_entity_loader($disableEntities);
|
||||
}
|
||||
|
||||
return $dom;
|
||||
}
|
||||
@@ -1187,11 +1074,11 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
try {
|
||||
return mb_encode_numericentity($htmlContent, [0x80, 0x10FFFF, 0, 0x1FFFFF], $charset);
|
||||
} catch (\Exception|\ValueError $e) {
|
||||
} catch (\Exception|\ValueError) {
|
||||
try {
|
||||
$htmlContent = iconv($charset, 'UTF-8', $htmlContent);
|
||||
$htmlContent = mb_encode_numericentity($htmlContent, [0x80, 0x10FFFF, 0, 0x1FFFFF], 'UTF-8');
|
||||
} catch (\Exception|\ValueError $e) {
|
||||
} catch (\Exception|\ValueError) {
|
||||
}
|
||||
|
||||
return $htmlContent;
|
||||
@@ -1249,10 +1136,8 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
* Creates a crawler for some subnodes.
|
||||
*
|
||||
* @param \DOMNodeList|\DOMNode|\DOMNode[]|string|null $nodes
|
||||
*
|
||||
* @return static
|
||||
*/
|
||||
private function createSubCrawler($nodes): object
|
||||
private function createSubCrawler(\DOMNodeList|\DOMNode|array|string|null $nodes): static
|
||||
{
|
||||
$crawler = new static($nodes, $this->uri, $this->baseHref);
|
||||
$crawler->isHtml = $this->isHtml;
|
||||
@@ -1291,12 +1176,10 @@ class Crawler implements \Countable, \IteratorAggregate
|
||||
|
||||
private function canParseHtml5String(string $content): bool
|
||||
{
|
||||
if (null === $this->html5Parser) {
|
||||
return false;
|
||||
}
|
||||
if (false === ($pos = stripos($content, '<!doctype html>'))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
$header = substr($content, 0, $pos);
|
||||
|
||||
return '' === $header || $this->isValidHtml5Heading($header);
|
||||
|
Reference in New Issue
Block a user