FeedsCrawlerNext.php in Feeds Crawler 7.2
Contains FeedsCrawlerNext.
File
src/FeedsCrawlerNext.php (View source)
<?php
/**
* @file
* Contains FeedsCrawlerNext.
*/
/**
* Automatically finds the next link via rel="next" links.
*/
class FeedsCrawlerNext extends FeedsCrawlerBase {

  /**
   * Finds the URL of the next page from a rel="next" link element.
   *
   * The document fetched for $current_url is parsed as XML, and the href
   * attribute of the first element whose local name is "link" and whose rel
   * attribute equals "next" is used as the next link.
   *
   * @param FeedsSource $source
   *   The feed source being crawled. Not used by this implementation.
   * @param string $current_url
   *   The URL of the document currently being processed.
   *
   * @return mixed
   *   Whatever makeUrlAbsolute() returns for the found href and $current_url.
   *
   * @throws FeedsCrawlerLinkNotFoundException
   *   Thrown when the fetched document is empty, or when it contains no
   *   rel="next" link with a non-blank href.
   */
  protected function getNextUrl(FeedsSource $source, $current_url) {
    // Fetch before touching libxml's process-wide settings, so an exception
    // raised by the fetcher cannot leave those settings modified.
    $raw = $this->getFetcherResult($current_url)->getRaw();

    // DOMDocument::loadXML() emits a warning (PHP < 8) or throws a ValueError
    // (PHP >= 8) for an empty source. An empty document has no next link.
    if ((string) $raw === '') {
      throw new FeedsCrawlerLinkNotFoundException();
    }

    // Collect libxml errors internally instead of emitting PHP warnings for
    // malformed markup; the previous setting is restored below.
    $errors = libxml_use_internal_errors(TRUE);

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, where
    // external entity loading is already disabled by default. Only toggle it
    // on older versions to avoid the deprecation notice.
    $toggle_loader = function_exists('libxml_disable_entity_loader') && version_compare(PHP_VERSION, '8.0.0', '<');
    if ($toggle_loader) {
      $loader = libxml_disable_entity_loader(TRUE);
    }

    // Build the parser flags. The optional flags are only added when the
    // running PHP/libxml build defines them.
    $options = LIBXML_NONET;
    $options |= defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0;
    $options |= defined('LIBXML_PARSEHUGE') ? LIBXML_PARSEHUGE : 0;

    $document = new DOMDocument();
    $document->strictErrorChecking = FALSE;
    // Libxml specific: try to parse documents that are not well-formed.
    $document->recover = TRUE;
    $document->loadXML($raw, $options);

    $xpath = new DOMXPath($document);
    $href = $xpath->query('//*[local-name() = "link" and @rel="next"]/@href');

    // Discard the errors collected while parsing and restore the libxml
    // settings that were in effect before this method ran.
    libxml_clear_errors();
    libxml_use_internal_errors($errors);
    if ($toggle_loader) {
      libxml_disable_entity_loader($loader);
    }

    if ($href->length === 0 || trim($href->item(0)->nodeValue) === '') {
      throw new FeedsCrawlerLinkNotFoundException();
    }

    return $this->makeUrlAbsolute($href->item(0)->nodeValue, $current_url);
  }

}
Classes
Name | Description |
---|---|
FeedsCrawlerNext | Automatically finds the next link via rel="next" links. |