class FeedsCrawlerNext in Feeds Crawler 7.2
Automatically finds the next link via rel="next" links.
Hierarchy
- class \FeedsCrawlerBase extends \FeedsHTTPFetcher
- class \FeedsCrawlerNext
Expanded class hierarchy of FeedsCrawlerNext
1 string reference to 'FeedsCrawlerNext'
- feeds_crawler_feeds_plugins in ./
feeds_crawler.feeds.inc - Implements hook_feeds_plugins().
File
- src/
FeedsCrawlerNext.php, line 11 - Contains FeedsCrawlerNext.
View source
/**
 * Finds the next page to crawl via a link element carrying rel="next".
 */
class FeedsCrawlerNext extends FeedsCrawlerBase {

  /**
   * Returns the URL referenced by the first rel="next" link in the document.
   *
   * The document at $current_url is fetched through getFetcherResult() and
   * parsed as XML; the first "link" element (in any namespace) whose rel
   * attribute equals "next" supplies the href.
   *
   * @param FeedsSource $source
   *   The feed source. Not read by this implementation.
   * @param string $current_url
   *   The URL of the document to inspect; also passed to makeUrlAbsolute()
   *   together with the href that was found.
   *
   * @return string
   *   The value returned by makeUrlAbsolute() for the href that was found.
   *
   * @throws FeedsCrawlerLinkNotFoundException
   *   Thrown when no matching link exists or its href is empty after trimming.
   */
  protected function getNextUrl(FeedsSource $source, $current_url) {
    // Collect libxml errors internally instead of emitting PHP warnings, and
    // remember the previous setting so it can be restored afterwards.
    $errors = libxml_use_internal_errors(TRUE);

    // Disable the entity loader while parsing, where the function exists.
    if (function_exists('libxml_disable_entity_loader')) {
      $loader = libxml_disable_entity_loader(TRUE);
    }

    // Build the parser flags. The optional constants are only OR-ed in when
    // the running libxml build defines them.
    $options = LIBXML_NONET;
    $options |= defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0;
    $options |= defined('LIBXML_PARSEHUGE') ? LIBXML_PARSEHUGE : 0;

    $document = new DOMDocument();
    $document->strictErrorChecking = FALSE;
    // Libxml specific.
    $document->recover = TRUE;
    $document->loadXML($this->getFetcherResult($current_url)->getRaw(), $options);

    // local-name() matches "link" regardless of its namespace.
    $xpath = new DOMXPath($document);
    $href = $xpath->query('//*[local-name() = "link" and @rel="next"]/@href');

    // Restore the libxml state captured above and discard collected errors.
    libxml_use_internal_errors($errors);
    if (function_exists('libxml_disable_entity_loader')) {
      libxml_disable_entity_loader($loader);
    }
    libxml_clear_errors();

    if ($href->length === 0 || trim($href->item(0)->nodeValue) === '') {
      throw new FeedsCrawlerLinkNotFoundException();
    }

    return $this->makeUrlAbsolute($href->item(0)->nodeValue, $current_url);
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
FeedsCrawlerBase:: |
protected | function | Called before fetching the next link. | 1 |
FeedsCrawlerBase:: |
public | function | ||
FeedsCrawlerBase:: |
public | function | ||
FeedsCrawlerBase:: |
public | function | ||
FeedsCrawlerBase:: |
public | function | ||
FeedsCrawlerBase:: |
protected | function | Called after fetching the next link. | |
FeedsCrawlerBase:: |
public | function | ||
FeedsCrawlerBase:: |
protected | function | Returns a new fetcher result object. | |
FeedsCrawlerBase:: |
public | function | ||
FeedsCrawlerBase:: |
protected | function | Builds an absolute URL. | |
FeedsCrawlerBase:: |
public | function | 1 | |
FeedsCrawlerBase:: |
public | function | 1 | |
FeedsCrawlerBase:: |
public | function | ||
FeedsCrawlerNext:: |
protected | function |
Subclasses must override this to return the next URL. Overrides FeedsCrawlerBase:: |