You are here

protected function FeedsCrawlerNext::getNextUrl in Feeds Crawler 7.2

Subclasses must override this to return the next URL.

Parameters

FeedsSource $source: The feed source.

string $current_url: The current URL being fetched.

Return value

string The next URL.

Throws

FeedsCrawlerLinkNotFoundException Thrown if the next link could not be found.

Overrides FeedsCrawlerBase::getNextUrl

File

src/FeedsCrawlerNext.php, line 16
Contains FeedsCrawlerNext.

Class

FeedsCrawlerNext
Automatically finds the next link via rel="next" links.

Code

/**
 * Finds the next page URL from a rel="next" link in the fetched document.
 *
 * Fetches the raw content for $current_url, parses it as XML and reads the
 * href attribute of the first element whose local name is "link" and whose
 * rel attribute is exactly "next".
 *
 * @param FeedsSource $source
 *   The feed source. Not used by this implementation.
 * @param string $current_url
 *   The current URL being fetched.
 *
 * @return string
 *   The result of makeUrlAbsolute() for the found href and $current_url.
 *
 * @throws FeedsCrawlerLinkNotFoundException
 *   Thrown if the next link could not be found.
 */
protected function getNextUrl(FeedsSource $source, $current_url) {
  // Fetch before touching libxml's global state, so that an exception thrown
  // while fetching cannot leave that state modified.
  $raw = $this
    ->getFetcherResult($current_url)
    ->getRaw();

  // An empty document cannot contain a link. Bail out early: loadXML() warns
  // on empty input in PHP 7 and throws a ValueError in PHP 8.
  if ((string) $raw === '') {
    throw new FeedsCrawlerLinkNotFoundException();
  }

  $errors = libxml_use_internal_errors(TRUE);

  // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so only call
  // it on older versions.
  $toggle_loader = PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader');
  if ($toggle_loader) {
    $loader = libxml_disable_entity_loader(TRUE);
  }

  // Optional flags are only added when this libxml build defines them.
  $options = LIBXML_NONET;
  $options |= defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0;
  $options |= defined('LIBXML_PARSEHUGE') ? LIBXML_PARSEHUGE : 0;

  $document = new DOMDocument();
  $document->strictErrorChecking = FALSE;

  // Libxml specific.
  $document->recover = TRUE;
  $document->loadXML($raw, $options);

  $xpath = new DOMXPath($document);
  $href = $xpath
    ->query('//*[local-name() = "link" and @rel="next"]/@href');

  // Restore the libxml settings that were in effect before this call.
  libxml_use_internal_errors($errors);
  if ($toggle_loader) {
    libxml_disable_entity_loader($loader);
  }
  libxml_clear_errors();

  if ($href->length === 0 || trim($href->item(0)->nodeValue) === '') {
    throw new FeedsCrawlerLinkNotFoundException();
  }

  return $this
    ->makeUrlAbsolute($href->item(0)->nodeValue, $current_url);
}