You are here

FeedsCrawlerNext.php in Feeds Crawler 7.2

Contains FeedsCrawlerNext.

File

src/FeedsCrawlerNext.php
View source
<?php

/**
 * @file
 * Contains FeedsCrawlerNext.
 */

/**
 * Automatically finds the next link via rel="next" links.
 *
 * The fetched document is parsed as XML and searched for the first element
 * whose local name is "link" (in any namespace) carrying rel="next"; its href
 * attribute becomes the next URL to crawl.
 */
class FeedsCrawlerNext extends FeedsCrawlerBase {

  /**
   * {@inheritdoc}
   */
  protected function getNextUrl(FeedsSource $source, $current_url) {
    // Fetch the document before touching libxml's process-wide settings, so
    // that a failing fetch cannot leave those settings modified.
    $raw = (string) $this->getFetcherResult($current_url)->getRaw();

    // DOMDocument::loadXML() rejects an empty string (warning before PHP 8,
    // ValueError from PHP 8 on). An empty document cannot contain a link.
    if ($raw === '') {
      throw new FeedsCrawlerLinkNotFoundException();
    }

    // Collect parse errors internally instead of emitting PHP warnings.
    $errors = libxml_use_internal_errors(TRUE);

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0, which
    // requires libxml >= 2.9.0 where external entity loading is already
    // disabled by default. Only toggle it on older PHP versions.
    $toggle_entity_loader = function_exists('libxml_disable_entity_loader') && PHP_VERSION_ID < 80000;
    $loader = $toggle_entity_loader ? libxml_disable_entity_loader(TRUE) : NULL;

    // Never touch the network while parsing. The optional flags are only
    // added when the running libxml build defines them.
    $options = LIBXML_NONET;
    $options |= defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0;
    $options |= defined('LIBXML_PARSEHUGE') ? LIBXML_PARSEHUGE : 0;

    $document = new DOMDocument();
    $document->strictErrorChecking = FALSE;

    // Libxml specific.
    $document->recover = TRUE;
    $document->loadXML($raw, $options);

    $xpath = new DOMXPath($document);
    $href = $xpath->query('//*[local-name() = "link" and @rel="next"]/@href');

    // Drop the collected parse errors and put libxml back the way we found it.
    libxml_clear_errors();
    libxml_use_internal_errors($errors);
    if ($toggle_entity_loader) {
      libxml_disable_entity_loader($loader);
    }

    if ($href->length === 0) {
      throw new FeedsCrawlerLinkNotFoundException();
    }

    $next_url = $href->item(0)->nodeValue;
    if (trim($next_url) === '') {
      throw new FeedsCrawlerLinkNotFoundException();
    }

    return $this->makeUrlAbsolute($next_url, $current_url);
  }

}

Classes

Name | Description
FeedsCrawlerNext | Automatically finds the next link via rel="next" links.