You are here

class FeedsCrawlerPattern in Feeds Crawler 7.2

Crawls links using a URL pattern.

Hierarchy

Expanded class hierarchy of FeedsCrawlerPattern

1 string reference to 'FeedsCrawlerPattern'
feeds_crawler_feeds_plugins in ./feeds_crawler.feeds.inc
Implements hook_feeds_plugins().

File

src/FeedsCrawlerPattern.php, line 11
Contains FeedsCrawlerPattern.

View source
class FeedsCrawlerPattern extends FeedsCrawlerBase {

  /**
   * The default values for parse_url().
   *
   * These are merged underneath the result of parse_url() in getNextUrl() so
   * that every URL component has a token replacement, even when the component
   * is absent from the URL being parsed.
   *
   * @var array
   */
  protected static $defaultParts = array(
    'scheme' => 'http',
    'host' => '',
    'port' => '',
    'user' => '',
    'pass' => '',
    'path' => '',
    'query' => '',
    'fragment' => '',
  );

  /**
   * {@inheritdoc}
   */
  protected function beginFetch(FeedsSource $source, FeedsState $state) {
    parent::beginFetch($source, $state);

    // Fall back to the plugin defaults for any setting the source lacks.
    $source_config = $source->getConfigFor($this) + $this->sourceDefaults();

    if (!isset($state->index)) {
      // No index stored on the state yet: start from the configured value.
      $state->index = $source_config['initial_index'];
    }
    else {
      // An index already exists: advance it by the configured step.
      $state->index += $source_config['increment'];
    }
  }

  /**
   * {@inheritdoc}
   */
  protected function getNextUrl(FeedsSource $source, $current_url) {
    // Unlike beginFetch(), no defaults are merged in here, so a source that is
    // missing any of these settings (or has an empty string) yields no link.
    $source_config = $source->getConfigFor($this);
    foreach (array('pattern', 'initial_index', 'increment') as $key) {
      if (!isset($source_config[$key]) || !strlen($source_config[$key])) {
        throw new FeedsCrawlerLinkNotFoundException();
      }
    }

    // parse_url() returns FALSE for seriously malformed URLs. Adding FALSE to
    // an array is a fatal error, so report that no next link can be built.
    $parts = parse_url($current_url);
    if (!is_array($parts)) {
      throw new FeedsCrawlerLinkNotFoundException();
    }
    $parts += self::$defaultParts;

    // One token per URL component: {scheme}, {host}, {port}, {user}, {pass},
    // {path}, {query} and {fragment}.
    $tokens = array();
    foreach ($parts as $key => $value) {
      $tokens['{' . $key . '}'] = $value;
    }

    // {full_path} is the 'path' element reported by drupal_parse_url().
    $drupal_parts = drupal_parse_url($current_url);
    $tokens['{full_path}'] = $drupal_parts['path'];

    // {index} is the counter maintained by beginFetch() on the fetch state.
    $tokens['{index}'] = $source->state(FEEDS_FETCH)->index;

    return strtr($source_config['pattern'], $tokens);
  }

  /**
   * {@inheritdoc}
   */
  public function sourceDefaults() {
    return array(
      'pattern' => '{full_path}?page={index}',
      'initial_index' => 0,
      'increment' => 1,
    ) + parent::sourceDefaults();
  }

  /**
   * {@inheritdoc}
   */
  public function sourceForm($source_config) {
    $form = parent::sourceForm($source_config);

    // Use sourceDefaults() as the single place the fallback values are
    // declared, instead of repeating the literals in each element.
    $defaults = $this->sourceDefaults();

    $form['pattern'] = array(
      '#type' => 'textfield',
      '#title' => t('Pattern'),
      '#description' => t('The URL pattern.'),
      '#default_value' => isset($source_config['pattern']) ? $source_config['pattern'] : $defaults['pattern'],
    );
    $form['initial_index'] = array(
      '#type' => 'textfield',
      '#title' => t('Initial index'),
      '#description' => t('The initial index value.'),
      '#default_value' => isset($source_config['initial_index']) ? $source_config['initial_index'] : $defaults['initial_index'],
      '#element_validate' => array(
        'element_validate_number',
      ),
    );
    $form['increment'] = array(
      '#type' => 'textfield',
      '#title' => t('Increment'),
      '#description' => t('The amount to increment the index.'),
      '#default_value' => isset($source_config['increment']) ? $source_config['increment'] : $defaults['increment'],
      '#element_validate' => array(
        'element_validate_number',
      ),
    );

    return $form;
  }

}

Members

Name Modifiers Type Description Overrides
FeedsCrawlerBase::clear public function
FeedsCrawlerBase::configDefaults public function
FeedsCrawlerBase::configForm public function
FeedsCrawlerBase::configFormValidate public function
FeedsCrawlerBase::endFetch protected function Called after fetching the next link.
FeedsCrawlerBase::fetch public function
FeedsCrawlerBase::getFetcherResult protected function Returns a new fetcher result object.
FeedsCrawlerBase::hasConfigForm public function
FeedsCrawlerBase::makeUrlAbsolute protected function Builds an absolute URL.
FeedsCrawlerBase::sourceFormValidate public function
FeedsCrawlerPattern::$defaultParts protected static property The default values for parse_url().
FeedsCrawlerPattern::beginFetch protected function Called before fetching the next link. Overrides FeedsCrawlerBase::beginFetch
FeedsCrawlerPattern::getNextUrl protected function Subclasses must override this to return the next URL. Overrides FeedsCrawlerBase::getNextUrl
FeedsCrawlerPattern::sourceDefaults public function Overrides FeedsCrawlerBase::sourceDefaults
FeedsCrawlerPattern::sourceForm public function Overrides FeedsCrawlerBase::sourceForm