You are here

FeedsCrawlerPattern.php in Feeds Crawler 7.2

Contains FeedsCrawlerPattern.

File

src/FeedsCrawlerPattern.php
View source
<?php

/**
 * @file
 * Contains FeedsCrawlerPattern.
 */

/**
 * Crawls links using a URL pattern.
 *
 * The next URL is produced by replacing tokens in the configured pattern.
 * Supported tokens are the parse_url() component names wrapped in braces
 * ({scheme}, {host}, {port}, {user}, {pass}, {path}, {query}, {fragment}),
 * {full_path} (the 'path' element returned by drupal_parse_url() for the
 * current URL) and {index} (the counter stored on the fetch state).
 */
class FeedsCrawlerPattern extends FeedsCrawlerBase {

  /**
   * The default values for parse_url().
   *
   * parse_url() omits components that are absent from the URL, so these
   * defaults guarantee that every component token has a replacement value.
   *
   * @var array
   */
  protected static $defaultParts = array(
    'scheme' => 'http',
    'host' => '',
    'port' => '',
    'user' => '',
    'pass' => '',
    'path' => '',
    'query' => '',
    'fragment' => '',
  );

  /**
   * {@inheritdoc}
   *
   * Maintains the pattern index on the fetch state: the first call stores the
   * configured initial index, every later call adds the configured increment.
   *
   * @param FeedsSource $source
   *   The source being fetched; provides this plugin's source configuration.
   * @param FeedsState $state
   *   The fetch state on which the "index" property is kept.
   */
  protected function beginFetch(FeedsSource $source, FeedsState $state) {
    parent::beginFetch($source, $state);

    // Fall back to the declared defaults for any key missing from the stored
    // source configuration.
    $source_config = $source->getConfigFor($this) + $this->sourceDefaults();

    if (!isset($state->index)) {
      $state->index = $source_config['initial_index'];
    }
    else {
      $state->index += $source_config['increment'];
    }
  }

  /**
   * {@inheritdoc}
   *
   * Builds the next URL by substituting tokens in the configured pattern.
   *
   * @param FeedsSource $source
   *   The source being fetched.
   * @param string $current_url
   *   The URL that was just fetched; its components feed the tokens.
   *
   * @return string
   *   The pattern with all known tokens replaced.
   *
   * @throws FeedsCrawlerLinkNotFoundException
   *   Thrown when the pattern, initial index or increment is missing or empty
   *   in the source configuration, or when the current URL cannot be parsed.
   */
  protected function getNextUrl(FeedsSource $source, $current_url) {
    $source_config = $source->getConfigFor($this);

    // All three settings must be present and non-empty. strlen() is used
    // rather than empty() so that a value of "0" is accepted.
    foreach (array('pattern', 'initial_index', 'increment') as $key) {
      if (!isset($source_config[$key]) || !strlen($source_config[$key])) {
        throw new FeedsCrawlerLinkNotFoundException();
      }
    }

    // parse_url() returns FALSE for seriously malformed URLs. Adding an array
    // to FALSE is a fatal error, so treat that case as "no next link".
    $parts = parse_url($current_url);
    if ($parts === FALSE) {
      throw new FeedsCrawlerLinkNotFoundException();
    }
    $parts += self::$defaultParts;

    $tokens = array();
    foreach ($parts as $key => $value) {
      $tokens['{' . $key . '}'] = $value;
    }

    $drupal_parts = drupal_parse_url($current_url);
    $tokens['{full_path}'] = $drupal_parts['path'];
    $tokens['{index}'] = $source->state(FEEDS_FETCH)->index;

    return strtr($source_config['pattern'], $tokens);
  }

  /**
   * {@inheritdoc}
   *
   * Adds the pattern, initial index and increment defaults on top of the
   * parent defaults; values declared here win over parent values.
   */
  public function sourceDefaults() {
    return array(
      'pattern' => '{full_path}?page={index}',
      'initial_index' => 0,
      'increment' => 1,
    ) + parent::sourceDefaults();
  }

  /**
   * {@inheritdoc}
   *
   * Extends the parent form with the pattern, initial index and increment
   * fields.
   *
   * @param array $source_config
   *   The current source configuration used for the default field values.
   *
   * @return array
   *   The form array.
   */
  public function sourceForm($source_config) {
    $form = parent::sourceForm($source_config);

    // Use the declared defaults as fallbacks so that the form and
    // sourceDefaults() cannot drift apart.
    $defaults = $this->sourceDefaults();

    $form['pattern'] = array(
      '#type' => 'textfield',
      '#title' => t('Pattern'),
      '#description' => t('The URL pattern.'),
      '#default_value' => isset($source_config['pattern']) ? $source_config['pattern'] : $defaults['pattern'],
    );
    $form['initial_index'] = array(
      '#type' => 'textfield',
      '#title' => t('Initial index'),
      '#description' => t('The initial index value.'),
      '#default_value' => isset($source_config['initial_index']) ? $source_config['initial_index'] : $defaults['initial_index'],
      '#element_validate' => array(
        'element_validate_number',
      ),
    );
    $form['increment'] = array(
      '#type' => 'textfield',
      '#title' => t('Increment'),
      '#description' => t('The amount to increment the index.'),
      '#default_value' => isset($source_config['increment']) ? $source_config['increment'] : $defaults['increment'],
      '#element_validate' => array(
        'element_validate_number',
      ),
    );

    return $form;
  }

}

Classes

Name | Description
FeedsCrawlerPattern | Crawls links using a URL pattern.