FeedsCrawlerPattern.php in Feeds Crawler 7.2
Contains FeedsCrawlerPattern.
File
src/FeedsCrawlerPattern.php (View source)
<?php
/**
* @file
* Contains FeedsCrawlerPattern.
*/
/**
 * Crawls links using a URL pattern.
 *
 * The next URL is built by replacing tokens in the configured pattern. One
 * token exists per URL component of the current URL ({scheme}, {host},
 * {port}, {user}, {pass}, {path}, {query}, {fragment}), plus {full_path}
 * (the 'path' element returned by drupal_parse_url()) and {index} (the
 * counter kept on the fetch state).
 */
class FeedsCrawlerPattern extends FeedsCrawlerBase {

  /**
   * Fallback values for URL components that parse_url() does not return.
   *
   * parse_url() only returns the components that are present in the URL.
   * Merging its result with this array guarantees that every component token
   * has a replacement value.
   *
   * @var array
   */
  protected static $defaultParts = array(
    'scheme' => 'http',
    'host' => '',
    'port' => '',
    'user' => '',
    'pass' => '',
    'path' => '',
    'query' => '',
    'fragment' => '',
  );

  /**
   * Initializes or advances the index stored on the fetch state.
   *
   * On the first call for a state the index is set to the configured initial
   * index; on every later call it is increased by the configured increment.
   *
   * @param FeedsSource $source
   *   The source whose configuration for this crawler is read.
   * @param FeedsState $state
   *   The state object that carries the index.
   */
  protected function beginFetch(FeedsSource $source, FeedsState $state) {
    parent::beginFetch($source, $state);

    // Fill in any setting the source has not stored yet.
    $source_config = $source->getConfigFor($this) + $this->sourceDefaults();

    if (!isset($state->index)) {
      $state->index = $source_config['initial_index'];
    }
    else {
      $state->index += $source_config['increment'];
    }
  }

  /**
   * Builds the next URL by replacing tokens in the configured pattern.
   *
   * @param FeedsSource $source
   *   The source whose configuration and fetch state are read.
   * @param string $current_url
   *   The URL whose components are exposed as tokens.
   *
   * @return string
   *   The configured pattern with all known tokens replaced.
   *
   * @throws FeedsCrawlerLinkNotFoundException
   *   Thrown if the pattern, initial index or increment is missing or empty,
   *   or if the current URL cannot be parsed.
   */
  protected function getNextUrl(FeedsSource $source, $current_url) {
    $source_config = $source->getConfigFor($this);

    // Defaults are deliberately not merged in here: an unset or empty setting
    // means no next link can be produced.
    foreach (array('pattern', 'initial_index', 'increment') as $key) {
      if (!isset($source_config[$key]) || !strlen($source_config[$key])) {
        throw new FeedsCrawlerLinkNotFoundException();
      }
    }

    // parse_url() returns FALSE for seriously malformed URLs. Adding an array
    // to FALSE is a fatal error, so report it as "no link found" instead.
    $parts = parse_url($current_url);
    if ($parts === FALSE) {
      throw new FeedsCrawlerLinkNotFoundException();
    }
    $parts += self::$defaultParts;

    $tokens = array();
    foreach ($parts as $key => $value) {
      $tokens['{' . $key . '}'] = $value;
    }

    $drupal_parts = drupal_parse_url($current_url);
    $tokens['{full_path}'] = $drupal_parts['path'];
    $tokens['{index}'] = $source->state(FEEDS_FETCH)->index;

    return strtr($source_config['pattern'], $tokens);
  }

  /**
   * Returns the default per-source settings for this crawler.
   *
   * @return array
   *   The pattern, initial index and increment defaults, followed by the
   *   defaults of the parent class. Values declared here take precedence.
   */
  public function sourceDefaults() {
    return array(
      'pattern' => '{full_path}?page={index}',
      'initial_index' => 0,
      'increment' => 1,
    ) + parent::sourceDefaults();
  }

  /**
   * Builds the per-source settings form.
   *
   * @param array $source_config
   *   The stored source configuration. Settings that are not set fall back to
   *   the values from sourceDefaults().
   *
   * @return array
   *   The parent form extended with the pattern, initial index and increment
   *   elements.
   */
  public function sourceForm($source_config) {
    $form = parent::sourceForm($source_config);

    // Single source of truth for fallback values, so the form cannot drift
    // away from sourceDefaults().
    $defaults = $this->sourceDefaults();

    $form['pattern'] = array(
      '#type' => 'textfield',
      '#title' => t('Pattern'),
      '#description' => t('The URL pattern.'),
      '#default_value' => isset($source_config['pattern']) ? $source_config['pattern'] : $defaults['pattern'],
    );
    $form['initial_index'] = array(
      '#type' => 'textfield',
      '#title' => t('Initial index'),
      '#description' => t('The initial index value.'),
      '#default_value' => isset($source_config['initial_index']) ? $source_config['initial_index'] : $defaults['initial_index'],
      '#element_validate' => array(
        'element_validate_number',
      ),
    );
    $form['increment'] = array(
      '#type' => 'textfield',
      '#title' => t('Increment'),
      '#description' => t('The amount to increment the index.'),
      '#default_value' => isset($source_config['increment']) ? $source_config['increment'] : $defaults['increment'],
      '#element_validate' => array(
        'element_validate_number',
      ),
    );

    return $form;
  }

}
Classes
Name | Description |
---|---|
FeedsCrawlerPattern | Crawls links using a URL pattern. |