You are here

public function FeedsCrawler::fetch in Feeds Crawler 6.2

Same name and namespace in other branches
  1. 7 FeedsCrawler.inc \FeedsCrawler::fetch()

Implements FeedsFetcher::fetch().

Overrides FeedsHTTPFetcher::fetch

File

./FeedsCrawler.inc, line 16
Home of the FeedsCrawler.

Class

FeedsCrawler
Fetches data via HTTP.

Code

/**
 * Implements FeedsFetcher::fetch().
 *
 * Each call fetches a single page. Crawl progress (the page budget, the
 * remaining page count and the next URL to request) is kept between calls in
 * a Drupal variable named after this fetcher's id and the feed node id.
 *
 * @param FeedsSource $source
 *   The source being imported; its feed_nid and per-fetcher configuration are
 *   read here.
 *
 * @return
 *   A FeedsImportBatch wrapping the PubSubHubbub payload when one was
 *   received, otherwise a FeedsHTTPBatch for the URL of the current page.
 */
public function fetch(FeedsSource $source) {
  $source_config = $source->getConfigFor($this);
  $feed_nid = $source->feed_nid;

  // Crawl state lives in a persistent variable. --hack.
  $state_key = 'feeds_crawler_' . $this->id . '_' . $feed_nid;
  $state = variable_get($state_key, new stdClass());

  // PubSubHubbub: when the subscriber hands over data, skip crawling and
  // return that payload directly.
  if ($this->config['use_pubsubhubbub']) {
    $raw = $this->subscriber($feed_nid)->receive();
    if ($raw) {
      $state->count = 0;
      variable_set($state_key, $state);
      return new FeedsImportBatch($raw, $feed_nid);
    }
  }

  $config = $source_config['crawler'];

  if (!empty($state->total)) {
    // A crawl is already in progress: continue with the stored link.
    $url = $state->next_url;
  }
  else {
    // First run: set up the page budget and start at the configured source.
    // A num_pages of 0 is replaced by 100000.
    if ($config['num_pages'] == 0) {
      $state->total = 100000;
    }
    else {
      $state->total = $config['num_pages'];
    }
    $state->count = $state->total;
    $url = $source_config['source'];
  }

  $result = new FeedsHTTPBatch($url, $feed_nid);

  // With "first_run" enabled and the source already flagged as crawled, only
  // this one page is returned and no next link is looked up.
  if ($config['first_run'] && $source->config['FeedsCrawler']['crawled']) {
    $state->count = 0;
    variable_set($state_key, $state);
    return $result;
  }

  sleep($config['delay']);

  // Look for the next link. Strategies are tried in a fixed order (auto
  // detection, URL pattern, XPath); the first one that is enabled and yields
  // a non-FALSE value wins, and later ones are not evaluated.
  $href = FALSE;
  if ($config['auto']) {
    $href = $this->parseAuto($result, $source_config);
  }
  if ($href == FALSE && $config['url']['url_pattern']) {
    $href = $this->parseUrl($config, $state);
  }
  if ($href == FALSE && $config['xpath']) {
    $href = $this->parseXPath($result, $source_config);
  }

  if ($href != FALSE) {
    $state->next_url = $href;
    $state->count--;
    if ($state->count <= 0) {
      // Page budget used up: flag the source as crawled.
      $source->config['FeedsCrawler']['crawled'] = TRUE;
    }
  }
  else {
    // No further link found: zero the remaining count.
    $state->count = 0;
  }

  variable_set($state_key, $state);
  return $result;
}