You are here

public function FeedsCrawler::fetch in Feeds Crawler 7

Same name and namespace in other branches
  1. 6.2 FeedsCrawler.inc \FeedsCrawler::fetch()

Implements FeedsFetcher::fetch().

File

./FeedsCrawler.inc, line 16
Home of the FeedsCrawler.

Class

FeedsCrawler
Fetches data via HTTP.

Code

/**
 * Implements FeedsFetcher::fetch().
 *
 * Handles one page per call. The FEEDS_FETCH state carries the remaining page
 * budget ($state->count) and the URL to use on the following call
 * ($state->next_url), so repeated calls walk from page to page until the
 * budget is used up or no further link can be determined.
 *
 * @param FeedsSource $source
 *   The source being imported. Its configuration for this fetcher supplies
 *   the start URL ('source') and the 'crawler' settings read below.
 *
 * @return FeedsFetcherResult|FeedsHTTPFetcherResult
 *   A FeedsFetcherResult wrapping the PubSubHubbub payload when one was
 *   received, otherwise a FeedsHTTPFetcherResult for the URL selected for
 *   this call.
 */
public function fetch(FeedsSource $source) {
  $source_config = $source->getConfigFor($this);
  $config = $source_config['crawler'];

  // A received PubSubHubbub payload is returned as-is instead of crawling.
  if (!empty($this->config['use_pubsubhubbub']) && ($raw = $this->subscriber($source->feed_nid)->receive())) {
    return new FeedsFetcherResult($raw);
  }

  $state = $source->state(FEEDS_FETCH);
  if ($state->total == 0) {
    // No budget set yet: initialise it and start from the configured source
    // URL. A num_pages of 0 is replaced by 100000.
    $state->total = $config['num_pages'] == 0 ? 100000 : $config['num_pages'];
    $state->count = $state->total;
    $url = $source_config['source'];
  }
  else {
    // Continue with the link stored by the previous call.
    $url = $state->next_url;
  }
  $result = new FeedsHTTPFetcherResult($url);

  // Within this method 'crawled' is only written after the page budget has
  // been used up, so the key may not exist yet. empty() reads it without
  // raising an undefined-index notice.
  if (!empty($config['first_run']) && !empty($source->config['FeedsCrawler']['crawled'])) {
    // Already crawled once: report completion and return this page only.
    $state->progress($state->total, $state->total);
    return $result;
  }

  // Only sleep for a positive whole number of seconds. A missing or
  // non-numeric delay would otherwise warn, and a negative one throws a
  // ValueError on PHP 8.
  $delay = isset($config['delay']) ? (int) $config['delay'] : 0;
  if ($delay > 0) {
    sleep($delay);
  }

  // Try the enabled link strategies in order (auto, URL pattern, XPath) and
  // stop at the first one that yields a non-empty link. The loose comparisons
  // are deliberate: any falsy return value counts as "no link found".
  $href = FALSE;
  if (!empty($config['auto'])) {
    $href = $this->parseAuto($result, $source_config);
  }
  if ($href == FALSE && !empty($config['url']['url_pattern'])) {
    $href = $this->parseUrl($config, $state);
  }
  if ($href == FALSE && !empty($config['xpath'])) {
    $href = $this->parseXPath($result, $source_config);
  }

  if ($href != FALSE) {
    $state->next_url = $href;
    $state->count--;
    $state->progress($state->total, $state->total - $state->count);
    if ($state->count <= 0) {
      // Page budget used up: remember that this source has been crawled.
      $source->config['FeedsCrawler']['crawled'] = TRUE;
    }
  }
  else {
    // No further link: report the fetch stage as complete.
    // NOTE(review): 'crawled' is not set on this path, so a crawl that runs
    // out of links before the budget is spent is not recorded as crawled.
    // Confirm that is intended.
    $state->progress($state->total, $state->total);
  }
  return $result;
}