public function FeedsCrawler::fetch in Feeds Crawler 7
Same name and namespace in other branches
- 6.2 FeedsCrawler.inc \FeedsCrawler::fetch()
Implements FeedsFetcher::fetch().
File
- ./FeedsCrawler.inc, line 16 - Home of the FeedsCrawler.
Class
- FeedsCrawler
- Fetches data via HTTP.
Code
/**
 * Implements FeedsFetcher::fetch().
 *
 * Returns a pushed PubSubHubbub payload when one is available. Otherwise
 * wraps the URL of the current page in a FeedsHTTPFetcherResult, looks up the
 * URL to use on the next call, and records progress on the FEEDS_FETCH state.
 *
 * @param FeedsSource $source
 *   The source being imported. Its per-fetcher configuration supplies the
 *   start URL ('source') and the crawler settings ('crawler').
 *
 * @return FeedsFetcherResult|FeedsHTTPFetcherResult
 *   The raw PubSubHubbub payload, or a result object for the current page URL.
 */
public function fetch(FeedsSource $source) {
  $source_config = $source->getConfigFor($this);
  $config = $source_config['crawler'];

  // A payload received through PubSubHubbub is handed back untouched.
  if ($this->config['use_pubsubhubbub'] && ($raw = $this->subscriber($source->feed_nid)->receive())) {
    return new FeedsFetcherResult($raw);
  }

  $state = $source->state(FEEDS_FETCH);
  if ($state->total == 0) {
    // First call of this crawl: start from the configured source URL. A
    // 'num_pages' of 0 is replaced by a ceiling of 100000 pages.
    $state->total = $config['num_pages'] == 0 ? 100000 : $config['num_pages'];
    $state->count = $state->total;
    $url = $source_config['source'];
  }
  else {
    // Later calls continue from the link discovered on the previous call.
    $url = $state->next_url;
  }

  $result = new FeedsHTTPFetcherResult($url);

  // The 'crawled' flag is only written further down, once a crawl has used up
  // its page count, so it may not exist yet. !empty() yields the same
  // truthiness as a direct read without raising an undefined-index notice.
  if ($config['first_run'] && !empty($source->config['FeedsCrawler']['crawled'])) {
    $state->progress($state->total, $state->total);
    return $result;
  }

  sleep($config['delay']);

  // Try each enabled link-discovery method in turn and stop at the first one
  // that yields a non-empty link: auto-detection, URL pattern, then XPath.
  $href = FALSE;
  if ($config['auto']) {
    $href = $this->parseAuto($result, $source_config);
  }
  if (!$href && $config['url']['url_pattern']) {
    $href = $this->parseUrl($config, $state);
  }
  if (!$href && $config['xpath']) {
    $href = $this->parseXPath($result, $source_config);
  }

  if ($href) {
    $state->next_url = $href;
    $state->count--;
    $state->progress($state->total, $state->total - $state->count);
    if ($state->count <= 0) {
      // The page count is used up: remember that this source was crawled.
      $source->config['FeedsCrawler']['crawled'] = TRUE;
    }
  }
  else {
    // No further link: report the fetch stage as complete.
    // NOTE(review): 'crawled' is not set on this path, so with 'first_run'
    // enabled a crawl that runs out of links before using up its page count
    // is not remembered as crawled - confirm that this is intended.
    $state->progress($state->total, $state->total);
  }

  return $result;
}