public function FeedsCrawler::fetch in Feeds Crawler 6.2
Same name and namespace in other branches
- 7 FeedsCrawler.inc \FeedsCrawler::fetch()
Implements FeedsFetcher::fetch().
Overrides FeedsHTTPFetcher::fetch
File
- ./
FeedsCrawler.inc, line 16 - Home of the FeedsCrawler.
Class
- FeedsCrawler
- Fetches data via HTTP.
Code
/**
 * Implements FeedsFetcher::fetch().
 *
 * Each call produces the batch for one page and records, in a persistent
 * Drupal variable keyed by this fetcher's id and the feed nid, the URL to
 * request on the following call together with a countdown of remaining pages.
 *
 * @param FeedsSource $source
 *   The source being imported. Supplies the feed nid and the per-source
 *   configuration for this fetcher ('source' URL and 'crawler' settings).
 *
 * @return FeedsImportBatch|FeedsHTTPBatch
 *   A FeedsImportBatch wrapping the raw payload when a PubSubHubbub
 *   notification was received, otherwise a FeedsHTTPBatch for the URL chosen
 *   for this call.
 */
public function fetch(FeedsSource $source) {
  $source_config = $source->getConfigFor($this);

  // Crawl progress is kept in a persistent variable (admittedly a hack).
  $state_key = 'feeds_crawler_' . $this->id . '_' . $source->feed_nid;
  $state = variable_get($state_key, new stdClass());

  // PubSubHubbub support: a received notification short-circuits crawling.
  if (!empty($this->config['use_pubsubhubbub']) && ($raw = $this->subscriber($source->feed_nid)->receive())) {
    $state->count = 0;
    variable_set($state_key, $state);
    return new FeedsImportBatch($raw, $source->feed_nid);
  }

  $config = $source_config['crawler'];

  if (empty($state->total)) {
    // First run: start from the configured source URL. A 'num_pages' of 0 is
    // replaced by 100000 (presumably meaning "no practical limit" -- confirm).
    $state->total = $config['num_pages'] == 0 ? 100000 : $config['num_pages'];
    $state->count = $state->total;
    $url = $source_config['source'];
  }
  elseif (!empty($state->next_url)) {
    $url = $state->next_url;
  }
  else {
    // A stored state can have 'total' set without 'next_url' (a previous run
    // that found no next link). Fall back to the source URL instead of
    // requesting an undefined one.
    $url = $source_config['source'];
  }

  $result = new FeedsHTTPBatch($url, $source->feed_nid);

  // With 'first_run' enabled and the source already flagged as crawled,
  // return this page only and do not look for a next link. The 'crawled' key
  // does not exist until a crawl has completed, hence the !empty() guards.
  if (!empty($config['first_run']) && !empty($source->config['FeedsCrawler']['crawled'])) {
    $state->count = 0;
    variable_set($state_key, $state);
    return $result;
  }

  // Pause between requests; skip the call for missing or non-positive values.
  $delay = isset($config['delay']) ? (int) $config['delay'] : 0;
  if ($delay > 0) {
    sleep($delay);
  }

  // Try each enabled strategy in order (auto, URL pattern, XPath) and stop at
  // the first one that yields a non-empty href.
  $href = FALSE;
  if (!empty($config['auto'])) {
    $href = $this->parseAuto($result, $source_config);
  }
  if (!$href && !empty($config['url']['url_pattern'])) {
    $href = $this->parseUrl($config, $state);
  }
  if (!$href && !empty($config['xpath'])) {
    $href = $this->parseXPath($result, $source_config);
  }

  if ($href) {
    $state->next_url = $href;
    $state->count--;
    if ($state->count <= 0) {
      // Page budget used up: flag the source as crawled.
      $source->config['FeedsCrawler']['crawled'] = TRUE;
    }
  }
  else {
    // No next link found: nothing left to crawl.
    $state->count = 0;
  }

  variable_set($state_key, $state);
  return $result;
}