public function FeedsXPathParserBase::parse in Feeds XPath Parser 7
Same name and namespace in other branches
- 6 FeedsXPathParserBase.inc \FeedsXPathParserBase::parse()
Implements FeedsParser::parse().
File
- ./
FeedsXPathParserBase.inc, line 68 - Provides the base class for FeedsXPathParserHTML and FeedsXPathParserXML.
Class
- FeedsXPathParserBase
- Base class for the HTML and XML parsers.
Code
/**
 * Implements FeedsParser::parse().
 *
 * Parses one batch of context nodes from the fetched document. The batch
 * window is tracked through the FEEDS_PARSE state of the source: the state's
 * pointer marks where this batch starts and is advanced to where it ends.
 *
 * @param FeedsSource $source
 *   The source being imported; provides the parser/fetcher configuration,
 *   the batch state and the importer (batch limit).
 * @param FeedsFetcherResult $fetcher_result
 *   The fetcher result, handed to setup() to build the document.
 *
 * @return FeedsParserResult
 *   The parser result holding the link and the parsed items of this batch.
 */
public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
  $source_config = $source->getConfigFor($this);
  $state = $source->state(FEEDS_PARSE);

  // Fall back to the parser's own configuration when the source has none.
  if (empty($source_config)) {
    $source_config = $this->getConfig();
  }

  $this->doc = $this->setup($source_config, $fetcher_result);

  $parser_result = new FeedsParserResult();

  $mappings = $this->getOwnMappings();
  $this->rawXML = array_keys(array_filter($source_config['rawXML']));

  // Set link.
  $fetcher_config = $source->getConfigFor($source->importer->fetcher);
  $parser_result->link = isset($fetcher_config['source']) ? $fetcher_config['source'] : '';

  $this->xpath = new FeedsXPathParserDOMXPath($this->doc);
  $config = array();
  $config['debug'] = array_keys(array_filter($source_config['exp']['debug']));
  $config['errors'] = $source_config['exp']['errors'];
  $this->xpath->setConfig($config);

  $context_query = '(' . $source_config['context'] . ')';

  // Only count the context nodes while the state has no total yet.
  if (empty($state->total)) {
    $state->total = $this->xpath->namespacedQuery('count(' . $context_query . ')', $this->doc, 'count');
  }

  // Compute the window of this batch and advance the pointer past it.
  $start = $state->pointer ? $state->pointer : 0;
  $limit = $start + $source->importer->getLimit();
  $end = $limit > $state->total ? $state->total : $limit;
  $state->pointer = $end;

  // Restrict the context query to positions ($start, $end].
  $context_query .= "[position() > {$start} and position() <= {$end}]";

  $progress = $state->pointer ? $state->pointer : 0;

  $all_nodes = $this->xpath->namespacedQuery($context_query, NULL, 'context');

  // NOTE(review): namespacedQuery() is defined outside this method; it
  // presumably can hand back a non-iterable value (NULL/FALSE) when the
  // XPath expression fails - confirm. Normalise such a result to an empty
  // list so the loop below does not trigger a PHP warning.
  if (!is_array($all_nodes) && !($all_nodes instanceof Traversable)) {
    $all_nodes = array();
  }

  // The source config could have old values that don't exist in the importer.
  $sources = array_intersect_key($source_config['sources'], $mappings);

  foreach ($all_nodes as $node) {
    // Invoke a hook to check whether the domnode should be skipped.
    if (in_array(TRUE, module_invoke_all('feeds_xpathparser_filter_domnode', $node, $this->doc, $source), TRUE)) {
      continue;
    }

    $parsed_item = $variables = array();

    foreach ($sources as $element_key => $query) {
      // Variable substitution: results of earlier queries of this item can be
      // referenced in later queries as '$' followed by the mapping value.
      $query = strtr($query, $variables);

      // Parse the item.
      $result = $this->parseSourceElement($query, $node, $element_key);

      if (isset($result)) {
        // For array results only the first value is used for substitution.
        $variables['$' . $mappings[$element_key]] = is_array($result) ? reset($result) : $result;
        $parsed_item[$element_key] = $result;
      }
    }

    // Items for which no query produced a result are not added.
    if (!empty($parsed_item)) {
      $parser_result->items[] = $parsed_item;
    }
  }

  $state->progress($state->total, $progress);

  // Drop the document and XPath object once the batch has been parsed.
  unset($this->doc);
  unset($this->xpath);

  return $parser_result;
}