You are here

public function FeedsXPathParserBase::parse in Feeds XPath Parser 7

Same name and namespace in other branches
  1. 6 FeedsXPathParserBase.inc \FeedsXPathParserBase::parse()

Implements FeedsParser::parse().

File

./FeedsXPathParserBase.inc, line 68
Provides the base class for FeedsXPathParserHTML and FeedsXPathParserXML.

Class

FeedsXPathParserBase
Base class for the HTML and XML parsers.

Code

/**
 * Implements FeedsParser::parse().
 *
 * Builds the document from the fetcher result, selects one batch of context
 * nodes with the configured context query and evaluates every mapped source
 * query against each of those nodes.
 *
 * @param FeedsSource $source
 *   The source being imported. It supplies the per-source configuration, the
 *   parse-stage state object and the importer (fetcher and batch limit).
 * @param FeedsFetcherResult $fetcher_result
 *   The fetcher result that is handed to setup() to produce the document.
 *
 * @return FeedsParserResult
 *   The parser result. Its link is the fetcher's 'source' setting (or an
 *   empty string) and its items hold one entry per context node for which at
 *   least one source query returned a value.
 */
public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
  // Source-level settings take precedence; when there are none, fall back to
  // the parser's own configuration.
  $settings = $source->getConfigFor($this);
  $state = $source->state(FEEDS_PARSE);
  if (empty($settings)) {
    $settings = $this->getConfig();
  }

  $this->doc = $this->setup($settings, $fetcher_result);
  $parser_result = new FeedsParserResult();
  $mappings = $this->getOwnMappings();

  // Remember the keys of all rawXML entries that carry a non-empty value.
  $raw_enabled = array_filter($settings['rawXML']);
  $this->rawXML = array_keys($raw_enabled);

  // Set link.
  $fetcher_settings = $source->getConfigFor($source->importer->fetcher);
  if (isset($fetcher_settings['source'])) {
    $parser_result->link = $fetcher_settings['source'];
  }
  else {
    $parser_result->link = '';
  }

  // Prepare the XPath helper with the debug and error settings.
  $this->xpath = new FeedsXPathParserDOMXPath($this->doc);
  $xpath_settings = array(
    'debug' => array_keys(array_filter($settings['exp']['debug'])),
    'errors' => $settings['exp']['errors'],
  );
  $this->xpath->setConfig($xpath_settings);

  // Count the context nodes only while the state has no total yet.
  $context = '(' . $settings['context'] . ')';
  if (empty($state->total)) {
    $count_query = 'count(' . $context . ')';
    $state->total = $this->xpath->namespacedQuery($count_query, $this->doc, 'count');
  }

  // Work out the window of positions covered by this batch and advance the
  // stored pointer to the end of that window.
  $start = 0;
  if ($state->pointer) {
    $start = $state->pointer;
  }
  $limit = $start + $source->importer->getLimit();
  $end = $limit;
  if ($limit > $state->total) {
    $end = $state->total;
  }
  $state->pointer = $end;
  $context .= '[position() > ' . $start . ' and position() <= ' . $end . ']';

  // Progress reported at the end is the freshly advanced pointer (or 0).
  $progress = 0;
  if ($state->pointer) {
    $progress = $state->pointer;
  }

  $context_nodes = $this->xpath->namespacedQuery($context, NULL, 'context');

  // The source config could have old values that don't exist in the importer.
  $queries = array_intersect_key($settings['sources'], $mappings);

  foreach ($context_nodes as $context_node) {
    // Let other modules veto this DOM node: a strict TRUE from any hook
    // implementation skips it.
    $verdicts = module_invoke_all('feeds_xpathparser_filter_domnode', $context_node, $this->doc, $source);
    if (in_array(TRUE, $verdicts, TRUE)) {
      continue;
    }

    $item = array();
    $substitutions = array();
    foreach ($queries as $key => $expression) {
      // Variable substitution: values found earlier for this node replace
      // their '$name' placeholders in later queries.
      $expression = strtr($expression, $substitutions);

      // Parse the item.
      $value = $this->parseSourceElement($expression, $context_node, $key);
      if (!isset($value)) {
        continue;
      }

      // Only the first element of a multi-valued result is used as the
      // substitution value; the item itself keeps the complete result.
      if (is_array($value)) {
        $replacement = reset($value);
      }
      else {
        $replacement = $value;
      }
      $substitutions['$' . $mappings[$key]] = $replacement;
      $item[$key] = $value;
    }

    if (!empty($item)) {
      $parser_result->items[] = $item;
    }
  }

  $state->progress($state->total, $progress);

  // Drop the document and the XPath helper once the batch is parsed.
  unset($this->doc, $this->xpath);

  return $parser_result;
}