You are here

FeedsQueryPathParser.inc in Feeds QueryPath Parser 7

Same filename and directory in other branches
  1. 6 FeedsQueryPathParser.inc

Provides the class for FeedsQueryPathParser.

File

FeedsQueryPathParser.inc
View source
<?php

/**
 * @file
 *
 * Provides the class for FeedsQueryPathParser.
 */
/**
 * Feeds parser that extracts items from a document with QueryPath.
 *
 * A "context" CSS selector identifies each item in the fetched document; one
 * further selector (and optional attribute name) per mapping source extracts
 * the value of that source from within the item.
 */
class FeedsQueryPathParser extends FeedsParser {

  /**
   * Configuration in effect for the current parse() run.
   *
   * Either the per-source configuration or, when that is empty, the importer
   * level configuration.
   *
   * @var array
   */
  public $source_config = array();

  /**
   * Keys of the sources whose value is returned as markup instead of text.
   *
   * @var array
   */
  public $rawXML = array();

  /**
   * Keys of the queries ('context' or a source key) that have debug enabled.
   *
   * @var array
   */
  public $debug = array();

  /**
   * Implements FeedsParser::parse().
   *
   * @param FeedsSource $source
   *   The feed source being imported.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result providing the raw document.
   *
   * @return FeedsParserResult
   *   The parsed items, plus the document title and, when the fetcher
   *   configuration provides one, the source link.
   *
   * @throws Exception
   *   When the fetched document is empty.
   */
  public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
    // Setup mappings so they can be used in variable replacement.
    $mappings = $this->getOwnMappings();

    // Set source config, if it's empty get config from importer.
    $this->source_config = $source->getConfigFor($this);

    // Allow config inheritance.
    if (empty($this->source_config)) {
      $this->source_config = $this->getConfig();
    }

    // Make sure every top level key exists, whatever was stored.
    $this->source_config += $this->configDefaults();

    $this->rawXML = array_keys(array_filter((array) $this->source_config['rawXML']));

    // configDefaults() declares 'debug' as an empty array, so the 'options'
    // child is not guaranteed to exist.
    $debug_options = isset($this->source_config['debug']['options']) ? (array) $this->source_config['debug']['options'] : array();
    $this->debug = array_keys(array_filter($debug_options));

    $raw = trim($fetcher_result->getRaw());
    if (empty($raw)) {
      throw new Exception(t('Feeds QueryPath parser: The document is empty.'));
    }

    $opts = array(
      'ignore_parser_warnings' => TRUE,
    );
    $result = new FeedsParserResult();

    // Set link so we can set the result link attribute. Not every fetcher
    // configuration necessarily carries a 'source' entry.
    $fetcher_config = $source->getConfigFor($source->importer->fetcher);
    if (isset($fetcher_config['source'])) {
      $result->link = $fetcher_config['source'];
    }

    $this->includeQueryPath();

    // Warnings raised while loading the document are deliberately silenced.
    $doc = @qp($raw, NULL, $opts);

    // Convert document to UTF-8: rewrite the declared charset in the
    // content-type meta tag, move the tag to the top of <head> and re-load
    // the converted markup.
    // NOTE(review): utf8_decode() is deprecated as of PHP 8.2 - revisit this
    // conversion when targeting newer PHP versions.
    $content_type = qp($doc, 'meta[http-equiv="content-type"]');
    if ($content_type->hasAttr('content') && preg_match('/charset=([-\\w]*)/i', $content_type->attr('content'), $matches)) {
      $content_type->attr('content', preg_replace('/charset=([-\\w]*)/i', 'charset=utf-8', $content_type->attr('content')));
      qp($doc, 'meta[http-equiv="content-type"]')->remove();
      qp($doc, 'head')->prepend($content_type->html());
      $doc = qp(drupal_convert_to_utf8(utf8_decode($doc->html()), $matches[1]), NULL, $opts);
    }

    $result->title = qp($doc, 'title', $opts)->text();

    $context = qp($doc, $this->source_config['context'], $opts);
    $this->debug($context, 'context');

    foreach ($context as $item) {
      $parsed_item = $variables = array();

      // The loop variable is deliberately not called $source, so the
      // FeedsSource argument is not overwritten.
      foreach ($this->source_config['sources'] as $source_key => $query) {
        // Variable substitution: values parsed earlier for this item can be
        // referenced in later queries as {target}.
        $query = strtr($query, $variables);
        $parsed = $this->parseSourceElement($item, $query, $source_key);

        // Avoid null values.
        if (!isset($parsed)) {
          continue;
        }

        // Only sources that still have a mapping can act as variables.
        // Variable substitution can't handle arrays, so those become ''.
        if (isset($mappings[$source_key])) {
          $variables['{' . $mappings[$source_key] . '}'] = is_array($parsed) ? '' : $parsed;
        }
        $parsed_item[$source_key] = $parsed;
      }

      if (!empty($parsed_item)) {
        $result->items[] = $parsed_item;
      }
    }

    return $result;
  }

  /**
   * Extracts the value(s) of one source from an item.
   *
   * @param mixed $item
   *   The QueryPath object for the current context item.
   * @param string $query
   *   The CSS selector to apply within the item; may be empty.
   * @param string $source
   *   The mapping source key, used to look up the attribute, raw setting and
   *   debug setting.
   *
   * @return mixed
   *   NULL when neither a query nor an attribute is configured or when
   *   nothing matched, the single value when exactly one element matched,
   *   otherwise an array of values.
   */
  protected function parseSourceElement($item, $query, $source) {
    $attr = isset($this->source_config['attrs'][$source]) ? $this->source_config['attrs'][$source] : '';

    if ($query == '' && $attr == '') {
      return NULL;
    }

    if ($query != '') {
      $item = qp($item, $query);
    }

    $raw = in_array($source, $this->rawXML, TRUE);
    $results = array();
    foreach ($item as $element) {
      if ($attr != '') {
        $results[] = $element->attr($attr);
      }
      elseif ($raw) {
        $results[] = $element->html();
      }
      else {
        $results[] = $element->text();
      }
    }

    $this->debug($results, $source);

    // If there is one result, return it directly. If there are no results,
    // return NULL. Otherwise return all the results.
    switch (count($results)) {
      case 0:
        return NULL;

      case 1:
        return $results[0];

      default:
        return $results;
    }
  }

  /**
   * Source form.
   *
   * @param array $source_config
   *   The configuration to use for the default values; when empty the
   *   importer level configuration is used.
   *
   * @return array
   *   The form array, nested under the 'querypath' key.
   */
  public function sourceForm($source_config) {
    $form = array();

    // Allow for config inheritance.
    if (empty($source_config)) {
      $source_config = $this->config;
    }

    $mappings_ = feeds_importer($this->id)->processor->config['mappings'];
    $uniques = $mappings = array();
    foreach ($mappings_ as $mapping) {
      if (strpos($mapping['source'], 'querypathparser:') === 0) {
        $mappings[$mapping['source']] = $mapping['target'];
        if (!empty($mapping['unique'])) {
          $uniques[] = $mapping['target'];
        }
      }
    }

    $form['querypath'] = array(
      '#type' => 'fieldset',
      '#title' => t('QueryPath Parser Settings'),
      '#tree' => TRUE,
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    if (empty($mappings)) {
      $form['querypath']['error_message']['#markup'] = '<div class="help">' . t('FeedsQueryPathParser: No mappings were defined. Define mappings !link.', array(
        '!link' => l(t('here'), 'admin/structure/feeds/' . $this->id . '/mapping'),
      )) . '</div>';
      return $form;
    }

    $form['querypath']['context'] = array(
      '#type' => 'textfield',
      '#title' => t('Context'),
      '#required' => TRUE,
      '#description' => t('The element that represents the beginning of a new item, like h1 or body. If you identify a context that occurs more than once in a feed, a new node or item will be created each time it is encountered.'),
      '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
      '#maxlength' => 1024,
    );
    $form['querypath']['sources'] = array(
      '#title' => t('Selectors'),
      '#type' => 'fieldset',
      '#description' => t('Indicate the CSS selector that marks where each field is located within the context, like div#content or h2:first.'),
    );
    $form['querypath']['attrs'] = array(
      '#title' => t('Attributes'),
      '#type' => 'fieldset',
      '#description' => t('Identify the attribute value to use for a field, if desired, like src or title. The element text will be used if no attribute is identified.'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );

    if (!empty($uniques)) {
      // format_plural() translates the singular and plural strings itself,
      // so they are passed untranslated together with the placeholders.
      $unique_list = implode(', ', $uniques);
      $items = array(
        format_plural(
          count($uniques),
          'Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.',
          'Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.',
          array(
            '!column' => $unique_list,
            '!columns' => $unique_list,
          )
        ),
      );
      $form['querypath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array(
        'items' => $items,
      )) . '</div>';
    }

    // Variables that can be used in a selector: the targets of all the
    // mappings listed before it.
    $variables = array();
    foreach ($mappings as $source => $target) {
      $form['querypath']['sources'][$source] = array(
        '#type' => 'textfield',
        '#title' => $target,
        '#description' => t('The CSS selector for this field.'),
        '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
        '#maxlength' => 1024,
      );
      if (!empty($variables)) {
        // Keep the translatable string constant and pass the variable list
        // as a placeholder.
        $form['querypath']['sources'][$source]['#description'] .= '<br>' . t('The variables @variables are available for replacement.', array(
          '@variables' => implode(', ', $variables),
        ));
      }
      $variables[] = '{' . $target . '}';

      $form['querypath']['attrs'][$source] = array(
        '#type' => 'textfield',
        '#title' => $target,
        '#description' => t('The attribute to return.'),
        '#default_value' => isset($source_config['attrs'][$source]) ? $source_config['attrs'][$source] : '',
        '#maxlength' => 1024,
      );
    }

    $form['querypath']['rawXML'] = array(
      '#type' => 'checkboxes',
      '#options' => $mappings,
      '#default_value' => isset($source_config['rawXML']) ? $source_config['rawXML'] : array(),
    );
    $form['querypath']['debug'] = array(
      '#type' => 'fieldset',
      '#title' => t('Debug'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    $form['querypath']['debug']['options'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Debug query'),
      '#options' => array_merge(array(
        'context' => 'context',
      ), $mappings),
      '#default_value' => isset($source_config['debug']['options']) ? $source_config['debug']['options'] : array(),
    );

    return $form;
  }

  /**
   * Override parent::configForm().
   *
   * Re-uses the source form with the importer configuration, an optional
   * context and an expanded fieldset.
   */
  public function configForm(&$form_state) {
    $form = $this->sourceForm($this->config);
    $form['querypath']['context']['#required'] = FALSE;
    $form['querypath']['#collapsed'] = FALSE;
    return $form;
  }

  /**
   * Override parent::sourceDefaults().
   */
  public function sourceDefaults() {
    return array();
  }

  /**
   * Define defaults.
   *
   * Override parent::configDefaults().
   */
  public function configDefaults() {
    return array(
      'context' => '',
      'sources' => array(),
      'debug' => array(),
      'attrs' => array(),
      'rawXML' => array(),
    );
  }

  /**
   * Override parent::sourceFormValidate().
   *
   * If the values of this source are the same as the base config we set them
   * to blank so that the values will be inherited from the importer defaults.
   * The same happens when the form did not submit any parser values.
   *
   * @param &$values
   *   The values from the form to validate, passed by reference.
   */
  public function sourceFormValidate(&$values) {
    // The form has no 'querypath' values when no mappings are defined yet.
    if (empty($values['querypath']) || !is_array($values['querypath'])) {
      $values = array();
      return;
    }

    $values = $values['querypath'];
    ksort($values);

    // Compare against a sorted copy rather than re-ordering the stored
    // configuration.
    $config = $this->config;
    ksort($config);

    if ($values === $config) {
      $values = array();
      return;
    }

    $this->configFormValidate($values);
  }

  /**
   * Override parent::configFormValidate().
   *
   * Trims the context and source selectors and reports the ones QueryPath
   * cannot parse. A source selector that fails to parse is accepted when it
   * contains a {target} variable, as the placeholder itself can be the cause
   * of the syntax error.
   *
   * @param &$values
   *   The values from the form to validate, passed by reference. When they
   *   are nested under 'querypath' (importer config form) they are unwrapped.
   */
  public function configFormValidate(&$values) {
    $config = FALSE;
    $mappings = $this->getOwnMappings();
    $doc = '<html></html>';

    if (isset($values['querypath'])) {
      $values = $values['querypath'];
      $config = TRUE;
    }

    // Form element prefix, which depends on which form is being validated.
    $prefix = $config ? 'querypath][' : 'feeds][FeedsQueryPathParser][querypath][';

    $values['context'] = isset($values['context']) ? trim($values['context']) : '';
    try {
      $this->includeQueryPath();
      qp($doc, $values['context']);
    }
    catch (CSSParseException $e) {
      form_set_error($prefix . 'context', $e->getMessage());
    }

    // Nothing more to check when no selectors were submitted.
    if (empty($values['sources']) || !is_array($values['sources'])) {
      return;
    }

    foreach ($values['sources'] as $key => &$query) {
      $query = trim($query);
      try {
        qp($doc, $query);
      }
      catch (CSSParseException $e) {
        // Our variable substitution options can cause syntax errors, check
        // if we're doing that.
        $variable_present = FALSE;
        foreach ($mappings as $target) {
          if (strpos($query, '{' . $target . '}') !== FALSE) {
            $variable_present = TRUE;
            break;
          }
        }

        if (!$variable_present) {
          form_set_error($prefix . 'sources][' . $key, $e->getMessage());
        }
      }
    }
    // Break the reference left behind by the loop.
    unset($query);
  }

  /**
   * Override parent::getMappingSources().
   *
   * Offers one new 'querypathparser:N' source, where N is one higher than
   * the highest index already in use.
   */
  public function getMappingSources() {
    $next = 0;

    // Mappings can be re-ordered, so look for the highest index rather than
    // the index of the last mapping.
    foreach (array_keys($this->getOwnMappings()) as $key) {
      $parts = explode(':', $key);
      if (isset($parts[1]) && is_numeric($parts[1])) {
        $next = max($next, (int) $parts[1] + 1);
      }
    }

    return array(
      'querypathparser:' . $next => array(
        'name' => t('QueryPath Expression'),
        'description' => t('Allows you to configure a CSS selector expression that will populate this field.'),
      ),
    ) + parent::getMappingSources();
  }

  /**
   * Returns the importer's mappings that belong to this parser.
   *
   * @return array
   *   Mapping targets, keyed by mapping source.
   */
  protected function getOwnMappings() {
    $importer_config = feeds_importer($this->id)->getConfig();
    $mappings = isset($importer_config['processor']['config']['mappings']) ? $importer_config['processor']['config']['mappings'] : array();
    return $this->filterMappings($mappings);
  }

  /**
   * Filters mappings, returning the ones that belong to us.
   *
   * @param array $mappings
   *   Mappings, each with a 'source' and a 'target' entry.
   *
   * @return array
   *   The targets of the mappings whose source starts with
   *   'querypathparser:', keyed by source.
   */
  protected function filterMappings($mappings) {
    $our_mappings = array();
    foreach ((array) $mappings as $mapping) {
      if (strpos($mapping['source'], 'querypathparser:') === 0) {
        $our_mappings[$mapping['source']] = $mapping['target'];
      }
    }
    return $our_mappings;
  }

  /**
   * Prints out results from queries.
   *
   * Only does something when debugging is enabled for $source.
   *
   * @param mixed $item
   *   The values to print; objects are printed as their html() output.
   * @param string $source
   *   'context' or the key of the source the values belong to.
   */
  protected function debug($item, $source) {
    if (!in_array($source, $this->debug, TRUE)) {
      return;
    }

    $o = '<ul>';
    foreach ($item as $i) {
      if (is_object($i)) {
        $i = $i->html();
      }
      $o .= '<li>' . check_plain(var_export($i, TRUE)) . '</li>';
    }
    $o .= '</ul>';

    drupal_set_message($source . ':' . $o);
  }

  /**
   * Loads the QueryPath code when querypath_include_code() is available.
   */
  protected function includeQueryPath() {
    if (function_exists('querypath_include_code')) {
      querypath_include_code();
    }
  }

}

/**
 * Implements hook_form_FORM_ID_alter() for feeds_ui_mapping_form.
 *
 * Appends a short explanation of the QueryPath Expression source to the help
 * text of the mapping form.
 */
function feeds_querypath_parser_form_feeds_ui_mapping_form_alter(&$form, &$form_state) {
  // Appending with .= to a missing element raises a notice, so make sure the
  // help markup exists first.
  if (!isset($form['help']['#markup'])) {
    $form['help']['#markup'] = '';
  }
  $form['help']['#markup'] .= '<p>' . t('The QueryPath Expression source allows you to use QueryPath to populate each field. Add a new QueryPath Expression source for each target you want to map.') . '</p>';
}

Functions

Namesort descending Description
feeds_querypath_parser_form_feeds_ui_mapping_form_alter Implementation of hook_form_feeds_ui_mapping_form_alter().

Classes

Namesort descending Description
FeedsQueryPathParser Feeds parser that extracts items from a document using QueryPath CSS selectors.