You are here

FeedsJSONPathParser.inc in Feeds JSONPath Parser 7

Same filename and directory in other branches
  1. 6 FeedsJSONPathParser.inc

Contains FeedsJSONPathParser.

File

FeedsJSONPathParser.inc
View source
<?php

/**
 * @file
 * Contains FeedsJSONPathParser.
 */
use Flow\JSONPath\JSONPath;

/**
 * Parses JSON using JSONPath.
 */
class FeedsJSONPathParser extends FeedsParser {

  /**
   * A regular expression that finds four byte UTF-8 chars.
   *
   * @var string
   */
  protected static $fourByteRegex = '/(?:\\xF0[\\x90-\\xBF][\\x80-\\xBF]{2}|[\\xF1-\\xF3][\\x80-\\xBF]{3}|\\xF4[\\x80-\\x8F][\\x80-\\xBF]{2})/s';

  /**
   * The source fields to debug.
   *
   * @var array
   */
  protected $debug = array();

  /**
   * Makes sure the JSONPath library is available.
   *
   * @throws RuntimeException
   *   Thrown when feeds_jsonpath_parser_load_library() reports that the library
   *   could not be loaded.
   */
  protected function loadLibrary() {
    if (feeds_jsonpath_parser_load_library()) {
      return;
    }

    // Users with the 'administer site configuration' permission get a
    // clickable link, everybody else only gets the plain text.
    $label = t('status report');
    $status_report = user_access('administer site configuration') ? l($label, 'admin/reports/status') : $label;

    $message = t('The JSONPath library is not installed. Check the !status_report_link for more info.', array(
      '!status_report_link' => $status_report,
    ));
    throw new RuntimeException($message);
  }

  /**
   * Implements FeedsParser::parse().
   *
   * Decodes the fetched JSON document, selects the items with the context
   * expression and runs every configured source expression against each item
   * of the current batch.
   *
   * @param FeedsSource $source
   *   The feed source being imported.
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result that provides the raw JSON document.
   *
   * @return FeedsParserResult
   *   The result holding the parsed items and the link of the feed.
   *
   * @throws RuntimeException
   *   Thrown if the JSONPath library is missing or a query does not return an
   *   array.
   * @throws Exception
   *   Thrown if the document cannot be decoded as JSON.
   */
  public function parse(FeedsSource $source, FeedsFetcherResult $fetcher_result) {
    $this->loadLibrary();
    $mappings = $this->getOwnMappings();
    $source_config = $source->getConfigFor($this);

    // Allow config inheritance.
    if (empty($source_config)) {
      $source_config = $this->config;
    }

    // configDefaults() defines 'debug' as an empty array, so the 'options' key
    // may be missing. Fall back to "debug nothing" instead of handing NULL to
    // array_filter().
    $debug_options = array();
    if (isset($source_config['debug']['options']) && is_array($source_config['debug']['options'])) {
      $debug_options = $source_config['debug']['options'];
    }
    $this->debug = array_keys(array_filter($debug_options));

    $raw = trim($fetcher_result->getRaw());
    $result = new FeedsParserResult();

    // Set link so we can set the result link attribute. Only do so when the
    // fetcher configuration actually carries a source.
    $fetcher_config = $source->getConfigFor($source->importer->fetcher);
    if (isset($fetcher_config['source'])) {
      $result->link = $fetcher_config['source'];
    }
    $array = json_decode($raw, TRUE);

    // Support JSON lines format: join the consecutive objects with commas and
    // wrap them in a list before decoding again.
    if (!is_array($array)) {
      $raw = preg_replace('/}\\s*{/', '},{', $raw);
      $raw = '[' . $raw . ']';
      $array = json_decode($raw, TRUE);
    }
    if (!is_array($array)) {
      throw new Exception(t('There was an error decoding the JSON document.'));
    }
    $all_items = $this->jsonPath($array, $source_config['context']);
    unset($array);

    // Batch: process one slice of the items per call, starting at the pointer
    // stored in the parse state.
    $state = $source->state(FEEDS_PARSE);
    if (!$state->total) {
      $state->total = count($all_items);
    }
    $limit = $source->importer->getLimit();
    $start = (int) $state->pointer;
    $state->pointer = $start + $limit;
    $all_items = array_slice($all_items, $start, $limit);

    // Set progress state.
    $state->progress($state->total, $state->pointer);

    // Debug output.
    $this->debug($all_items, 'context');
    foreach ($all_items as $item) {

      // Invoke a hook to check whether the item should be skipped.
      if ($this->invokeHook($item, $source) === TRUE) {
        continue;
      }
      $parsed_item = $variables = array();
      foreach ($source_config['sources'] as $source_key => $query) {

        // Variable substitution: results of earlier sources can be referenced
        // as {target} in later queries.
        $query = strtr($query, $variables);
        $parsed = $this->parseSourceElement($item, $query, $source_key);

        // Only register a variable for sources that still have a mapping. A
        // stale source would otherwise raise an undefined index notice and
        // register the meaningless variable "{}".
        if (isset($mappings[$source_key])) {
          $variables['{' . $mappings[$source_key] . '}'] = is_array($parsed) ? reset($parsed) : $parsed;
        }

        // Avoid null values.
        if (isset($parsed)) {
          $parsed_item[$source_key] = $parsed;
        }
      }
      if (!empty($parsed_item)) {
        $result->items[] = $parsed_item;
      }
    }
    return $result;
  }

  /**
   * Runs a JSONPath expression against decoded JSON data.
   *
   * @param array $array
   *   The decoded data to search.
   * @param string $expression
   *   The JSONPath expression to evaluate.
   *
   * @return array
   *   The matches of the expression, or an empty array if there are none.
   *
   * @throws RuntimeException
   *   Thrown when the library returns a non-empty result that is not an array.
   */
  protected function jsonPath($array, $expression) {
    $json_path = new JSONPath($array);
    $matches = $json_path->find($expression)->data();

    // A non-empty result has to be an array, anything else is an error.
    if (!empty($matches) && !is_array($matches)) {
      throw new RuntimeException(t('The parsed json must return an array.'));
    }

    // Every kind of empty result is normalized to an empty array.
    return empty($matches) ? array() : $matches;
  }

  /**
   * Evaluates the query of a single source against one context item.
   *
   * @param array $item
   *   One item taken from the context result.
   * @param string $query
   *   The JSONPath query to run against the item.
   * @param string $source
   *   The source key the query belongs to; used for the debug output.
   *
   * @return mixed
   *   NULL if the query is empty or matches nothing, the single match if there
   *   is exactly one, otherwise the array of matches. Four byte UTF-8
   *   characters in non-empty string matches are converted or stripped.
   */
  protected function parseSourceElement($item, $query, $source) {
    if (empty($query)) {
      return NULL;
    }

    $matches = $this->jsonPath($item, $query);
    $this->debug($matches, $source);

    if (count($matches) === 0) {
      return NULL;
    }

    // The importer configuration decides whether four byte characters are
    // converted to entities or removed.
    $convert = !empty($this->config['convert_four_byte']);
    foreach ($matches as $index => $match) {
      if (!is_string($match) || $match === '') {
        continue;
      }
      $matches[$index] = $convert ? $this->convertFourBytes($match) : $this->stripFourBytes($match);
    }

    // A single match is returned directly instead of wrapped in an array.
    return count($matches) === 1 ? reset($matches) : $matches;
  }

  /**
   * Source form.
   *
   * Builds the fieldset with the context query, one query field per JSONPath
   * mapping and the debug options.
   *
   * @param array $source_config
   *   The configuration to take the default values from. The configuration of
   *   the parser is used when it is empty.
   *
   * @return array|null
   *   The form array, or NULL when overriding is disabled and the form is not
   *   built on behalf of configForm().
   */
  public function sourceForm($source_config) {
    $form = array();
    if (empty($source_config)) {
      $source_config = $this->config;
    }
    if (isset($source_config['allow_override']) && !$source_config['allow_override'] && empty($source_config['config'])) {
      return;
    }

    // Add extensions that might get imported.
    $fetcher = feeds_importer($this->id)->fetcher;
    if (isset($fetcher->config['allowed_extensions'])) {
      if (strpos($fetcher->config['allowed_extensions'], 'json') === FALSE) {
        $fetcher->config['allowed_extensions'] .= ' json';
      }
    }

    // Collect the mappings that belong to this parser, along with the targets
    // that are flagged as unique.
    $mappings_ = feeds_importer($this->id)->processor->config['mappings'];
    $uniques = $mappings = array();
    foreach ($mappings_ as $mapping) {
      if (strpos($mapping['source'], 'jsonpath_parser:') === 0) {
        $mappings[$mapping['source']] = $mapping['target'];
        if (!empty($mapping['unique'])) {
          $uniques[] = $mapping['target'];
        }
      }
    }
    $form['jsonpath'] = array(
      '#type' => 'fieldset',
      '#title' => t('JSONPath Parser Settings'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
      '#tree' => TRUE,
    );
    if (empty($mappings)) {

      // Detect if Feeds menu structure has changed. feeds_ui_menu() is
      // provided by the Feeds Admin UI module, which may not be enabled, so
      // the call is guarded instead of risking a fatal error.
      $feeds_base = 'admin/structure/feeds/';
      if (function_exists('feeds_ui_menu')) {
        $feeds_menu = feeds_ui_menu();
        if (isset($feeds_menu['admin/structure/feeds/list'])) {
          $feeds_base = 'admin/structure/feeds/edit/';
        }
      }
      $form['jsonpath']['error_message']['#markup'] = '<div class="help">' . t('No JSONPath mappings are defined. Define mappings !link.', array(
        '!link' => l(t('here'), $feeds_base . $this->id . '/mapping'),
      )) . '</div><br />';
      return $form;
    }
    $form['jsonpath']['context'] = array(
      '#type' => 'textfield',
      '#title' => t('Context'),
      '#required' => TRUE,
      '#description' => t('This is the base query, all other queries will execute in this context.'),
      '#default_value' => isset($source_config['context']) ? $source_config['context'] : '',
      '#maxlength' => 1024,
      '#size' => 80,
    );
    $form['jsonpath']['sources'] = array(
      '#type' => 'fieldset',
    );
    if (!empty($uniques)) {
      $items = array(
        format_plural(count($uniques), t('Field <strong>!column</strong> is mandatory and considered unique: only one item per !column value will be created.', array(
          '!column' => implode(', ', $uniques),
        )), t('Fields <strong>!columns</strong> are mandatory and values in these columns are considered unique: only one entry per value in one of these columns will be created.', array(
          '!columns' => implode(', ', $uniques),
        ))),
      );
      $form['jsonpath']['sources']['help']['#markup'] = '<div class="help">' . theme('item_list', array(
        'items' => $items,
      )) . '</div>';
    }

    // Each query field advertises the variables of the fields before it; a
    // field's own variable is only added after the field has been built.
    $variables = array();
    foreach ($mappings as $source => $target) {
      $form['jsonpath']['sources'][$source] = array(
        '#type' => 'textfield',
        '#title' => $target,
        '#description' => t('The JSONPath expression to execute.'),
        '#default_value' => isset($source_config['sources'][$source]) ? $source_config['sources'][$source] : '',
        '#maxlength' => 1024,
        '#size' => 80,
      );
      if (!empty($variables)) {
        $variable_text = format_plural(count($variables), t('The variable %v is available for replacement.', array(
          '%v' => implode(', ', $variables),
        )), t('The variables %v are available for replacement.', array(
          '%v' => implode(', ', $variables),
        )));
        $form['jsonpath']['sources'][$source]['#description'] .= '<br />' . $variable_text;
      }
      $variables[] = '{' . $target . '}';
    }
    $form['jsonpath']['debug'] = array(
      '#type' => 'fieldset',
      '#title' => t('Debug'),
      '#collapsible' => TRUE,
      '#collapsed' => TRUE,
    );
    $form['jsonpath']['debug']['options'] = array(
      '#type' => 'checkboxes',
      '#title' => t('Debug query'),
      '#options' => array_merge(array(
        'context' => 'context',
      ), $mappings),
      '#default_value' => isset($source_config['debug']['options']) ? $source_config['debug']['options'] : array(),
    );
    return $form;
  }

  /**
   * Override parent::configForm().
   *
   * Reuses the source form and extends it with the settings that only exist
   * on the importer: the override switch and the four byte conversion.
   *
   * @param array $form_state
   *   The form state; not used by this implementation.
   *
   * @return array
   *   The configuration form.
   */
  public function configForm(&$form_state) {
    // The 'config' flag keeps sourceForm() from returning early when
    // overriding is disabled.
    $config = $this->getConfig();
    $config['config'] = TRUE;
    $form = $this->sourceForm($config);

    $allow_override = array(
      '#type' => 'checkbox',
      '#title' => t('Allow source configuration override'),
      '#description' => t('This setting allows feed nodes to specify their own JSONPath values for the context and sources.'),
      '#default_value' => $config['allow_override'],
    );
    $convert_four_byte = array(
      '#type' => 'checkbox',
      '#title' => t('Convert four byte characters'),
      '#description' => t('Coverts four byte UTF-8 characters to their HTML entity. By default, four byte characters will be stripped.'),
      '#default_value' => !empty($config['convert_four_byte']),
    );

    // On the importer the context is optional and the fieldset starts open.
    $form['jsonpath']['context']['#required'] = FALSE;
    $form['jsonpath']['#collapsed'] = FALSE;
    $form['jsonpath']['allow_override'] = $allow_override;
    $form['jsonpath']['convert_four_byte'] = $convert_four_byte;

    return $form;
  }

  /**
   * Override parent::getMappingSources().
   *
   * Offers one new JSONPath source whose number is one higher than the highest
   * number used by the existing JSONPath mappings, or 0 if there are none.
   *
   * @return array
   *   The new JSONPath source followed by the sources of the parent.
   */
  public function getMappingSources() {
    $importer = feeds_importer($this->id);
    $own_mappings = $this->filterMappings($importer->processor->config['mappings']);

    $next = 0;
    if (!empty($own_mappings)) {
      // Source keys look like "jsonpath_parser:<number>"; collect the part
      // after the first colon.
      $numbers = array();
      foreach (array_keys($own_mappings) as $source_key) {
        $parts = explode(':', $source_key);
        $numbers[] = $parts[1];
      }
      $highest = max($numbers);
      $next = ++$highest;
    }

    $own_source = array(
      'jsonpath_parser:' . $next => array(
        'name' => t('JSONPath Expression'),
        'description' => t('Allows you to configure a JSONPath expression that will populate this field.'),
      ),
    );
    return $own_source + parent::getMappingSources();
  }
  /**
   * Define source defaults.
   *
   * @return array
   *   Always an empty array.
   */
  public function sourceDefaults() {
    $defaults = array();
    return $defaults;
  }

  /**
   * Define defaults.
   *
   * @return array
   *   The default configuration: an empty context, no sources, no debug
   *   settings, overriding disabled and four byte conversion disabled.
   */
  public function configDefaults() {
    $defaults = array();
    $defaults['context'] = '';
    $defaults['sources'] = array();
    $defaults['debug'] = array();
    $defaults['allow_override'] = FALSE;
    $defaults['convert_four_byte'] = FALSE;
    return $defaults;
  }

  /**
   * Override parent::sourceFormValidate().
   *
   * If the values of this source are the same as the base config, or if
   * overriding is not allowed, they are set to blank so that the values will
   * be inherited from the importer defaults.
   *
   * @param array $values
   *   The values from the form to validate, passed by reference. They are
   *   replaced with the contents of their 'jsonpath' key.
   */
  public function sourceFormValidate(&$values) {
    $values = $values['jsonpath'];

    // The importer-only settings are not part of the source values, so they
    // are dropped before the comparison.
    $importer_config = $this->getConfig();
    $override_allowed = $importer_config['allow_override'];
    unset($importer_config['allow_override'], $importer_config['convert_four_byte']);

    // Sort both arrays by key so the strict comparison does not depend on the
    // order of the keys.
    ksort($values);
    ksort($importer_config);

    if (!$override_allowed || $values === $importer_config) {
      $values = array();
      return;
    }

    $this->configFormValidate($values);
  }

  /**
   * Override parent::configFormValidate().
   *
   * Unwraps the 'jsonpath' values if present and trims the context and all
   * source queries.
   *
   * @param array $values
   *   The values from the form to validate, passed by reference. The
   *   'context' key is always set afterwards.
   */
  public function configFormValidate(&$values) {
    if (isset($values['jsonpath'])) {
      $values = $values['jsonpath'];
    }
    $values['context'] = isset($values['context']) ? trim($values['context']) : '';

    // Write the trimmed queries back by key; iterating by reference would
    // leave a dangling reference to the last element behind.
    if (!empty($values['sources']) && is_array($values['sources'])) {
      foreach ($values['sources'] as $key => $query) {
        $values['sources'][$key] = trim($query);
      }
    }
  }

  /**
   * Returns the processor mappings that use a source of this parser.
   *
   * @return array
   *   An array of mappings keyed source => target.
   */
  protected function getOwnMappings() {
    $importer = feeds_importer($this->id);
    $config = $importer->getConfig();
    $processor_mappings = $config['processor']['config']['mappings'];

    return $this->filterMappings($processor_mappings);
  }

  /**
   * Picks the mappings whose source starts with "jsonpath_parser:".
   *
   * @param array $mappings
   *   A mapping array from a processor; every mapping needs a 'source' key and
   *   matching mappings also a 'target' key.
   *
   * @return array
   *   An array of mappings keyed source => target.
   */
  protected function filterMappings($mappings) {
    $prefix = 'jsonpath_parser:';
    $own = array();

    foreach ($mappings as $mapping) {
      $source = $mapping['source'];

      // Skip the sources that are provided by somebody else.
      if (strpos($source, $prefix) !== 0) {
        continue;
      }
      $own[$source] = $mapping['target'];
    }

    return $own;
  }
  /**
   * Shows query results as a message if debugging is enabled for the source.
   *
   * @param array $item
   *   The values to show; every value becomes one list item.
   * @param string $source
   *   The source key the values belong to. Nothing is shown unless it is one
   *   of the sources listed in $this->debug.
   */
  protected function debug($item, $source) {
    if (!in_array($source, $this->debug)) {
      return;
    }

    $rows = '';
    foreach ($item as $value) {
      $rows .= '<li>' . check_plain(var_export($value, TRUE)) . '</li>';
    }

    drupal_set_message($source . ':' . '<ul>' . $rows . '</ul>');
  }

  /**
   * Asks the feeds_jsonpath_parser_filter hook whether to skip an item.
   *
   * The implementations are called one after another until the first one
   * returns TRUE.
   *
   * @param array &$item
   *   The item that is handed to the hook implementations.
   * @param FeedsSource $source
   *   The feed source.
   *
   * @return true|null
   *   TRUE if an implementation returned TRUE, which means the item should be
   *   skipped; NULL otherwise.
   */
  protected function invokeHook(array &$item, FeedsSource $source) {
    foreach (module_implements('feeds_jsonpath_parser_filter') as $module) {
      $callback = $module . '_feeds_jsonpath_parser_filter';
      $skip = $callback($item, $source);

      if ($skip === TRUE) {
        return TRUE;
      }
    }

    return NULL;
  }

  /**
   * Removes every four byte UTF-8 character from a string.
   *
   * @param string $string
   *   The string to clean up.
   *
   * @return string
   *   The string without the characters matched by self::$fourByteRegex.
   */
  public static function stripFourBytes($string) {
    $pattern = self::$fourByteRegex;
    $stripped = preg_replace($pattern, '', $string);

    return $stripped;
  }

  /**
   * Converts four byte UTF-8 characters to numeric HTML entities.
   *
   * @param string $string
   *   The string to convert.
   *
   * @return string
   *   The string in which every character matched by self::$fourByteRegex has
   *   been replaced by the result of doFourByteReplace().
   */
  public static function convertFourBytes($string) {
    $callback = array(__CLASS__, 'doFourByteReplace');

    return preg_replace_callback(self::$fourByteRegex, $callback, $string);
  }

  /**
   * Callback for FeedsJSONPathParser::convertFourBytes().
   *
   * @param array $matches
   *   The regular expression matches; the first entry is the matched four
   *   byte character.
   *
   * @return string
   *   The numeric HTML entity ("&#<codepoint>;") of the character.
   */
  public static function doFourByteReplace(array $matches) {
    $char = $matches[0];

    // Remove the UTF-8 marker from each byte and move the remaining bits to
    // their position within the codepoint.
    $first = (ord($char[0]) - 0xf0) << 18;
    $second = (ord($char[1]) - 0x80) << 12;
    $third = (ord($char[2]) - 0x80) << 6;
    $fourth = ord($char[3]) - 0x80;

    return '&#' . ($first + $second + $third + $fourth) . ';';
  }

}

Classes

Name (sort descending) | Description
FeedsJSONPathParser Parses JSON using JSONPath.