You are here

parser_simplepie.module in FeedAPI 5

Same filename and directory in other branches
  1. 6 parser_simplepie/parser_simplepie.module

Parses the incoming URL with SimplePie, then provides a data structure of the feed.

File

parser_simplepie/parser_simplepie.module
View source
<?php

/**
 * @file
 * Parse the incoming URL with SimplePie then provide a data structure of the feed
 */

/**
 * Implementation of hook_help().
 *
 * Answers two help sections: 'admin/modules#description' (the module
 * description) and 'feedapi/full_name' (the display name of this parser).
 *
 * @param $section
 *   The help section being requested.
 * @return
 *   The translated text for a handled section; nothing for any other section.
 */
function parser_simplepie_help($section) {
  // Loose comparison is intentional: it matches what a switch statement does.
  if ($section == 'admin/modules#description') {
    return t('Provide a common syndication parser for FeedAPI-compatible modules powered by SimplePie library.');
  }
  if ($section == 'feedapi/full_name') {
    return t('Parser SimplePie');
  }
}

/**
 * Implementation of hook_requirements().
 *
 * Checks that simplepie.inc has been copied into this module's directory.
 * A missing file is reported as a warning during 'install' and as an error at
 * 'runtime'. When the file is present, the 'runtime' phase loads it and
 * reports the installed SimplePie version; the 'install' phase reports
 * nothing.
 *
 * @param $phase
 *   The phase in which requirements are checked; only 'install' and 'runtime'
 *   are handled.
 * @return
 *   An array of requirements, keyed by 'simplepie' when there is something to
 *   report, otherwise empty.
 */
function parser_simplepie_requirements($phase) {
  $requirements = array();

  // Ensure translations don't break at install time. Every string below goes
  // through $t rather than t() for that reason.
  $t = get_t();

  if ($phase != 'install' && $phase != 'runtime') {
    return $requirements;
  }

  $path = drupal_get_path('module', 'parser_simplepie') . '/simplepie.inc';
  if (!file_exists($path)) {
    $requirements['simplepie'] = array(
      'title' => $t("FeedAPI SimplePie"),
      'description' => $t("Obtain the !simplepie package and copy simplepie.inc to the parser_simplepie directory.", array(
        '!simplepie' => l('SimplePie', 'http://simplepie.org/downloads', array(), NULL, NULL, TRUE),
      )),
      'severity' => $phase == 'install' ? REQUIREMENT_WARNING : REQUIREMENT_ERROR,
      'value' => $t('simplepie.inc file missing'),
    );
  }
  elseif ($phase == 'runtime') {
    // Loading the library is what defines SIMPLEPIE_VERSION.
    require_once $path;
    $requirements['simplepie'] = array(
      'title' => $t('SimplePie Parser'),
      'description' => $t('The current installed version of SimplePie is !version', array(
        '!version' => '<strong>' . SIMPLEPIE_VERSION . '</strong>',
      )),
      'severity' => REQUIREMENT_OK,
      'value' => $t('Installed correctly'),
    );
  }
  return $requirements;
}

/**
 * Implementation of hook_nodeapi().
 *
 * Deletes the cache file of a feed when its node is deleted. The file is
 * looked up as md5(feed URL) . '.spc' inside the directory returned by
 * _parser_simplepie_sanitize_cache().
 *
 * @param $node
 *   The node being acted on.
 * @param $op
 *   The operation being performed; only 'delete' is handled.
 */
function parser_simplepie_nodeapi(&$node, $op) {
  if (isset($node->feed) || feedapi_enabled_type($node->type)) {
    switch ($op) {
      case 'delete':
        // A feed-enabled node type does not guarantee that a feed with a URL
        // is attached; without the URL there is no cache file name to build.
        if (!isset($node->feed->url)) {
          break;
        }
        $cache_dir = _parser_simplepie_sanitize_cache();
        // FALSE means no usable cache directory. Concatenating it would give
        // '/<md5>.spc', a path in the filesystem root, so stop here instead.
        if ($cache_dir === FALSE) {
          break;
        }
        $cache_file = $cache_dir . '/' . md5($node->feed->url) . '.spc';
        if (file_exists($cache_file)) {
          unlink($cache_file);
        }
        break;
    }
  }
}

/**
 * Implementation of hook_feedapi_feed().
 *
 * Before dispatching, the host of the supplied URL is sanity-checked: the
 * operation is refused (FALSE) when the host is neither a literal IPv4
 * address nor resolvable through gethostbyname(). Calls that carry no URL, or
 * a URL without a host part, skip that check.
 *
 * @param $op
 *   The operation: 'type', 'compatible' or 'parse'.
 * @param ...
 *   Optional second argument: either a URL string or a feed object with a
 *   url property. 'parse' requires the feed object.
 * @return
 *   - 'type': array of supported feed type names.
 *   - 'compatible': the feed type name this parser claims for the feed.
 *   - 'parse': the result of _parser_simplepie_feedapi_parse().
 *   - FALSE for unknown operations or when the host check fails.
 */
function parser_simplepie_feedapi_feed($op) {
  $args = func_get_args();

  // The second argument is optional (e.g. the 'type' operation needs none).
  $source = isset($args[1]) ? $args[1] : NULL;
  $url = '';
  if (is_string($source)) {
    $url = $source;
  }
  elseif (is_object($source) && isset($source->url) && is_string($source->url)) {
    $url = $source->url;
  }

  // Validate the URL, if it is not basically valid, why send to simplepie object
  $url_parts = parse_url($url);
  $host = (is_array($url_parts) && isset($url_parts['host'])) ? $url_parts['host'] : NULL;
  if ($host !== NULL) {
    // Anchored so that only a complete dotted-quad counts as an IP address.
    $valid_ip_regex = '/^(?:[1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}$/D';
    // Check the cheap pattern first so literal IPs never trigger a lookup.
    // gethostbyname() hands back its input unchanged when resolution fails.
    if (preg_match($valid_ip_regex, $host) < 1 && gethostbyname($host) === $host) {
      return FALSE;
    }
  }

  switch ($op) {
    case 'type':
      return array(
        "XML feed",
      );
    case 'compatible':
      // Stop gap for simplefeed.inc version <= 1.1: every feed is claimed as
      // compatible without probing it. The probing code that used to follow
      // this return was unreachable and has been removed.
      return "XML feed";
    case 'parse':
      $feed = is_object($source) ? $source : FALSE;
      return _parser_simplepie_feedapi_parse($feed);
  }
  return FALSE;
}

/**
 * This function simplifies a raw feed item.
 *
 * Takes the first entry of $raw_item['child'] and flattens it: every key maps
 * to the 'data' value of its single element, or to a list of 'data' values
 * when there are several elements.
 *
 * @param $raw_item
 *   The raw item array; expected to carry a 'child' array.
 * @return
 *   The flattened array. The raw item is returned instead when it has no
 *   usable 'child' entry; when 'child' is present but its first entry is
 *   empty, the returned raw item has that first entry removed.
 */
function parser_simplepie_simplify_raw_item($raw_item) {
  // Nothing to simplify without a 'child' array.
  if (!is_array($raw_item) || !isset($raw_item['child']) || !is_array($raw_item['child'])) {
    return $raw_item;
  }

  $item = array_shift($raw_item['child']);
  if (!$item || !is_array($item)) {
    return $raw_item;
  }

  $simple_item = array();
  foreach ($item as $tag => $elements) {
    // Values that are not element lists are passed through untouched.
    if (!is_array($elements)) {
      $simple_item[$tag] = $elements;
      continue;
    }
    if (count($elements) > 1) {
      $simple_item[$tag] = array();
      foreach ($elements as $element) {
        $simple_item[$tag][] = isset($element['data']) ? $element['data'] : NULL;
      }
    }
    else {
      $simple_item[$tag] = isset($elements[0]['data']) ? $elements[0]['data'] : NULL;
    }
  }
  return $simple_item;
}

/**
 * Parsing the feed
 *
 * Runs the feed URL through SimplePie and copies the result into plain
 * stdClass objects.
 *
 * @param $feed
 *  The feed object; only its url property is read here
 * @return
 *  The structured data extracted from the feed: an object with title,
 *  description, options->link and an items array. Each item carries title,
 *  description and an options object (original_url, timestamp, guid,
 *  original_author, tags, domains, raw, plus enclosures and location when the
 *  item has them). FALSE if the parser reports an error.
 */
function _parser_simplepie_feedapi_parse($feed) {
  $parser = _parser_simplepie_get_parser($feed->url);
  if ($parser->error) {
    return FALSE;
  }

  // Do we have html_entity_decode? Some feeds return html entities in the links
  $entity_decode = function_exists('html_entity_decode');

  // Construct the standard form of the parsed feed
  $parsed_source = new stdClass();
  $parsed_source->title = _parser_simplepie_title($parser->get_title());
  $parsed_source->description = $parser->get_description();
  $parsed_source->options = new stdClass();
  $feed_link = $parser->get_link();
  $parsed_source->options->link = $entity_decode ? html_entity_decode($feed_link) : $feed_link;
  $parsed_source->items = array();

  $items_num = $parser->get_item_quantity();
  for ($i = 0; $i < $items_num; $i++) {
    $simplepie_item = $parser->get_item($i);

    $curr_item = new stdClass();
    $curr_item->title = _parser_simplepie_title($simplepie_item->get_title());
    $curr_item->description = $simplepie_item->get_content();
    $curr_item->options = new stdClass();
    $item_link = $simplepie_item->get_link();
    $curr_item->options->original_url = $entity_decode ? html_entity_decode($item_link) : $item_link;

    // U = std. unix timestamp
    $curr_item->options->timestamp = $simplepie_item->get_date("U");
    $curr_item->options->guid = $simplepie_item->get_id();
    $curr_item->options->original_author = $simplepie_item->get_author();

    // Enclosures, grouped as enclosures[type][subtype][].
    $enclosures = $simplepie_item->get_enclosures();
    if (is_array($enclosures)) {
      foreach ($enclosures as $enclosure) {
        $mime = $enclosure->get_real_type();
        if ($mime != '') {
          // explode() instead of split(), which no longer exists in PHP 7+.
          // Padding keeps list() safe when the type contains no slash.
          list($type, $subtype) = array_pad(explode('/', $mime), 2, '');
          $curr_item->options->enclosures[$type][$subtype][] = $enclosure;
        }
      }
    }

    // Location
    $latitude = $simplepie_item->get_latitude();
    $longitude = $simplepie_item->get_longitude();
    if (!is_null($latitude) && !is_null($longitude)) {
      // Create the container explicitly; writing through an undefined
      // property is an error on current PHP versions.
      $curr_item->options->location = new stdClass();
      $curr_item->options->location->latitude = array($latitude);
      $curr_item->options->location->longitude = array($longitude);
    }

    // Extract tags related to the item. $domains maps each scheme to the
    // indexes (within $tags) of the terms that belong to it.
    $simplepie_tags = $simplepie_item->get_categories();
    $tags = array();
    $domains = array();
    // Guard with is_array(): count() rejects non-countable values on PHP 8.
    if (is_array($simplepie_tags)) {
      foreach ($simplepie_tags as $tag) {
        $domain = (string) $tag->get_scheme();
        $tags[] = (string) $tag->term;
        if (!empty($domain)) {
          if (!isset($domains[$domain])) {
            $domains[$domain] = array();
          }
          $domains[$domain][] = count($tags) - 1;
        }
      }
    }
    $curr_item->options->tags = $tags;
    $curr_item->options->domains = $domains;

    // Stick the raw data onto the feed item.
    $curr_item->options->raw = $simplepie_item->data;
    $parsed_source->items[] = $curr_item;
  }
  return $parsed_source;
}

/**
 * Locate the SimplePie cache directory, creating it when possible.
 *
 * The directory is 'simplepie_cache' below file_directory_path(). When it is
 * not already a writable directory, the path is passed through
 * file_create_path() and created if the files directory is writable.
 *
 * @return
 *  The path of a writable cache directory, or FALSE if none is available.
 */
function _parser_simplepie_sanitize_cache() {
  $cache_location = file_directory_path() . '/simplepie_cache';
  if (is_dir($cache_location) && is_writable($cache_location)) {
    return $cache_location;
  }

  $cache_location = file_create_path($cache_location);
  // file_create_path() did not yield a usable path: nothing to create.
  if (!$cache_location) {
    return FALSE;
  }
  if (!file_exists($cache_location) && is_writable(file_directory_path())) {
    mkdir($cache_location);
  }
  // Require an actual directory: a writable regular file at this path must
  // not be mistaken for the cache directory.
  if (!is_dir($cache_location) || !is_writable($cache_location)) {
    return FALSE;
  }
  return $cache_location;
}

/**
 * Build and initialise a SimplePie object for a feed.
 *
 * Loads simplepie.inc from this module's directory, applies the settings
 * below and calls init(). Caching is switched on only when
 * _parser_simplepie_sanitize_cache() returns a directory.
 *
 * @param $url
 *  The feed's url
 * @return
 *  The initialised SimplePie object
 */
function _parser_simplepie_get_parser($url) {
  require_once drupal_get_path('module', 'parser_simplepie') . '/simplepie.inc';

  $simplepie = new SimplePie();
  $simplepie->set_feed_url($url);
  $simplepie->set_timeout(15);
  $simplepie->set_stupidly_fast(TRUE);
  $simplepie->encode_instead_of_strip(FALSE);

  // FALSE here means there is no usable cache directory.
  $cache_dir = _parser_simplepie_sanitize_cache();
  $simplepie->enable_cache($cache_dir !== FALSE);
  $simplepie->set_cache_location($cache_dir);

  $simplepie->init();
  return $simplepie;
}

/**
 * Turn raw title markup into plain text.
 *
 * Tags are stripped first, then HTML entities (including both quote styles)
 * are decoded. The UTF-8 charset argument is only passed on PHP 5 and later.
 *
 * @param $title
 *  The raw title
 * @return
 *  The title without tags and with entities decoded
 */
function _parser_simplepie_title($title) {
  $plain = strip_tags($title);
  $before_php5 = version_compare(PHP_VERSION, '5.0.0', '<');
  return $before_php5
    ? html_entity_decode($plain, ENT_QUOTES)
    : html_entity_decode($plain, ENT_QUOTES, 'UTF-8');
}

Functions

Name (sort descending) | Description
parser_simplepie_feedapi_feed Implementation of hook_feedapi_feed().
parser_simplepie_help Implementation of hook_help().
parser_simplepie_nodeapi Delete cache validating functions when feed is deleted
parser_simplepie_requirements Implementation of hook_requirements().
parser_simplepie_simplify_raw_item This function simplifies a raw feed item.
_parser_simplepie_feedapi_parse Parsing the feed
_parser_simplepie_get_parser Set SimplePie setting
_parser_simplepie_sanitize_cache Set the default caching directory if the current setting is not useable
_parser_simplepie_title Prepare raw data to be a title