You are here

parser_simplepie.module in FeedAPI 6

Same filename and directory in other branches
  1. 5 parser_simplepie/parser_simplepie.module

Parse the incoming URL with SimplePie, then provide a data structure of the feed.

File

parser_simplepie/parser_simplepie.module
View source
<?php

/**
 * @file
 * Parse the incoming URL with SimplePie, then provide a data structure of the feed.
 */

/**
 * Implementation of hook_help().
 *
 * Returns translated help text for the two paths this module answers:
 * 'admin/modules#description' (module description) and
 * 'feedapi/full_name' (display name of the parser). Any other path yields
 * no value.
 */
function parser_simplepie_help($path, $arg) {
  if ($path == 'admin/modules#description') {
    return t('Provide a common syndication parser for FeedAPI-compatible modules powered by SimplePie library.');
  }
  if ($path == 'feedapi/full_name') {
    return t('Parser SimplePie');
  }
}

/**
 * Implementation of hook_nodeapi().
 *
 * Removes the SimplePie cache file that belongs to a feed when its node is
 * deleted. The cache file name is the md5 hash of the feed URL with an
 * '.spc' extension, located in the directory reported by
 * _parser_simplepie_sanitize_cache().
 *
 * @param $node
 *   The node being acted upon. Only nodes carrying a feed URL in
 *   $node->feed->url are handled.
 * @param $op
 *   The node operation; only 'delete' is acted upon.
 */
function parser_simplepie_nodeapi(&$node, $op) {
  // Without a feed URL there is no cache file name to derive, so there is
  // nothing to clean up.
  if ($op != 'delete' || !isset($node->feed->url)) {
    return;
  }

  // The cache directory may be unavailable; never build a path from FALSE,
  // which would point at the filesystem root.
  $cache_dir = _parser_simplepie_sanitize_cache();
  if ($cache_dir === FALSE) {
    return;
  }

  $cache_file = $cache_dir . '/' . md5($node->feed->url) . '.spc';
  if (file_exists($cache_file)) {
    unlink($cache_file);
  }
}

/**
 * Implementation of hook_feedapi_feed().
 *
 * Extra positional arguments are read through func_get_args():
 *   - argument 1: the feed, either a URL string or an object with a url
 *     property (required for every operation except 'type'),
 *   - argument 2: optional array of parser settings.
 *
 * @param $op
 *   The requested operation: 'type', 'compatible' or 'parse'.
 * @return
 *   - 'type': array containing the supported feed type name.
 *   - 'compatible': the supported feed type name when the feed host is
 *     acceptable, FALSE otherwise.
 *   - 'parse': the parsed feed object, or FALSE on failure.
 *   - anything else: FALSE.
 */
function parser_simplepie_feedapi_feed($op) {
  $args = func_get_args();

  // The type query carries no feed argument, so it must be answered before
  // any URL validation takes place.
  if ($op === 'type') {
    return array(
      "XML feed",
    );
  }

  // Every other operation needs a feed to work on.
  if (!isset($args[1])) {
    return FALSE;
  }
  if (is_string($args[1])) {
    $url = $args[1];
  }
  else {
    $url = isset($args[1]->url) ? $args[1]->url : '';
  }
  if (!is_string($url)) {
    return FALSE;
  }
  $settings = isset($args[2]) ? $args[2] : array();

  // Validate the URL; if it is not basically valid there is no point in
  // handing it to a SimplePie object.
  $url_parts = parse_url($url);
  if (!isset($url_parts['host']) || $url_parts['host'] === '') {
    return FALSE;
  }
  $host = $url_parts['host'];

  // Fully anchored dotted-quad check; the first octet may not be zero.
  $valid_ip_regex = '/^(?:[1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(?:\\.(?:[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}\\z/';

  // A host is acceptable when it is an IP literal or when it resolves.
  // gethostbyname() returns its argument unchanged on failure. The regex is
  // tested first so that IP literals never trigger a lookup.
  $host_ok = preg_match($valid_ip_regex, $host) > 0 || gethostbyname($host) !== $host;
  if (!$host_ok) {
    return FALSE;
  }

  switch ($op) {
    case 'compatible':
      // Stop gap for simplefeed.inc version <= 1.1: every feed with an
      // acceptable host is reported as compatible without fetching it.
      return "XML feed";

    case 'parse':
      // Parsing reads $feed->url, so a bare URL string cannot be parsed.
      if (!is_object($args[1])) {
        return FALSE;
      }
      return _parser_simplepie_feedapi_parse($args[1], $settings);
  }
  return FALSE;
}

/**
 * Implementation of hook_feedapi_settings_form().
 *
 * A module that provides both parsers and processors MUST inspect $type and
 * return a separate form for each. This module only contributes parser
 * settings.
 *
 * @param $type
 *   The kind of settings form requested; only 'parsers' yields form elements.
 * @return
 *   Form array holding the 'cache_lifetime' textfield for 'parsers', or an
 *   empty array for any other type.
 */
function parser_simplepie_feedapi_settings_form($type) {
  if ($type != 'parsers') {
    return array();
  }

  return array(
    'cache_lifetime' => array(
      '#type' => 'textfield',
      '#title' => t('Cache lifetime in seconds'),
      '#description' => t('Lower this value if you know that your feeds refresh more often and you don\'t want to miss items.'),
      '#default_value' => '3600',
    ),
  );
}

/**
 * Flatten a raw feed item into a simple key => value structure.
 *
 * The first entry of $raw_item['child'] is taken as the element map. For
 * every element name, a single occurrence collapses to its 'data' value,
 * while several occurrences become a list of their 'data' values.
 *
 * @param $raw_item
 *   Raw item array with a 'child' key.
 * @return
 *   The simplified array, or $raw_item (with 'child' already shifted) when
 *   the first child entry is empty or absent.
 */
function parser_simplepie_simplify_raw_item($raw_item) {
  $elements = array_shift($raw_item['child']);
  if (!$elements) {
    return $raw_item;
  }

  $simplified = array();
  foreach ($elements as $name => $occurrences) {
    if (count($occurrences) > 1) {
      // Repeated element: gather every occurrence's data in order.
      foreach ($occurrences as $occurrence) {
        $simplified[$name][] = $occurrence['data'];
      }
      continue;
    }
    // Single element: expose its data directly.
    $simplified[$name] = $occurrences[0]['data'];
  }
  return $simplified;
}

/**
 * Parse a feed with SimplePie and convert it into FeedAPI's structure.
 *
 * @param $feed
 *   Feed object; its url property is handed to SimplePie.
 * @param $settings
 *   Parser settings array, passed through to _parser_simplepie_get_parser().
 * @return
 *   FALSE when SimplePie reports an error. Otherwise an object with title,
 *   description, options->link and an items array. Each item carries title,
 *   description and options (original_url, timestamp, guid, original_author,
 *   enclosures, location, domains, tags, raw).
 */
function _parser_simplepie_feedapi_parse($feed, $settings = array()) {
  $parser = _parser_simplepie_get_parser($feed->url, TRUE, $settings);
  if ($parser->error) {
    return FALSE;
  }

  // Do we have html_entity_decode? Some feeds return html entities in the links
  $entity_decode = function_exists('html_entity_decode');

  // Construct the standard form of the parsed feed
  $feed_description = $parser->get_description();
  $feed_link = $parser->get_link();

  $parsed_source = new stdClass();
  $parsed_source->description = $feed_description;
  $parsed_source->title = _parser_simplepie_title($parser->get_title(), $feed_description);
  $parsed_source->options = new stdClass();
  $parsed_source->options->link = ($entity_decode && $feed_link) ? html_entity_decode($feed_link) : $feed_link;
  $parsed_source->items = array();

  $items_num = $parser->get_item_quantity();
  for ($i = 0; $i < $items_num; $i++) {
    $simplepie_item = $parser->get_item($i);
    $item_content = $simplepie_item->get_content();
    $item_link = $simplepie_item->get_link();

    $curr_item = new stdClass();
    $curr_item->title = _parser_simplepie_title($simplepie_item->get_title(), $item_content);
    $curr_item->description = $item_content;
    $curr_item->options = new stdClass();
    $curr_item->options->original_url = ($entity_decode && $item_link) ? html_entity_decode($item_link) : $item_link;

    // U = std. unix timestamp
    $curr_item->options->timestamp = $simplepie_item->get_date("U");
    $curr_item->options->guid = $simplepie_item->get_id();
    $curr_item->options->original_author = $simplepie_item->get_author();

    // Enclosures, grouped by MIME type and subtype.
    $enclosures = $simplepie_item->get_enclosures();
    if (is_array($enclosures)) {
      foreach ($enclosures as $enclosure) {
        $mime = $enclosure->get_real_type();
        if ($mime != '') {
          // explode() replaces split(), which no longer exists in PHP 7+.
          // Padding keeps a MIME string without a slash from leaving the
          // subtype undefined.
          $mime_parts = array_pad(explode('/', (string) $mime), 2, '');
          $curr_item->options->enclosures[$mime_parts[0]][$mime_parts[1]][] = $enclosure;
        }
      }
    }

    // Location
    $latitude = $simplepie_item->get_latitude();
    $longitude = $simplepie_item->get_longitude();
    if (!is_null($latitude) && !is_null($longitude)) {
      // Create the container explicitly instead of relying on implicit
      // object creation from an unset property.
      $location = new stdClass();
      $location->latitude = array($latitude);
      $location->longitude = array($longitude);
      $curr_item->options->location = $location;
    }

    // Extract tags related to the item. $domains maps each category scheme
    // to the indexes of its terms inside $tags.
    $simplepie_tags = $simplepie_item->get_categories();
    $tags = array();
    $domains = array();
    if (is_array($simplepie_tags)) {
      foreach ($simplepie_tags as $tag) {
        $tags[] = (string) $tag->term;
        $domain = (string) $tag->get_scheme();
        if (!empty($domain)) {
          if (!isset($domains[$domain])) {
            $domains[$domain] = array();
          }
          $domains[$domain][] = count($tags) - 1;
        }
      }
    }
    $curr_item->options->domains = $domains;
    $curr_item->options->tags = $tags;

    // Stick the raw data onto the feed item.
    $curr_item->options->raw = $simplepie_item->data;
    $parsed_source->items[] = $curr_item;
  }
  return $parsed_source;
}

/**
 * Resolve the directory used for SimplePie's cache files.
 *
 * The cache lives in 'simplepie_cache' below the files directory. When that
 * directory is not already usable, an attempt is made to create it.
 *
 * @return
 *   Path of a writable cache directory, or FALSE when none is available.
 */
function _parser_simplepie_sanitize_cache() {
  $cache_location = file_directory_path() . '/simplepie_cache';

  // Fast path: the directory is already in place and writable.
  if (is_writeable($cache_location) && is_dir($cache_location)) {
    return $cache_location;
  }

  $cache_location = file_create_path($cache_location);
  $missing = !file_exists($cache_location);
  if ($missing && is_writable(file_directory_path())) {
    mkdir($cache_location);
  }

  return is_writeable($cache_location) ? $cache_location : FALSE;
}

/**
 * Build and initialise a SimplePie object for a feed URL.
 *
 * Loads simplepie.inc on demand, preferring the copy exposed through the
 * Libraries module and falling back to the one shipped in this module's
 * directory.
 *
 * @param $url
 *   The feed's url
 * @param $enable_cache
 *   If FALSE, the caching is disabled. Caching is also disabled when no
 *   cache directory is available.
 * @param $settings
 *   Parser settings; 'cache_lifetime' (if set) becomes the cache duration.
 *
 * @return
 *   SimplePie object, already initialised via init().
 */
function _parser_simplepie_get_parser($url, $enable_cache = TRUE, $settings = array()) {
  if (!class_exists('SimplePie')) {
    $library_file = NULL;
    if (module_exists('libraries')) {
      $library_file = libraries_get_path('simplepie') . '/simplepie.inc';
    }
    if ($library_file !== NULL && file_exists($library_file)) {
      $path = $library_file;
    }
    else {
      $path = drupal_get_path('module', 'parser_simplepie') . '/simplepie.inc';
    }
    require_once $path;
  }

  $parser = new SimplePie();
  $parser->set_feed_url($url);
  $parser->set_timeout(15);
  if (isset($settings['cache_lifetime'])) {
    $parser->set_cache_duration($settings['cache_lifetime']);
  }
  $parser->set_stupidly_fast(TRUE);
  $parser->encode_instead_of_strip(FALSE);

  // Caching is only switched on when a usable cache directory exists.
  $cache_location = _parser_simplepie_sanitize_cache();
  if ($cache_location !== FALSE) {
    $use_cache = $enable_cache;
  }
  else {
    $use_cache = FALSE;
  }
  $parser->enable_cache($use_cache);
  $parser->set_cache_location($cache_location);

  $parser->init();
  return $parser;
}

/**
 * Prepare raw data to be a title.
 *
 * @param $title
 *   The title found in the feed; returned unchanged when it is not empty.
 * @param $body
 *   Fallback text. When $title is empty, up to the first three words of
 *   $body (split on whitespace and commas) are joined with single spaces.
 * @return
 *   The original title, or the generated one when a fallback was needed.
 */
function _parser_simplepie_title($title, $body = FALSE) {
  if (empty($title) && !empty($body)) {

    // Explode to words and use the first 3 words. Empty pieces are dropped
    // so leading separators do not produce a blank first word, and slicing
    // copes with bodies shorter than three words.
    $words = preg_split("/[\\s,]+/", $body, -1, PREG_SPLIT_NO_EMPTY);
    if (is_array($words)) {
      $title = implode(' ', array_slice($words, 0, 3));
    }
  }
  return $title;
}

Functions

Namesort descending Description
parser_simplepie_feedapi_feed Implementation of hook_feedapi_feed().
parser_simplepie_feedapi_settings_form Implementation of hook_feedapi_settings_form(). If a module provides parsers and processors it MUST evaluate the $type variable to return different forms for parsers and processors. There might be a better term for parsers and processors than $type.
parser_simplepie_help Implementation of hook_help().
parser_simplepie_nodeapi Delete cache validating functions when feed is deleted
parser_simplepie_simplify_raw_item This function simplifies a raw feed item.
_parser_simplepie_feedapi_parse Parsing the feed
_parser_simplepie_get_parser Set SimplePie setting
_parser_simplepie_sanitize_cache Set the default caching directory if the current setting is not useable
_parser_simplepie_title Prepare raw data to be a title