parser_simplepie.module in FeedAPI 6
Same filename and directory in other branches
Parse the incoming URL with SimplePie then provide a data structure of the feed
File
parser_simplepie/parser_simplepie.module (View source)
<?php
/**
* @file
* Parse the incoming URL with SimplePie then provide a data structure of the feed
*/
/**
 * Implementation of hook_help().
 *
 * @param $path
 *   The help path being requested.
 * @param $arg
 *   Path arguments; not used by this implementation.
 *
 * @return
 *   A translated help string for the two paths handled here, otherwise
 *   nothing (NULL).
 */
function parser_simplepie_help($path, $arg) {
  // Module description.
  if ($path == 'admin/modules#description') {
    return t('Provide a common syndication parser for FeedAPI-compatible modules powered by SimplePie library.');
  }
  // Human readable name of this parser.
  if ($path == 'feedapi/full_name') {
    return t('Parser SimplePie');
  }
}
/**
 * Implementation of hook_nodeapi().
 *
 * Removes the SimplePie cache file that belongs to a feed when the feed's
 * node is deleted.
 *
 * @param $node
 *   The node being acted on. Only $node->feed->url is read.
 * @param $op
 *   The node operation; only 'delete' is handled.
 */
function parser_simplepie_nodeapi(&$node, $op) {
  // Without a feed URL there is no cache file name to compute, so there is
  // nothing to clean up (this also covers nodes that are not feeds at all).
  if ($op != 'delete' || !isset($node->feed->url)) {
    return;
  }
  // The helper returns FALSE when no usable cache directory exists; building
  // a path from FALSE would point at "/<md5>.spc" in the filesystem root.
  $cache_dir = _parser_simplepie_sanitize_cache();
  if ($cache_dir === FALSE) {
    return;
  }
  // The cache file is named after the md5 of the feed URL, with the
  // ".spc" extension.
  $cache_file = $cache_dir . '/' . md5($node->feed->url) . '.spc';
  if (file_exists($cache_file)) {
    unlink($cache_file);
  }
}
/**
 * Implementation of hook_feedapi_feed().
 *
 * Further arguments are read with func_get_args():
 *   - $args[1]: the feed, either a URL string or an object with a url
 *     property (not used by the 'type' operation).
 *   - $args[2]: optional array of parser settings.
 *
 * @param $op
 *   The operation: 'type', 'compatible' or 'parse'.
 *
 * @return
 *   - 'type': array of feed types this parser handles.
 *   - 'compatible': the feed type string, or FALSE if the feed URL does not
 *     have a usable host.
 *   - 'parse': the parsed feed structure, or FALSE on failure.
 *   - anything else: FALSE.
 */
function parser_simplepie_feedapi_feed($op) {
  $args = func_get_args();
  $feed = isset($args[1]) ? $args[1] : NULL;
  $settings = isset($args[2]) ? $args[2] : array();

  switch ($op) {
    case 'type':
      // This operation carries no feed, so there is no URL to validate.
      return array(
        "XML feed",
      );

    case 'compatible':
      // Validate the URL first: if it is not basically valid there is no
      // point in handing it to SimplePie.
      if (!_parser_simplepie_feed_has_valid_host($feed)) {
        return FALSE;
      }
      // Stop gap for simplefeed.inc version <= 1.1: report compatibility
      // without fetching the feed.
      return "XML feed";

    case 'parse':
      // Parsing needs the feed object, a bare URL string is not enough.
      if (!is_object($feed) || !_parser_simplepie_feed_has_valid_host($feed)) {
        return FALSE;
      }
      return _parser_simplepie_feedapi_parse($feed, $settings);
  }
  return FALSE;
}

/**
 * Check that a feed's URL has a host that is an IPv4 literal or resolves.
 *
 * @param $feed
 *   A URL string or an object with a url property.
 *
 * @return
 *   TRUE if the host part of the URL is a dotted IPv4 address or a name that
 *   gethostbyname() can resolve, FALSE otherwise.
 */
function _parser_simplepie_feed_has_valid_host($feed) {
  if (is_string($feed)) {
    $url = $feed;
  }
  elseif (is_object($feed) && isset($feed->url) && is_string($feed->url)) {
    $url = $feed->url;
  }
  else {
    return FALSE;
  }

  // parse_url() returns FALSE on seriously malformed URLs and omits the
  // 'host' key when the URL has none.
  $url_parts = parse_url($url);
  if (!is_array($url_parts) || empty($url_parts['host'])) {
    return FALSE;
  }
  $host = $url_parts['host'];

  // Anchored so the whole host has to be an IPv4 address, not just contain
  // one. The first octet may not be 0.
  $octet = '(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])';
  $first_octet = '(?:25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[1-9])';
  $valid_ip_regex = '/^' . $first_octet . '(?:\\.' . $octet . '){3}$/D';
  if (preg_match($valid_ip_regex, $host) > 0) {
    return TRUE;
  }

  // gethostbyname() returns its argument unchanged when resolution fails.
  return gethostbyname($host) !== $host;
}
/**
 * Implementation of hook_feedapi_settings_form().
 *
 * A module that provides both parsers and processors MUST look at $type and
 * return a separate form for each. This module only has a parser form.
 *
 * @param $type
 *   The kind of settings form requested; only 'parsers' is handled.
 *
 * @return
 *   A form array with the cache lifetime field for 'parsers', otherwise an
 *   empty array.
 */
function parser_simplepie_feedapi_settings_form($type) {
  if ($type != 'parsers') {
    return array();
  }

  $cache_lifetime = array(
    '#type' => 'textfield',
    '#title' => t('Cache lifetime in seconds'),
    '#description' => t('Lower this value if you know that your feeds refresh more often and you don\'t want to miss items.'),
    '#default_value' => '3600',
  );
  return array('cache_lifetime' => $cache_lifetime);
}
/**
 * Simplify a raw feed item.
 *
 * Takes the first entry of $raw_item['child'] and flattens each of its
 * elements to the 'data' value(s) it contains: an element with a single
 * entry becomes that entry's data, an element with several entries becomes
 * a list of their data values.
 *
 * @param $raw_item
 *   Raw item array, expected to carry a 'child' array.
 *
 * @return
 *   The simplified array keyed by element name, or $raw_item unchanged when
 *   it has no usable 'child' data.
 */
function parser_simplepie_simplify_raw_item($raw_item) {
  if (!is_array($raw_item) || empty($raw_item['child']) || !is_array($raw_item['child'])) {
    return $raw_item;
  }
  // reset() reads the first child without removing it, so the fallback
  // below hands back the item exactly as it was passed in.
  $item = reset($raw_item['child']);
  if (empty($item) || !is_array($item)) {
    return $raw_item;
  }

  $simple_item = array();
  foreach ($item as $k => $v) {
    if (!is_array($v)) {
      // Not in the expected list-of-entries shape; keep the value as is.
      $simple_item[$k] = $v;
    }
    elseif (count($v) > 1) {
      $simple_item[$k] = array();
      foreach ($v as $u) {
        $simple_item[$k][] = isset($u['data']) ? $u['data'] : NULL;
      }
    }
    else {
      $simple_item[$k] = isset($v[0]['data']) ? $v[0]['data'] : NULL;
    }
  }
  return $simple_item;
}
/**
 * Parse a feed with SimplePie and build the structure handed back to FeedAPI.
 *
 * @param $feed
 *   The feed object; only $feed->url is read.
 * @param $settings
 *   Parser settings, passed on to _parser_simplepie_get_parser().
 *
 * @return
 *   An object with title, description, options->link and an items array, or
 *   FALSE if there is no feed URL or SimplePie reports an error. Each item
 *   has title, description and an options object holding original_url,
 *   timestamp, guid, original_author, domains, tags, raw and, when present,
 *   enclosures and location.
 */
function _parser_simplepie_feedapi_parse($feed, $settings = array()) {
  if (!is_object($feed) || empty($feed->url)) {
    return FALSE;
  }
  $parser = _parser_simplepie_get_parser($feed->url, TRUE, $settings);
  if ($parser->error) {
    return FALSE;
  }

  // Do we have html_entity_decode? Some feeds return html entities in the links
  $entity_decode = function_exists('html_entity_decode');

  // Construct the standard form of the parsed feed
  $parsed_source = new stdClass();
  $parsed_source->description = $parser->get_description();
  $parsed_source->title = _parser_simplepie_title($parser->get_title(), $parser->get_description());
  $parsed_source->options = new stdClass();
  $feed_link = $parser->get_link();
  $parsed_source->options->link = ($entity_decode && $feed_link) ? html_entity_decode($feed_link) : $feed_link;
  $parsed_source->items = array();

  $items_num = $parser->get_item_quantity();
  for ($i = 0; $i < $items_num; $i++) {
    $simplepie_item = $parser->get_item($i);

    $curr_item = new stdClass();
    $curr_item->title = _parser_simplepie_title($simplepie_item->get_title(), $simplepie_item->get_content());
    $curr_item->description = $simplepie_item->get_content();
    $curr_item->options = new stdClass();
    $item_link = $simplepie_item->get_link();
    $curr_item->options->original_url = ($entity_decode && $item_link) ? html_entity_decode($item_link) : $item_link;
    // U = std. unix timestamp
    $curr_item->options->timestamp = $simplepie_item->get_date("U");
    $curr_item->options->guid = $simplepie_item->get_id();
    $curr_item->options->original_author = $simplepie_item->get_author();

    // Enclosures, grouped by MIME type and subtype.
    $enclosures = $simplepie_item->get_enclosures();
    if (is_array($enclosures)) {
      foreach ($enclosures as $enclosure) {
        $mime = $enclosure->get_real_type();
        if ($mime != '') {
          // explode() replaces split(), which no longer exists in PHP 7+.
          // A MIME string without a "/" gets an empty subtype.
          $mime_parts = explode('/', $mime);
          $type = $mime_parts[0];
          $subtype = isset($mime_parts[1]) ? $mime_parts[1] : '';
          $curr_item->options->enclosures[$type][$subtype][] = $enclosure;
        }
      }
    }

    // Location
    $latitude = $simplepie_item->get_latitude();
    $longitude = $simplepie_item->get_longitude();
    if (!is_null($latitude) && !is_null($longitude)) {
      // Create the container explicitly; assigning through a property that
      // does not exist yet is an error on current PHP versions.
      $location = new stdClass();
      $location->latitude = array($latitude);
      $location->longitude = array($longitude);
      $curr_item->options->location = $location;
    }

    // Extract tags related to the item. $domains maps each category scheme
    // to the indexes of its tags within $tags.
    $simplepie_tags = $simplepie_item->get_categories();
    $tags = array();
    $domains = array();
    if (is_array($simplepie_tags)) {
      foreach ($simplepie_tags as $tag) {
        $tags[] = (string) $tag->term;
        $domain = (string) $tag->get_scheme();
        if (!empty($domain)) {
          if (!isset($domains[$domain])) {
            $domains[$domain] = array();
          }
          $domains[$domain][] = count($tags) - 1;
        }
      }
    }
    $curr_item->options->domains = $domains;
    $curr_item->options->tags = $tags;

    // Stick the raw data onto the feed item.
    $curr_item->options->raw = $simplepie_item->data;
    $parsed_source->items[] = $curr_item;
  }
  return $parsed_source;
}
/**
 * Find (and if needed create) the directory used for SimplePie's cache.
 *
 * The directory is "simplepie_cache" below the path returned by
 * file_directory_path().
 *
 * @return
 *   The cache directory path, or FALSE if it is not writable.
 */
function _parser_simplepie_sanitize_cache() {
  $cache_dir = file_directory_path() . '/simplepie_cache';

  // Fast path: the directory is already there and can be written to.
  if (is_writeable($cache_dir) && is_dir($cache_dir)) {
    return $cache_dir;
  }

  $cache_dir = file_create_path($cache_dir);
  $is_missing = !file_exists($cache_dir);
  if ($is_missing && is_writable(file_directory_path())) {
    mkdir($cache_dir);
  }

  return is_writeable($cache_dir) ? $cache_dir : FALSE;
}
/**
 * Create and initialise a SimplePie object for a feed URL.
 *
 * Loads simplepie.inc if the SimplePie class is not available yet, taking it
 * from the Libraries module's "simplepie" path when that file exists and
 * from this module's own directory otherwise.
 *
 * @param $url
 *   The feed's url
 * @param $enable_cache
 *   If FALSE, the caching is disabled. Caching is also disabled when no
 *   writable cache directory is available.
 * @param $settings
 *   Parser settings; 'cache_lifetime' is used as the cache duration if set.
 *
 * @return
 *   SimplePie object, after init() has been called on it.
 */
function _parser_simplepie_get_parser($url, $enable_cache = TRUE, $settings = array()) {
  if (!class_exists('SimplePie')) {
    $library_file = NULL;
    if (module_exists('libraries')) {
      $library_file = libraries_get_path('simplepie') . '/simplepie.inc';
    }
    if ($library_file !== NULL && file_exists($library_file)) {
      $path = $library_file;
    }
    else {
      $path = drupal_get_path('module', 'parser_simplepie') . '/simplepie.inc';
    }
    require_once $path;
  }

  $simplepie = new SimplePie();
  $simplepie->set_feed_url($url);
  $simplepie->set_timeout(15);
  if (isset($settings['cache_lifetime'])) {
    $simplepie->set_cache_duration($settings['cache_lifetime']);
  }
  $simplepie->set_stupidly_fast(TRUE);
  $simplepie->encode_instead_of_strip(FALSE);

  // Caching is only possible when a usable cache directory was found.
  $cache_location = _parser_simplepie_sanitize_cache();
  $use_cache = ($cache_location === FALSE) ? FALSE : $enable_cache;
  $simplepie->enable_cache($use_cache);
  $simplepie->set_cache_location($cache_location);

  $simplepie->init();
  return $simplepie;
}
/**
 * Prepare raw data to be a title.
 *
 * @param $title
 *   The title taken from the feed, possibly empty.
 * @param $body
 *   Text to derive a title from when $title is empty.
 *
 * @return
 *   $title unchanged when it is not empty or when there is no body;
 *   otherwise up to the first three words of $body joined by single spaces.
 */
function _parser_simplepie_title($title, $body = FALSE) {
  if (empty($title) && !empty($body)) {
    // Split on whitespace and commas. Empty pieces are dropped so leading
    // separators do not produce a blank first word.
    $words = preg_split("/[\\s,]+/", $body, -1, PREG_SPLIT_NO_EMPTY);
    // Take at most three words; a shorter body yields a shorter title
    // instead of reading offsets that do not exist.
    $title = implode(' ', array_slice($words, 0, 3));
  }
  return $title;
}
Functions
Name | Description |
---|---|
parser_simplepie_feedapi_feed | Implementation of hook_feedapi_feed(). |
parser_simplepie_feedapi_settings_form | Implementation of hook_feedapi_settings_form(). If a module provides parsers and processors it MUST evaluate the $type variable to return different forms for parsers and processors. There might be a better term for parsers and processors than $type. |
parser_simplepie_help | Implementation of hook_help(). |
parser_simplepie_nodeapi | Delete the feed's SimplePie cache file when the feed node is deleted |
parser_simplepie_simplify_raw_item | This function simplifies a raw feed item. |
_parser_simplepie_feedapi_parse | Parsing the feed |
_parser_simplepie_get_parser | Set SimplePie setting |
_parser_simplepie_sanitize_cache | Set the default caching directory if the current setting is not useable |
_parser_simplepie_title | Prepare raw data to be a title |