parser_simplepie.module in FeedAPI 5
Same filename and directory in other branches
Parse the incoming URL with SimplePie then provide a data structure of the feed
File
parser_simplepie/parser_simplepie.module (View source)
<?php
/**
* @file
* Parse the incoming URL with SimplePie, then provide a data structure of the feed.
*/
/**
 * Implementation of hook_help().
 *
 * @param $section
 *   The help section identifier to answer for.
 * @return
 *   The translated text for 'admin/modules#description' or
 *   'feedapi/full_name'; nothing (NULL) for every other section.
 */
function parser_simplepie_help($section) {
  // Loose (==) comparison keeps switch-like matching semantics.
  if ($section == 'admin/modules#description') {
    return t('Provide a common syndication parser for FeedAPI-compatible modules powered by SimplePie library.');
  }
  if ($section == 'feedapi/full_name') {
    return t('Parser SimplePie');
  }
}
/**
 * Implementation of hook_requirements().
 *
 * Reports whether simplepie.inc is present in the module directory and, in
 * the runtime phase, which SimplePie version was loaded.
 *
 * @param $phase
 *   The phase being checked. Only 'install' and 'runtime' are handled.
 * @return
 *   An array that holds a 'simplepie' requirement entry when there is
 *   something to report, and is empty otherwise.
 */
function parser_simplepie_requirements($phase) {
  $requirements = array();
  // Ensure translations don't break at install time
  $t = get_t();
  switch ($phase) {
    case 'install':
    case 'runtime':
      $path = drupal_get_path('module', 'parser_simplepie') . '/simplepie.inc';
      if (!file_exists($path)) {
        // A missing library is only a warning while installing, but an error
        // once the site is running.
        $requirements['simplepie'] = array(
          'title' => $t("FeedAPI SimplePie"),
          'description' => $t("Obtain the !simplepie package and copy simplepie.inc to the parser_simplepie directory.", array(
            '!simplepie' => l('SimplePie', 'http://simplepie.org/downloads', array(), NULL, NULL, TRUE),
          )),
          'severity' => $phase == 'install' ? REQUIREMENT_WARNING : REQUIREMENT_ERROR,
          'value' => $t('simplepie.inc file missing'),
        );
      }
      elseif ($phase == 'runtime') {
        // Load the library so that SIMPLEPIE_VERSION is defined.
        require_once $path;
        $requirements['simplepie'] = array(
          'title' => $t('SimplePie Parser'),
          // Use $t() like every other string in this function instead of
          // calling t() directly.
          'description' => $t('The current installed version of SimplePie is !version', array(
            '!version' => '<strong>' . SIMPLEPIE_VERSION . '</strong>',
          )),
          'severity' => REQUIREMENT_OK,
          'value' => $t('Installed correctly'),
        );
      }
  }
  return $requirements;
}
/**
 * Implementation of hook_nodeapi().
 *
 * Deletes the SimplePie cache file that belongs to a feed when the feed node
 * is deleted.
 *
 * @param $node
 *   The node being acted on (passed by reference).
 * @param $op
 *   The operation being performed; only 'delete' is handled.
 */
function parser_simplepie_nodeapi(&$node, $op) {
  if ($op != 'delete') {
    return;
  }
  // The cache file name is derived from the feed URL, so a node without one
  // has nothing to clean up (even if its type is FeedAPI-enabled).
  if (!isset($node->feed->url)) {
    return;
  }
  $cache_dir = _parser_simplepie_sanitize_cache();
  if ($cache_dir === FALSE) {
    // No usable cache directory. Concatenating FALSE would produce a path at
    // the filesystem root ("/<hash>.spc"), which must never be unlinked.
    return;
  }
  $cache_file = $cache_dir . '/' . md5($node->feed->url) . '.spc';
  if (file_exists($cache_file)) {
    unlink($cache_file);
  }
}
/**
 * Implementation of hook_feedapi_feed().
 *
 * @param $op
 *   The operation: 'type', 'compatible' or 'parse'.
 * @param ...
 *   For 'compatible' and 'parse', the second argument is the feed: either a
 *   URL string or an object with a url property. 'type' takes no feed.
 * @return
 *   - 'type': array("XML feed").
 *   - 'compatible': the string "XML feed" when the feed host looks valid.
 *   - 'parse': the result of _parser_simplepie_feedapi_parse() for a feed
 *     object.
 *   - FALSE for anything else, or when the feed URL has no valid host.
 */
function parser_simplepie_feedapi_feed($op) {
  $args = func_get_args();

  // Describing the supported type does not involve any feed, so it must not
  // depend on URL validation.
  if ($op == 'type') {
    return array(
      "XML feed",
    );
  }

  // Work out the URL from either a plain string or a feed object.
  $source = isset($args[1]) ? $args[1] : NULL;
  if (is_string($source)) {
    $url = $source;
  }
  elseif (is_object($source) && isset($source->url)) {
    $url = $source->url;
  }
  else {
    return FALSE;
  }

  // Validate the URL, if it is not basically valid, why send to simplepie object
  $url_parts = parse_url($url);
  $host = (is_array($url_parts) && isset($url_parts['host'])) ? $url_parts['host'] : '';
  if ($host === '') {
    return FALSE;
  }
  // Anchored dotted-quad pattern; the first octet may not be 0.
  $valid_ip_regex = '/^([1-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])(\.([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])){3}$/';
  // gethostbyname() hands back its input unchanged when it cannot resolve
  // it; in that case the host is only acceptable if it is a literal IP.
  if (gethostbyname($host) === $host && !preg_match($valid_ip_regex, $host)) {
    return FALSE;
  }

  switch ($op) {
    case 'compatible':
      // Stop gap for simplefeed.inc version <= 1.1: the feed is not fetched
      // here, every feed with a valid host is reported as compatible.
      return "XML feed";
    case 'parse':
      // Parsing needs the feed object, a bare URL string is not enough.
      return is_object($source) ? _parser_simplepie_feedapi_parse($source) : FALSE;
  }
  return FALSE;
}
/**
 * This function simplifies a raw feed item.
 *
 * Takes the first entry of $raw_item['child'] and reduces each of its keys to
 * the 'data' value(s) of the elements stored under that key: a single value
 * when there is at most one element, a list of values when there are several.
 *
 * @param $raw_item
 *   The raw item array; expected to carry a 'child' array.
 * @return
 *   The simplified array, or $raw_item itself when there is nothing to
 *   simplify.
 */
function parser_simplepie_simplify_raw_item($raw_item) {
  // Without a 'child' array there is nothing to simplify; return the input
  // untouched rather than handing NULL to array_shift().
  if (!is_array($raw_item) || !isset($raw_item['child']) || !is_array($raw_item['child'])) {
    return $raw_item;
  }
  $item = array_shift($raw_item['child']);
  if (!$item || !is_array($item)) {
    return $raw_item;
  }
  $simple_item = array();
  foreach ($item as $key => $elements) {
    if (is_array($elements) && count($elements) > 1) {
      foreach ($elements as $element) {
        $simple_item[$key][] = isset($element['data']) ? $element['data'] : NULL;
      }
    }
    else {
      $simple_item[$key] = isset($elements[0]['data']) ? $elements[0]['data'] : NULL;
    }
  }
  return $simple_item;
}
/**
 * Parsing the feed
 *
 * @param $feed
 *   The feed object; its url property is the address handed to SimplePie.
 * @return
 *   The structured data extracted from the feed (title, description, options
 *   and items), or FALSE when $feed is unusable or SimplePie reports an error.
 */
function _parser_simplepie_feedapi_parse($feed) {
  // Nothing to fetch without a feed object that carries a URL.
  if (!is_object($feed) || !isset($feed->url)) {
    return FALSE;
  }
  $parser = _parser_simplepie_get_parser($feed->url);
  if ($parser->error) {
    return FALSE;
  }
  // Do we have html_entity_decode? Some feeds return html entities in the links
  $entity_decode = function_exists('html_entity_decode');

  // Construct the standard form of the parsed feed
  $parsed_source = new stdClass();
  $parsed_source->title = _parser_simplepie_title($parser->get_title());
  $parsed_source->description = $parser->get_description();
  $parsed_source->options = new stdClass();
  $feed_link = $parser->get_link();
  $parsed_source->options->link = $entity_decode ? html_entity_decode($feed_link) : $feed_link;
  $parsed_source->items = array();

  $items_num = $parser->get_item_quantity();
  for ($i = 0; $i < $items_num; $i++) {
    $curr_item = new stdClass();
    $simplepie_item = $parser->get_item($i);
    $curr_item->title = _parser_simplepie_title($simplepie_item->get_title());
    $curr_item->description = $simplepie_item->get_content();
    $curr_item->options = new stdClass();
    $item_link = $simplepie_item->get_link();
    $curr_item->options->original_url = $entity_decode ? html_entity_decode($item_link) : $item_link;
    // U = std. unix timestamp
    $curr_item->options->timestamp = $simplepie_item->get_date("U");
    $curr_item->options->guid = $simplepie_item->get_id();
    $curr_item->options->original_author = $simplepie_item->get_author();

    // Enclosures, grouped by MIME type and subtype
    $enclosures = $simplepie_item->get_enclosures();
    if (is_array($enclosures)) {
      foreach ($enclosures as $enclosure) {
        $mime = $enclosure->get_real_type();
        if ($mime != '') {
          // explode() instead of split(), which no longer exists in PHP 7.
          // A MIME string without "/" is filed under an empty subtype.
          $mime_parts = explode('/', $mime);
          $type = $mime_parts[0];
          $subtype = isset($mime_parts[1]) ? $mime_parts[1] : '';
          $curr_item->options->enclosures[$type][$subtype][] = $enclosure;
        }
      }
    }

    // Location
    $latitude = $simplepie_item->get_latitude();
    $longitude = $simplepie_item->get_longitude();
    if (!is_null($latitude) && !is_null($longitude)) {
      // Create the container explicitly; assigning into a property of an
      // undefined object is an error on current PHP versions.
      $curr_item->options->location = new stdClass();
      $curr_item->options->location->latitude = array($latitude);
      $curr_item->options->location->longitude = array($longitude);
    }

    // Extract tags related to the item
    $simplepie_tags = $simplepie_item->get_categories();
    $tags = array();
    $domains = array();
    // is_array() rather than count(): the category list may not be an array
    // at all when the item has no categories.
    if (is_array($simplepie_tags)) {
      foreach ($simplepie_tags as $tag) {
        $domain = (string) $tag->get_scheme();
        $tags[] = (string) $tag->term;
        if (!empty($domain)) {
          if (!isset($domains[$domain])) {
            $domains[$domain] = array();
          }
          // Remember the index of the tag just added under its domain.
          $domains[$domain][] = count($tags) - 1;
        }
      }
    }
    $curr_item->options->tags = $tags;
    $curr_item->options->domains = $domains;

    // Stick the raw data onto the feed item.
    $curr_item->options->raw = $simplepie_item->data;
    $parsed_source->items[] = $curr_item;
  }
  return $parsed_source;
}
/**
 * Set the default caching directory if the current setting is not useable
 *
 * Looks for a writable "simplepie_cache" directory below the files directory
 * and tries to create it when it does not exist yet.
 *
 * @return
 *   The path of the writable cache directory, or FALSE when none is available.
 */
function _parser_simplepie_sanitize_cache() {
  $cache_location = file_directory_path() . '/simplepie_cache';
  if (is_dir($cache_location) && is_writable($cache_location)) {
    return $cache_location;
  }
  $cache_location = file_create_path($cache_location);
  if (!$cache_location) {
    // No usable path came back; do not hand an empty value to mkdir().
    return FALSE;
  }
  if (!file_exists($cache_location) && is_writable(file_directory_path())) {
    mkdir($cache_location);
  }
  // Must be a directory: a writable regular file with this name cannot serve
  // as a cache location.
  if (!is_dir($cache_location) || !is_writable($cache_location)) {
    return FALSE;
  }
  return $cache_location;
}
/**
 * Set SimplePie setting
 *
 * Loads simplepie.inc from the module directory, configures a SimplePie
 * object for the given URL and initialises it.
 *
 * @param $url
 *   The feed's url
 * @param $enable_cache
 *   Optional. Pass FALSE to switch SimplePie's cache off for this call.
 *   Defaults to TRUE, in which case the cache is used whenever a writable
 *   cache directory is available.
 * @return
 *   SimplePie object
 */
function _parser_simplepie_get_parser($url, $enable_cache = TRUE) {
  require_once drupal_get_path('module', 'parser_simplepie') . '/simplepie.inc';
  $parser = new SimplePie();
  $parser->set_feed_url($url);
  $parser->set_timeout(15);
  $parser->set_stupidly_fast(TRUE);
  $parser->encode_instead_of_strip(FALSE);

  // Only look for (and possibly create) the cache directory when caching is
  // wanted at all.
  $cache_location = $enable_cache ? _parser_simplepie_sanitize_cache() : FALSE;
  if ($cache_location !== FALSE) {
    $parser->enable_cache(TRUE);
    $parser->set_cache_location($cache_location);
  }
  else {
    // No directory to point SimplePie at, so leave its location setting alone.
    $parser->enable_cache(FALSE);
  }

  $parser->init();
  return $parser;
}
/**
 * Prepare raw data to be a title
 *
 * Strips the tags from the raw title and decodes its HTML entities, quotes
 * included.
 *
 * @param $title
 *   The raw title text.
 * @return
 *   The plain-text title.
 */
function _parser_simplepie_title($title) {
  $plain = strip_tags($title);
  // The charset argument is only passed on PHP 5 and later.
  $is_php4 = version_compare(PHP_VERSION, '5.0.0', '<');
  if ($is_php4) {
    return html_entity_decode($plain, ENT_QUOTES);
  }
  return html_entity_decode($plain, ENT_QUOTES, 'UTF-8');
}
Functions
Name | Description |
---|---|
parser_simplepie_feedapi_feed | Implementation of hook_feedapi_feed(). |
parser_simplepie_help | Implementation of hook_help(). |
parser_simplepie_nodeapi | Delete cache validating functions when feed is deleted |
parser_simplepie_requirements | Implementation of hook_requirements(). |
parser_simplepie_simplify_raw_item | This function simplifies a raw feed item. |
_parser_simplepie_feedapi_parse | Parsing the feed |
_parser_simplepie_get_parser | Set SimplePie setting |
_parser_simplepie_sanitize_cache | Set the default caching directory if the current setting is not useable |
_parser_simplepie_title | Prepare raw data to be a title |