function asset_search_parse_feed in Asset 6
Same name and namespace in other branches
- 5.2 contrib/asset_search/asset_search.parser.inc \asset_search_parse_feed()
Parse an RSS feed and return an array of items. Taken from aggregator_parse_feed().
1 call to asset_search_parse_feed()
- asset_search_fetch in contrib/asset_search/asset_search.module - Fetch an RSS feed and return its items as pseudo-assets
File
- contrib/asset_search/asset_search.parser.inc, line 13 - Include file to parse RSS feeds into an array of items that will then be used as pseudo-assets.
Code
/**
 * Parse an RSS/Atom feed and return its channel data with the parsed items.
 *
 * Adapted from aggregator_parse_feed(). The XML is run through the
 * asset_search_element_start/_end/_data handlers, which are expected to fill
 * the global $items, $image and $channel arrays. Every parsed item is turned
 * into a pseudo-asset object, handed to the owning module's asset_search hook
 * ('feed item' operation), cached in 'cache_asset_search' and collected under
 * its cache id.
 *
 * @param $data
 *   The raw XML of the feed. Passed by reference to
 *   drupal_xml_parser_create() and then to xml_parse().
 * @param $type
 *   Search type identifier; resolved through asset_search_types() into an
 *   array whose 'module' entry names the module that receives the hook call.
 * @param $value
 *   Not used by this function; kept for interface compatibility.
 *
 * @return
 *   The channel array with an additional 'items' entry (cid => item object),
 *   or 0 when the feed could not be parsed.
 */
function asset_search_parse_feed(&$data, $type, $value) {
  global $items, $image, $channel;

  $tmp_assets = array();
  $type = asset_search_types($type);

  // Unset the global variables before we use them:
  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
  $items = array();
  $image = array();
  $channel = array();

  // Parse the data:
  $xml_parser = drupal_xml_parser_create($data);
  if (!$xml_parser) {
    // No usable parser could be created (e.g. an encoding that cannot be
    // handled); there is nothing to parse with, so report failure like a
    // broken feed does.
    return 0;
  }
  xml_set_element_handler($xml_parser, 'asset_search_element_start', 'asset_search_element_end');
  xml_set_character_data_handler($xml_parser, 'asset_search_element_data');
  if (!xml_parse($xml_parser, $data, 1)) {
    // Read the error details before releasing the parser; it must be freed
    // on the failure path as well.
    $error_args = array(
      '%error' => xml_error_string(xml_get_error_code($xml_parser)),
      '%line' => xml_get_current_line_number($xml_parser),
    );
    xml_parser_free($xml_parser);
    watchdog('asset_search', t('The feed seems to be broken, due to an error "%error" on line %line.', $error_args), WATCHDOG_WARNING);
    drupal_set_message(t('The feed seems to be broken, because of error "%error" on line %line.', $error_args), 'error');
    return 0;
  }
  xml_parser_free($xml_parser);

  // Tags that may carry the body text when there is no DESCRIPTION (Atom
  // feeds use CONTENT and/or SUMMARY), in order of preference.
  $description_keys = array('CONTENT:ENCODED', 'SUMMARY', 'CONTENT');

  // Tags that may carry the publication date, in order of preference.
  $date_keys = array(
    'PUBDATE',
    'DC:DATE',
    'DCTERMS:ISSUED',
    'DCTERMS:CREATED',
    'DCTERMS:MODIFIED',
    'ISSUED',
    'CREATED',
    'MODIFIED',
    'PUBLISHED',
    'UPDATED',
  );

  foreach ($items as $item) {
    // Prepare the item. A dedicated loop variable is used so the $value
    // parameter is not overwritten.
    foreach ($item as $key => $item_value) {
      $item[$key] = trim($item_value);
    }

    // Resolve the item's title. If no title is found, we use up to 40
    // characters of the description ending at a word boundary but not
    // splitting potential entities.
    if (!empty($item['TITLE'])) {
      $title = $item['TITLE'];
    }
    else {
      $raw_description = isset($item['DESCRIPTION']) ? $item['DESCRIPTION'] : '';
      $title = preg_replace('/^(.*)[^\\w;&].*?$/', "\\1", truncate_utf8($raw_description, 40));
    }

    // Resolve the item's link and GUID. Both are explicitly reset to NULL
    // when absent so no value leaks in from a previous iteration and no
    // undefined variable is read below.
    $link = !empty($item['LINK']) ? $item['LINK'] : NULL;
    $guid = !empty($item['GUID']) ? $item['GUID'] : NULL;

    // Normalize the description from the alternative body tags.
    // NOTE(review): the normalized DESCRIPTION is not read again in this
    // function; it is kept to mirror the original item preparation.
    foreach ($description_keys as $description_key) {
      if (!empty($item[$description_key])) {
        $item['DESCRIPTION'] = $item[$description_key];
        break;
      }
    }

    // Try to resolve and parse the item's publication date. If no date is
    // found, we use the current date instead.
    $date = 'now';
    foreach ($date_keys as $date_key) {
      if (!empty($item[$date_key])) {
        $date = $item[$date_key];
        break;
      }
    }

    $timestamp = strtotime($date);
    // strtotime() returns FALSE (PHP >= 5.1.0) or -1 (older) on failure;
    // both are caught by this comparison.
    if ($timestamp <= 0) {
      // Returns FALSE on failure.
      $timestamp = asset_search_parse_w3cdtf($date);
      if (!$timestamp) {
        // Better than nothing.
        $timestamp = time();
      }
    }

    // Derive the cache id from the GUID when there is one. Without this
    // fallback every GUID-less item would hash to the same id and the items
    // would overwrite each other in both the result array and the cache.
    if ($guid !== NULL) {
      $cid_source = $guid;
    }
    elseif ($link !== NULL) {
      $cid_source = $link;
    }
    else {
      $cid_source = $title;
    }

    $tmp = new stdClass();
    $tmp->aid = -1;
    $tmp->created = $timestamp;
    $tmp->title = $title;
    $tmp->link = $link;
    $tmp->guid = $guid;
    $tmp->cid = md5($cid_source);
    $tmp->search_type = $type;

    // Let the owning module complete the item. If the hook is missing or
    // does not return an object, keep the item as built above instead of
    // dereferencing a non-object.
    $processed = module_invoke($type['module'], 'asset_search', 'feed item', $type, $tmp);
    if (is_object($processed)) {
      $tmp = $processed;
    }

    cache_set($tmp->cid, 'cache_asset_search', serialize($tmp));
    $tmp_assets[$tmp->cid] = $tmp;
  }

  $channel['items'] = $tmp_assets;
  return $channel;
}