You are here

function _parser_common_syndication_RSS20_parse in Feeds 7.2

Same name and namespace in other branches
  1. 8.2 libraries/common_syndication_parser.inc \_parser_common_syndication_RSS20_parse()
  2. 6 libraries/common_syndication_parser.inc \_parser_common_syndication_RSS20_parse()
  3. 7 libraries/common_syndication_parser.inc \_parser_common_syndication_RSS20_parse()

Parses an RSS 2.0 feed.

1 call to _parser_common_syndication_RSS20_parse()
common_syndication_parser_parse in libraries/common_syndication_parser.inc
Parse the feed into a data structure.

File

libraries/common_syndication_parser.inc, line 399
Downloading and parsing functions for Common Syndication Parser. Pillaged from FeedAPI common syndication parser.

Code

/**
 * Parse RSS2.0 feeds.
 *
 * @param SimpleXMLElement $feed_XML
 *   The feed document. It is read through ->channel and ->xpath('//item').
 *
 * @return array
 *   An associative array with the keys:
 *   - title: Channel title, passed through _parser_common_syndication_title().
 *   - description: Channel description, or an empty string.
 *   - link: Channel link, or an empty string.
 *   - items: A list of item arrays, each with the keys title, description,
 *     author_name, timestamp, url, guid, source:title, source:url,
 *     geolocations, domains and tags.
 */
function _parser_common_syndication_RSS20_parse($feed_XML) {
  $ns = array(
    "content" => "http://purl.org/rss/1.0/modules/content/",
    "dc" => "http://purl.org/dc/elements/1.1/",
    "georss" => "http://www.georss.org/georss",
  );
  $parsed_source = array();

  // Detect the title.
  $parsed_source['title'] = isset($feed_XML->channel->title) ? _parser_common_syndication_title("{$feed_XML->channel->title}") : "";

  // Detect the description.
  $parsed_source['description'] = isset($feed_XML->channel->description) ? "{$feed_XML->channel->description}" : "";

  // Detect the link.
  $parsed_source['link'] = isset($feed_XML->channel->link) ? "{$feed_XML->channel->link}" : "";
  $parsed_source['items'] = array();

  // xpath() may return a non-array value on failure; treat that as "no items"
  // instead of handing it to foreach.
  $feed_items = $feed_XML->xpath('//item');
  if (!is_array($feed_items)) {
    $feed_items = array();
  }

  foreach ($feed_items as $news) {
    $title = $body = $original_author = $original_url = $guid = '';

    // Get optional source url. This must be read before $news is cast to an
    // array below, because the cast does not keep the attribute accessible.
    $source_url = (string) $news->source['url'];

    // Categories are collected as elements (not via the array cast) so that
    // their "domain" attribute can still be read.
    $categories = $news->xpath('category');
    if (!is_array($categories)) {
      $categories = array();
    }

    // Get children for current namespace.
    $content = (array) $news->children($ns["content"]);
    $dc = (array) $news->children($ns["dc"]);
    $georss = (array) $news->children($ns["georss"]);
    $news = (array) $news;

    if (isset($news['title'])) {
      $title = "{$news['title']}";
    }
    if (isset($news['description'])) {
      $body = "{$news['description']}";
    }

    // Some sources use content:encoded as description i.e.
    // PostNuke PageSetter module. The longer of the candidates wins.
    // content:encoded for PHP < 5.1.2.
    if (isset($news['encoded'])) {
      if (strlen($body) < strlen("{$news['encoded']}")) {
        $body = "{$news['encoded']}";
      }
    }

    // content:encoded for PHP >= 5.1.2.
    if (isset($content['encoded'])) {
      if (strlen($body) < strlen("{$content['encoded']}")) {
        $body = "{$content['encoded']}";
      }
    }

    // $body is always a string at this point, so an item without any
    // description simply keeps an empty description.
    // Author: plain <author> takes precedence over dc:creator.
    if (!empty($news['author'])) {
      $original_author = "{$news['author']}";
    }
    elseif (!empty($dc["creator"])) {
      $original_author = (string) $dc["creator"];
    }

    // The link doubles as the GUID unless an explicit <guid> is present.
    if (!empty($news['link'])) {
      $original_url = "{$news['link']}";
      $guid = $original_url;
    }
    if (!empty($news['guid'])) {
      $guid = "{$news['guid']}";
    }

    // Geolocation. The variables are initialised BEFORE reading
    // georss:featureName so that the feature name is not discarded.
    $lat = $lon = $geoname = NULL;
    if (!empty($georss['featureName'])) {
      $geoname = "{$georss['featureName']}";
    }
    if (!empty($georss['point'])) {
      // A repeated <georss:point> shows up as an array; use the first one.
      $point = is_array($georss['point']) ? reset($georss['point']) : $georss['point'];
      // Split on any run of whitespace and require both coordinates, so a
      // malformed point does not yield an undefined offset or empty values.
      $latlon = preg_split('/\s+/', trim("{$point}"), -1, PREG_SPLIT_NO_EMPTY);
      if (is_array($latlon) && count($latlon) >= 2) {
        $lat = "{$latlon[0]}";
        $lon = "{$latlon[1]}";
        if (!$geoname) {
          // No feature name given: fall back to the coordinates as the name.
          $geoname = "{$lat} {$lon}";
        }
      }
    }

    // Collect category names, and for each category domain the indexes of
    // the categories (within 'RSS Categories') that belong to it.
    $additional_taxonomies = array();
    $additional_taxonomies['RSS Categories'] = array();
    $additional_taxonomies['RSS Domains'] = array();
    foreach ($categories as $category) {
      $additional_taxonomies['RSS Categories'][] = "{$category}";
      if (isset($category['domain'])) {
        $domain = "{$category['domain']}";
        if (!empty($domain)) {
          if (!isset($additional_taxonomies['RSS Domains'][$domain])) {
            $additional_taxonomies['RSS Domains'][$domain] = array();
          }
          $additional_taxonomies['RSS Domains'][$domain][] = count($additional_taxonomies['RSS Categories']) - 1;
        }
      }
    }

    $item = array();
    $item['title'] = _parser_common_syndication_title($title, $body);
    $item['description'] = $body;
    $item['author_name'] = $original_author;

    // Timestamp: pubDate, then dc:date, then the current time.
    if (!empty($news['pubDate'])) {
      $item['timestamp'] = _parser_common_syndication_parse_date($news['pubDate']);
    }
    elseif (!empty($dc['date'])) {
      $item['timestamp'] = _parser_common_syndication_parse_date($dc['date']);
    }
    else {
      $item['timestamp'] = time();
    }
    $item['url'] = trim($original_url);
    $item['guid'] = $guid;
    if (!empty($news['source'])) {
      $item['source:title'] = $news['source'];
    }
    else {
      $item['source:title'] = NULL;
    }
    $item['source:url'] = trim($source_url);

    // A geolocation is only reported when name, latitude and longitude are
    // all known.
    $item['geolocations'] = array();
    if (isset($geoname, $lat, $lon)) {
      $item['geolocations'] = array(
        array(
          'name' => $geoname,
          'lat' => $lat,
          'lon' => $lon,
        ),
      );
    }
    $item['domains'] = $additional_taxonomies['RSS Domains'];
    $item['tags'] = $additional_taxonomies['RSS Categories'];
    $parsed_source['items'][] = $item;
  }
  return $parsed_source;
}