You are here

function _parser_common_syndication_atom10_parse in Feeds 6

Same name and namespace in other branches
  1. 8.2 libraries/common_syndication_parser.inc \_parser_common_syndication_atom10_parse()
  2. 7.2 libraries/common_syndication_parser.inc \_parser_common_syndication_atom10_parse()
  3. 7 libraries/common_syndication_parser.inc \_parser_common_syndication_atom10_parse()

Parse Atom 1.0 feeds into a structured array of feed metadata and items.

1 call to _parser_common_syndication_atom10_parse()
common_syndication_parser_parse in libraries/common_syndication_parser.inc
Parse the feed into a data structure.

File

libraries/common_syndication_parser.inc, line 93
Downloading and parsing functions for Common Syndication Parser. Pillaged from FeedAPI common syndication parser.

Code

/**
 * Parse an Atom 1.0 feed into a structured array.
 *
 * @param $feed_XML
 *   The feed's root element. It is accessed through SimpleXML-style property,
 *   attribute, xpath() and children() calls.
 *
 * @return
 *   An associative array with the keys:
 *   - title: The feed title, passed through _parser_common_syndication_title().
 *   - description: The feed subtitle, or an empty string when absent.
 *   - link: The feed link, prefixed with the base URL when it is relative.
 *   - items: A list of entries. Each entry is an array with the keys title,
 *     description, author_name, url, guid, geolocations, tags and domains.
 *     The timestamp key is only present when the entry carries a published,
 *     issued or updated element.
 */
function _parser_common_syndication_atom10_parse($feed_XML) {
  $parsed_source = array();
  $ns = array(
    "georss" => "http://www.georss.org/georss",
  );

  // Only an absolute base URL is usable for resolving relative links.
  // NOTE(review): "@base" selects an un-namespaced attribute; confirm whether
  // "xml:base" was intended.
  $base = $feed_XML->xpath("@base");
  // Guard against a non-array xpath() result before shifting.
  $base = is_array($base) ? (string) array_shift($base) : '';
  if (!valid_url($base, TRUE)) {
    $base = FALSE;
  }

  // Detect the title
  $parsed_source['title'] = isset($feed_XML->title) ? _parser_common_syndication_title("{$feed_XML->title}") : "";

  // Detect the description
  $parsed_source['description'] = isset($feed_XML->subtitle) ? "{$feed_XML->subtitle}" : "";
  $parsed_source['link'] = _parser_common_syndication_link($feed_XML->link);
  // A link that is valid but not absolute is resolved against the base URL.
  if (valid_url($parsed_source['link']) && !valid_url($parsed_source['link'], TRUE) && !empty($base)) {
    $parsed_source['link'] = $base . $parsed_source['link'];
  }

  $parsed_source['items'] = array();
  foreach ($feed_XML->entry as $news) {
    // URL candidates, lowest priority first: the entry id (when it is an
    // absolute URL), then content[@src], then the entry's link element.
    $original_url = NULL;
    $guid = !empty($news->id) ? "{$news->id}" : NULL;
    if (valid_url($guid, TRUE)) {
      $original_url = $guid;
    }

    // GeoRSS: optional feature name and a "latitude longitude" point.
    $georss = (array) $news->children($ns["georss"]);
    $geoname = '';
    if (isset($georss['featureName'])) {
      $geoname = "{$georss['featureName']}";
    }
    $lat = $lon = NULL;
    if (isset($georss['point'])) {
      // Repeated point elements arrive as an array; use the first one.
      $point = is_array($georss['point']) ? reset($georss['point']) : $georss['point'];
      // Split on any run of whitespace so tabs, newlines and padding do not
      // produce empty tokens.
      $latlon = preg_split('/\s+/', trim("{$point}"), -1, PREG_SPLIT_NO_EMPTY);
      if (is_array($latlon) && count($latlon) >= 2) {
        $lat = $latlon[0];
        $lon = $latlon[1];
        if (!$geoname) {
          $geoname = "{$lat} {$lon}";
        }
      }
    }

    $additional_taxonomies = array();
    if (isset($news->category)) {
      $additional_taxonomies['ATOM Categories'] = array();
      $additional_taxonomies['ATOM Domains'] = array();
      foreach ($news->category as $category) {
        // Append the term first so the index recorded under its scheme
        // refers to this category rather than the previous one.
        $additional_taxonomies['ATOM Categories'][] = "{$category['term']}";
        if (isset($category['scheme'])) {
          $domain = "{$category['scheme']}";
          if (!empty($domain)) {
            if (!isset($additional_taxonomies['ATOM Domains'][$domain])) {
              $additional_taxonomies['ATOM Domains'][$domain] = array();
            }
            $additional_taxonomies['ATOM Domains'][$domain][] = count($additional_taxonomies['ATOM Categories']) - 1;
          }
        }
      }
    }

    $title = "{$news->title}";

    // The body comes from content when it is non-empty, otherwise from
    // summary: serialized child elements first, then the element's own text.
    $body = '';
    $body_source = NULL;
    if (!empty($news->content)) {
      $body_source = $news->content;
    }
    elseif (!empty($news->summary)) {
      $body_source = $news->summary;
    }
    if ($body_source !== NULL) {
      foreach ($body_source->children() as $child) {
        $body .= $child->asXML();
      }
      $body .= "{$body_source}";
    }

    if (!empty($news->content['src'])) {

      // some src elements in some valid atom feeds contained no urls at all
      if (valid_url("{$news->content['src']}", TRUE)) {
        $original_url = "{$news->content['src']}";
      }
    }

    // Reset per entry so an author never carries over from the previous
    // entry. Precedence: source author, entry author, feed author.
    $original_author = NULL;
    if (!empty($news->source->author->name)) {
      $original_author = "{$news->source->author->name}";
    }
    elseif (!empty($news->author->name)) {
      $original_author = "{$news->author->name}";
    }
    elseif (!empty($feed_XML->author->name)) {
      $original_author = "{$feed_XML->author->name}";
    }

    // The link element wins when it yields a value; otherwise keep the
    // fallback derived from the id or content[@src] above.
    $link_url = _parser_common_syndication_link($news->link);
    if (!empty($link_url)) {
      $original_url = $link_url;
    }

    $item = array();
    $item['title'] = _parser_common_syndication_title($title, $body);
    $item['description'] = $body;
    $item['author_name'] = $original_author;

    // Fall back to updated for timestamp if both published and issued are
    // empty. The key is left unset when none of the three is present.
    if (isset($news->published)) {
      $item['timestamp'] = _parser_common_syndication_parse_date("{$news->published}");
    }
    elseif (isset($news->issued)) {
      $item['timestamp'] = _parser_common_syndication_parse_date("{$news->issued}");
    }
    elseif (isset($news->updated)) {
      $item['timestamp'] = _parser_common_syndication_parse_date("{$news->updated}");
    }

    $item['url'] = trim((string) $original_url);
    if (valid_url($item['url']) && !valid_url($item['url'], TRUE) && !empty($base)) {
      $item['url'] = $base . $item['url'];
    }

    // Fall back on URL if GUID is empty.
    if (!empty($guid)) {
      $item['guid'] = $guid;
    }
    else {
      $item['guid'] = $item['url'];
    }

    $item['geolocations'] = array();
    // Compare against NULL rather than truthiness so a coordinate of "0"
    // is kept.
    if ($lat !== NULL && $lon !== NULL) {
      $item['geolocations'] = array(
        array(
          'name' => $geoname,
          'lat' => $lat,
          'lon' => $lon,
        ),
      );
    }

    $item['tags'] = isset($additional_taxonomies['ATOM Categories']) ? $additional_taxonomies['ATOM Categories'] : array();
    $item['domains'] = isset($additional_taxonomies['ATOM Domains']) ? $additional_taxonomies['ATOM Domains'] : array();
    $parsed_source['items'][] = $item;
  }
  return $parsed_source;
}