parser_common_syndication.module in FeedAPI 5

Same filename and directory in other branches
6 parser_common_syndication/parser_common_syndication.module
Parse the incoming URL with SimpleXML then provide a data structure of the feed. Requires PHP5 because of SimpleXML.
File

parser_common_syndication/parser_common_syndication.module
View source
<?php

/**
 * @file
 * Parse the incoming URL with SimpleXML then provide a data structure of the feed.
 * Requires PHP5 because of SimpleXML.
 */

/**
 * Implementation of hook_help().
 *
 * Returns the translated help text for the two sections this module knows:
 * 'admin/modules#description' and 'feedapi/full_name'. Any other section
 * yields no value.
 */
function parser_common_syndication_help($section) {
  if ($section == 'admin/modules#description') {
    return t('Provide a common syndication parser for FeedAPI-compatible modules. Only PHP5-compatible. Rather fast.');
  }
  if ($section == 'feedapi/full_name') {
    return t('Parser Common Syndication - only for PHP5');
  }
}

/**
 * Implementation of hook_feedapi_feed().
 *
 * @param $op
 *  The requested operation: 'type', 'compatible' or 'parse'. Further
 *  arguments are read with func_get_args(); for 'compatible' and 'parse' the
 *  second argument is the feed object, of which the url property is used.
 * @return
 *  - 'type': array with the name of the feed type this parser handles.
 *  - 'compatible': that feed type name if the feed can be parsed, FALSE if not.
 *  - 'parse': the parsed feed object, or FALSE if it could not be parsed.
 *  Nothing is returned for any other operation.
 */
function parser_common_syndication_feedapi_feed($op) {
  $args = func_get_args();
  switch ($op) {
    case 'type':
      return array(
        "XML feed",
      );

    case 'compatible':
      if (!function_exists('simplexml_load_string')) {
        return FALSE;
      }
      // Without a feed object carrying a URL there is nothing to download.
      if (!isset($args[1]) || !is_object($args[1]) || empty($args[1]->url)) {
        return FALSE;
      }
      $url = $args[1]->url;

      // array_shift() takes its argument by reference, so the list of types
      // has to live in a variable before the first element is taken.
      $types = parser_common_syndication_feedapi_feed('type');
      $type = array_shift($types);

      $downloaded_string = _parser_common_syndication_download($url, $op);
      if (is_object($downloaded_string)) {
        // The download step handed back an already parsed (cached) feed.
        return $type;
      }
      if (!is_string($downloaded_string)) {
        return FALSE;
      }
      if (!defined('LIBXML_VERSION') || version_compare(phpversion(), '5.1.0', '<')) {
        $xml = @simplexml_load_string($downloaded_string, NULL);
      }
      else {
        $xml = @simplexml_load_string($downloaded_string, NULL, LIBXML_NOERROR | LIBXML_NOWARNING);
      }
      if (_parser_common_syndication_feed_format_detect($xml) != FALSE) {

        // The parser is compatible. Parse the feed and cache it right away,
        // because the download step may already have stored the ETag data
        // (depends on the webserver).
        $parsed_feed = _parser_common_syndication_feedapi_parse($xml);
        $from_cache = is_object($parsed_feed) && isset($parsed_feed->from_cache) && $parsed_feed->from_cache === TRUE;
        if (is_object($parsed_feed) && !$from_cache) {
          _parser_common_syndication_cache_set($url, $parsed_feed);
        }

        // No need to choose between types, this module only handles one.
        return $type;
      }
      return FALSE;

    case 'parse':
      if (!isset($args[1]) || !is_object($args[1])) {
        return FALSE;
      }
      $feed = $args[1];
      $parsed_feed = _parser_common_syndication_feedapi_parse($feed);
      // from_cache is only set on objects that came out of the cache, so it
      // must be tested with isset() before being compared.
      $from_cache = is_object($parsed_feed) && isset($parsed_feed->from_cache) && $parsed_feed->from_cache === TRUE;
      if (is_object($parsed_feed) && !$from_cache) {
        _parser_common_syndication_cache_set($feed->url, $parsed_feed);
      }
      return $parsed_feed;
  }
}

/**
 * Implementation of hook_requirements().
 *
 * Reports whether PHP 5 with the SimpleXML functions is available.
 *
 * @return
 *  A requirements array with the single key 'Parser Common Syndication',
 *  carrying REQUIREMENT_OK or REQUIREMENT_ERROR as severity.
 */
function parser_common_syndication_requirements() {
  // Every string goes through the function named by get_t() so that title,
  // description and value are all translated the same way.
  $t = get_t();
  $simplexml_available = version_compare('5', PHP_VERSION, '<=') && function_exists('simplexml_load_file');
  return array(
    'Parser Common Syndication' => array(
      'title' => $t('SimpleXML library.'),
      'description' => $t('A fast XML parsing library. (From PHP5)'),
      'severity' => $simplexml_available ? REQUIREMENT_OK : REQUIREMENT_ERROR,
      'value' => $simplexml_available ? $t('Available') : $t('Missing'),
    ),
  );
}

/**
 * Parse the feed into a data structure.
 *
 * @param $feed
 *  Either an already loaded SimpleXMLElement, or a feed object whose url
 *  property tells where to download the feed from.
 * @return
 *  The structured data extracted from the feed (stdClass), the cached feed
 *  object when the download step returned one, or FALSE if the feed could
 *  not be downloaded, is malformed or has an unsupported format.
 */
function _parser_common_syndication_feedapi_parse($feed) {
  if ($feed instanceof SimpleXMLElement) {
    $xml = $feed;
  }
  else {
    // Callers may pass FALSE when they have no feed object; do not try to
    // read a URL from it.
    if (!is_object($feed) || empty($feed->url)) {
      return FALSE;
    }
    $downloaded_string = _parser_common_syndication_download($feed->url, 'parse');
    if ($downloaded_string === FALSE || is_object($downloaded_string)) {
      // FALSE: nothing downloaded. Object: parsed feed served from the cache.
      return $downloaded_string;
    }
    if (!defined('LIBXML_VERSION') || version_compare(phpversion(), '5.1.0', '<')) {
      $xml = @simplexml_load_string($downloaded_string, NULL);
    }
    else {
      $xml = @simplexml_load_string($downloaded_string, NULL, LIBXML_NOERROR | LIBXML_NOWARNING);
    }

    // Got a malformed XML.
    if ($xml === FALSE || $xml == NULL) {
      return FALSE;
    }
  }

  // Dispatch to the parser that matches the detected format.
  $feed_type = _parser_common_syndication_feed_format_detect($xml);
  if ($feed_type === "atom1.0") {
    return _parser_common_syndication_atom10_parse($xml);
  }
  if ($feed_type === "RSS2.0" || $feed_type === "RSS0.91" || $feed_type === "RSS0.92") {
    return _parser_common_syndication_RSS20_parse($xml);
  }
  if ($feed_type === "RDF") {
    return _parser_common_syndication_RDF10_parse($xml);
  }
  return FALSE;
}

/**
 * Get the cached version of the <var>$url</var>
 *
 * @param $url
 *  The feed URL; its md5 hash is the cache file name.
 * @return
 *  The unserialized cache content, or FALSE if there is no usable cache
 *  directory or no readable cache file for this URL.
 */
function _parser_common_syndication_cache_get($url) {
  $cache_dir = _parser_common_syndication_sanitize_cache();
  if ($cache_dir === FALSE) {
    // Without this check the path would become "/<md5>" at the filesystem root.
    return FALSE;
  }
  $cache_file = $cache_dir . '/' . md5($url);
  if (!file_exists($cache_file)) {
    return FALSE;
  }
  $file_content = file_get_contents($cache_file);
  if ($file_content === FALSE) {
    return FALSE;
  }
  return unserialize($file_content);
}

/**
 * Store the parsed feed into the cache
 *
 * Nothing is written when no usable cache directory exists or the cache file
 * cannot be opened.
 *
 * @param $url
 *  The feed URL; its md5 hash is the cache file name.
 * @param $parsed_feed
 *  The parsed feed, stored in serialized form.
 */
function _parser_common_syndication_cache_set($url, $parsed_feed) {
  $cache_dir = _parser_common_syndication_sanitize_cache();
  if ($cache_dir === FALSE) {
    // Without this check the file would be created as "/<md5>" at the
    // filesystem root.
    return;
  }
  $cache_fp = fopen($cache_dir . '/' . md5($url), 'w');
  if ($cache_fp === FALSE) {
    return;
  }
  fwrite($cache_fp, serialize($parsed_feed));
  fclose($cache_fp);
}

/**
 * Get the content from the given URL.
 *
 * The stored ETag / Last-Modified values of the URL are sent along as
 * validators, and the values from the response are stored for the next call.
 *
 * @param $url
 *  A valid URL (not only web URLs).
 * @param $username
 *  If the URL use authentication, here you can supply the username for this.
 * @param $password
 *  If the URL use authentication, here you can supply the password for this.
 * @return
 *  The data pulled from the URL (an empty string if the response has none),
 *  or the cached parsed feed object, flagged with from_cache = TRUE, when the
 *  server answers 304.
 */
function _parser_common_syndication_feedapi_get($url, $username = NULL, $password = NULL) {
  $method = 'GET';
  $follow = 3;
  $data = NULL;

  // Only download and parse data if really needs refresh. Based on the stored
  // ETag and Last-Modified values.
  $headers = array();
  $validate = db_fetch_array(db_query("SELECT etag, last_modified FROM {parser_common_syndication} WHERE url = '%s'", $url));
  if (!empty($validate['etag'])) {
    $headers['If-None-Match'] = $validate['etag'];
  }
  if (!empty($validate['last_modified'])) {
    $headers['If-Modified-Since'] = $validate['last_modified'];
  }
  if (!empty($username)) {
    $headers['Authorization'] = 'Basic ' . base64_encode("{$username}:{$password}");
  }
  $result = drupal_http_request($url, $headers, $method, $data, $follow);

  // Not modified: return the cached data. The isset() covers results that
  // carry no status code at all.
  if (isset($result->code) && $result->code == 304) {
    $cached_data = _parser_common_syndication_cache_get($url);
    if (is_object($cached_data)) {
      $cached_data->from_cache = TRUE;
      return $cached_data;
    }

    // The cache file should exist and contain good data, but it does not.
    // Forget the validators and repeat the request without them.
    db_query("DELETE FROM {parser_common_syndication} WHERE url = '%s'", $url);
    return _parser_common_syndication_feedapi_get($url, $username, $password);
  }

  // Not every response carries these headers; store an empty value then.
  $etag = isset($result->headers['ETag']) ? $result->headers['ETag'] : '';
  $last_modified = isset($result->headers['Last-Modified']) ? $result->headers['Last-Modified'] : '';
  if (db_result(db_query("SELECT COUNT(*) FROM {parser_common_syndication} WHERE url = '%s'", $url)) == 0) {
    db_query("INSERT INTO {parser_common_syndication} (etag, last_modified, url) VALUES ('%s', '%s', '%s')", $etag, $last_modified, $url);
  }
  else {
    db_query("UPDATE {parser_common_syndication} SET etag = '%s', last_modified = '%s' WHERE url = '%s'", $etag, $last_modified, $url);
  }
  return isset($result->data) ? $result->data : '';
}

/**
 * Implementation of hook_nodeapi().
 *
 * When a feed node is deleted, removes the stored cache validators (ETag /
 * Last-Modified row) and the cache file that belong to its URL.
 */
function parser_common_syndication_nodeapi(&$node, $op) {
  if ($op != 'delete') {
    return;
  }
  if (!(isset($node->feed) || feedapi_enabled_type($node->type))) {
    return;
  }
  // The node type can be feed-enabled without the node carrying feed data.
  if (!isset($node->feed->url)) {
    return;
  }
  db_query("DELETE FROM {parser_common_syndication} WHERE url = '%s'", $node->feed->url);
  $cache_dir = _parser_common_syndication_sanitize_cache();
  if ($cache_dir === FALSE) {
    // No usable cache directory, so there is no cache file to remove.
    return;
  }
  $cache_filename = $cache_dir . '/' . md5($node->feed->url);
  if (file_exists($cache_filename)) {
    unlink($cache_filename);
  }
}

/**
 * Determine the feed format of a SimpleXML parsed object structure.
 *
 * The decision is based on the lowercased name of the root element, on the
 * presence of an entry (Atom) or channel (RDF) child, and on the version
 * attribute of an rss root element.
 *
 * @param $xml
 *  SimpleXML-preprocessed feed.
 * @return
 *  One of "atom1.0", "RSS2.0", "RSS0.91", "RSS0.92" or "RDF", or FALSE if
 *  the format is not compatible.
 */
function _parser_common_syndication_feed_format_detect($xml) {
  if (!is_object($xml)) {
    return FALSE;
  }
  $root_attributes = $xml->attributes();
  $root_name = strtolower($xml->getName());

  if ($root_name == "feed") {
    return isset($xml->entry) ? "atom1.0" : FALSE;
  }
  if ($root_name == "rdf") {
    return isset($xml->channel) ? "RDF" : FALSE;
  }
  if ($root_name == "rss") {
    // Supported values of the version attribute and the format they stand for.
    $known_versions = array(
      "2.0" => "RSS2.0",
      "0.91" => "RSS0.91",
      "0.92" => "RSS0.92",
    );
    foreach ($known_versions as $version => $format) {
      if ($root_attributes["version"] == $version) {
        return $format;
      }
    }
  }
  return FALSE;
}

/**
 * Download a feed and pass back the downloaded data.
 *
 * If the downloaded document looks like HTML, its <link> tags are searched
 * for an alternate feed (autodiscovery) and that feed is downloaded instead.
 *
 * @param $url
 *  The URL to fetch. A user and password embedded in the URL are handed to
 *  the fetch function.
 * @param $op
 *  The operation that triggered the download. It is only forwarded to the
 *  recursive call made for an autodiscovered feed link.
 * @return
 *  string - the downloaded data with some tags filtered out,
 *  object - the cached parsed feed, when the fetch function returned one,
 *  FALSE - if nothing could be downloaded.
 */
function _parser_common_syndication_download($url, $op) {
  // Define the credentials up front so they also exist for URLs that do not
  // pass validation.
  $username = $password = NULL;
  if (valid_url($url, TRUE)) {

    // Handle password protected feeds.
    $url_parts = parse_url($url);
    if (!empty($url_parts['user'])) {
      $username = $url_parts['user'];
      $password = isset($url_parts['pass']) ? $url_parts['pass'] : NULL;
    }
  }
  $downloaded_string = _parser_common_syndication_feedapi_get($url, $username, $password);

  // A cached, already parsed feed is passed through untouched.
  if (is_object($downloaded_string)) {
    return $downloaded_string;
  }

  // Cannot get the feed, pass the problem to one level upper
  if ($downloaded_string == "") {
    return FALSE;
  }

  // Do the autodiscovery at this level, pass back the real data
  // Maybe it's HTML. If it's not HTML, not worth to take a look into the downloaded string
  if (strpos(strtolower($downloaded_string), "<html") !== FALSE) {
    $allowed_mime = array(
      "text/xml",
      "application/rss+xml",
      "application/atom+xml",
      "application/rdf+xml",
      "application/xml",
    );
    $matches = array();

    // Get all the links tag
    preg_match_all('/<link\\s+(.*?)\\s*\\/?>/si', $downloaded_string, $matches);
    foreach ($matches[1] as $link) {
      $mime = array();

      // Get the type attribute and check if the mime type is allowed.
      preg_match_all('/type\\s*=\\s*("|' . "'" . ')([A-Za-z\\/+]*)("|' . "'" . ')/si', $link, $mime);
      if (!in_array(array_pop($mime[2]), $allowed_mime)) {
        continue;
      }

      // Get the href attribute.
      $href = array();
      preg_match_all('/href\\s*=\\s*("|' . "'" . ')([=#\\?_:.0-9A-Za-z\\/+]*)("|' . "'" . ')/si', $link, $href);
      $rss_link = array_pop($href[2]);
      if (!is_string($rss_link) || strlen($rss_link) == 0 || $rss_link == $url) {
        continue;
      }

      // Handle base url related stuff.
      $parsed_url = parse_url($rss_link);
      if (!isset($parsed_url['host'])) {

        // It's relative so make it absolute. The <base> tag is a tag of its
        // own, so it has to be looked up in the whole document and not in
        // the attributes of the current <link> tag.
        $base_tag = array();
        preg_match_all('/<base href\\s*=\\s*("|' . "'" . ')([_:.0-9A-Za-z\\/+]*)("|' . "'" . ')/si', $downloaded_string, $base_tag);
        $base_url = array_pop($base_tag[2]);
        if (is_string($base_url) && strlen($base_url) > 0) {

          // Get from the HTML base tag.
          $rss_link = $base_url . $rss_link;
        }
        else {

          // Guess from the original URL.
          $original_url = parse_url($url);
          if (empty($original_url['scheme']) || empty($original_url['host'])) {
            // Nothing to resolve the relative link against.
            continue;
          }
          $referrer_path = isset($original_url['path']) ? $original_url['path'] : '/';
          $link_path = isset($parsed_url['path']) ? $parsed_url['path'] : '';
          if ($link_path == '') {
            // Query-only or fragment-only link: it refers to the page itself.
            $link_path = $referrer_path;
          }
          elseif (substr($link_path, 0, 1) != '/') {
            // Path relative to the directory of the referring page.
            $last_slash = strrpos($referrer_path, '/');
            $link_path = ($last_slash === FALSE ? '/' : substr($referrer_path, 0, $last_slash + 1)) . $link_path;
          }
          $rss_link = $original_url['scheme'] . '://' . $original_url['host'];
          if (isset($original_url['port'])) {
            $rss_link .= ':' . $original_url['port'];
          }
          $rss_link .= $link_path;

          // Only append the parts the link really has.
          if (isset($parsed_url['query'])) {
            $rss_link .= '?' . $parsed_url['query'];
          }
          if (isset($parsed_url['fragment'])) {
            $rss_link .= '#' . $parsed_url['fragment'];
          }
        }
      }
      $downloaded_string = _parser_common_syndication_download($rss_link, $op);
      break;
    }

    // The recursive download may have failed (FALSE) or returned a cached
    // feed object; neither can go through the string filters below.
    if (!is_string($downloaded_string)) {
      return $downloaded_string;
    }
  }

  // Filter out strange tags. Without this, the text would contain strange stuff.
  // @todo: make sure that these are not important for feed element mapper
  $downloaded_string = preg_replace(array(
    '@<script[^>]*?.*?</script>@si',
    '@<object[^>]*?.*?</object>@si',
    '@<embed[^>]*?.*?</embed>@si',
    '@<applet[^>]*?.*?</applet>@si',
    '@<noframes[^>]*?.*?</noframes>@si',
    '@<noscript[^>]*?.*?</noscript>@si',
    '@<noembed[^>]*?.*?</noembed>@si',
  ), '', $downloaded_string);

  // Ugly hack to be able to retrieve the xml:base property, no way to access xml:lang inside <feed>
  $downloaded_string = preg_replace('/xml:base *=/', 'base=', $downloaded_string);
  return $downloaded_string;
}

/**
 * Parse atom feeds.
 *
 * @param $feed_XML
 *  The SimpleXMLElement of the feed root element.
 * @return
 *  stdClass with title, description, options->link and an items array. Each
 *  item has title, description and options (original_author, timestamp,
 *  original_url, guid, tags, domains).
 */
function _parser_common_syndication_atom10_parse($feed_XML) {
  $parsed_source = new stdClass();

  // The download step renames xml:base to base, so it is a plain attribute.
  $base_attributes = $feed_XML->xpath("@base");
  $base = (is_array($base_attributes) && count($base_attributes) > 0) ? (string) $base_attributes[0] : '';
  if (!valid_url($base, TRUE)) {
    $base = FALSE;
  }

  // Detect the title
  $parsed_source->title = isset($feed_XML->title) ? _parser_common_syndication_title("{$feed_XML->title}") : "";

  // Detect the description
  $parsed_source->description = isset($feed_XML->subtitle) ? "{$feed_XML->subtitle}" : "";
  $parsed_source->options = new stdClass();
  $parsed_source->options->link = _parser_common_syndication_link($feed_XML->link);
  if (valid_url($parsed_source->options->link) && !valid_url($parsed_source->options->link, TRUE) && !empty($base)) {
    $parsed_source->options->link = $base . $parsed_source->options->link;
  }
  $parsed_source->items = array();
  foreach ($feed_XML->entry as $news) {
    // Reset everything per entry so that no value of the previous entry
    // leaks into an entry that lacks the element.
    $original_url = NULL;
    $original_author = NULL;
    $body = NULL;
    $guid = !empty($news->id) ? "{$news->id}" : NULL;

    // I don't know how standard this is, but sometimes the id is the URL.
    if ($guid !== NULL && valid_url($guid, TRUE)) {
      $original_url = $guid;
    }

    // Both lists always exist, even for entries without categories.
    $additional_taxonomies = array(
      'ATOM Categories' => array(),
      'ATOM Domains' => array(),
    );
    if (isset($news->category)) {
      foreach ($news->category as $category) {
        $additional_taxonomies['ATOM Categories'][] = "{$category['term']}";
        if (isset($category['scheme'])) {
          $domain = "{$category['scheme']}";
          if (!empty($domain)) {
            if (!isset($additional_taxonomies['ATOM Domains'][$domain])) {
              $additional_taxonomies['ATOM Domains'][$domain] = array();
            }
            // Domains hold the index of the category that belongs to them.
            $additional_taxonomies['ATOM Domains'][$domain][] = count($additional_taxonomies['ATOM Categories']) - 1;
          }
        }
      }
    }
    $title = "{$news->title}";

    // The body is the content, or the summary if there is no content: the
    // markup of the child elements followed by the text of the element.
    if (!empty($news->content)) {
      $body = '';
      foreach ($news->content->children() as $child) {
        $body .= $child->asXML();
      }
      $body .= "{$news->content}";
    }
    elseif (!empty($news->summary)) {
      $body = '';
      foreach ($news->summary->children() as $child) {
        $body .= $child->asXML();
      }
      $body .= "{$news->summary}";
    }
    if (!empty($news->content['src'])) {

      // some src elements in some valid atom feeds contained no urls at all
      if (valid_url("{$news->content['src']}")) {
        $original_url = "{$news->content['src']}";
      }
    }

    // Author: the source author first, then the entry author, then the feed author.
    if (!empty($news->source->author->name)) {
      $original_author = "{$news->source->author->name}";
    }
    elseif (!empty($news->author->name)) {
      $original_author = "{$news->author->name}";
    }
    elseif (!empty($feed_XML->author->name)) {
      $original_author = "{$feed_XML->author->name}";
    }

    // A real link wins; the URL taken from the id or the content src above
    // only remains as fallback for entries without a usable link.
    $link_url = _parser_common_syndication_link($news->link);
    if ($link_url != '') {
      $original_url = $link_url;
    }

    // published is optional in Atom 1.0; fall back to issued and then to
    // updated before the date parser has to default to the current time.
    if (isset($news->published)) {
      $date = "{$news->published}";
    }
    elseif (isset($news->issued)) {
      $date = "{$news->issued}";
    }
    else {
      $date = "{$news->updated}";
    }

    $item = new stdClass();
    $item->title = _parser_common_syndication_title($title);
    $item->description = $body;
    $item->options = new stdClass();
    $item->options->original_author = $original_author;
    $item->options->timestamp = _parser_common_syndication_parse_date($date);
    $item->options->original_url = $original_url;
    if (valid_url($item->options->original_url) && !valid_url($item->options->original_url, TRUE) && !empty($base)) {
      $item->options->original_url = $base . $item->options->original_url;
    }
    $item->options->guid = $guid;
    $item->options->tags = $additional_taxonomies['ATOM Categories'];
    $item->options->domains = $additional_taxonomies['ATOM Domains'];
    $parsed_source->items[] = $item;
  }
  return $parsed_source;
}

/**
 * Parse RSS1.0/RDF feeds.
 *
 * @param $feed_XML
 *  The SimpleXMLElement of the feed root element.
 * @return
 *  stdClass with title, description, options->link and an items array. Each
 *  item has title, description and options (original_author, timestamp,
 *  original_url, guid, tags).
 */
function _parser_common_syndication_RDF10_parse($feed_XML) {
  $parsed_source = new stdClass();

  // Detect the title.
  $parsed_source->title = isset($feed_XML->channel->title) ? _parser_common_syndication_title("{$feed_XML->channel->title}") : "";

  // Detect the description.
  $parsed_source->description = isset($feed_XML->channel->description) ? "{$feed_XML->channel->description}" : "";
  $parsed_source->options = new stdClass();

  // Detect the link.
  $parsed_source->options->link = isset($feed_XML->channel->link) ? "{$feed_XML->channel->link}" : "";
  $parsed_source->items = array();

  // Set category splitter (space is for del.icio.us feed).
  $category_splitter = ' ';

  // Get the default original author: the channel title, if there is one.
  $oa = !empty($feed_XML->channel->title) ? "{$feed_XML->channel->title}" : NULL;

  // Get all namespaces.
  $namespaces = array();
  if (version_compare(phpversion(), '5.1.2', '<')) {

    // Versions prior 5.1.2 don't allow namespaces.
    $namespaces['default'] = NULL;
  }
  else {
    $namespaces = $feed_XML->getNamespaces(TRUE);
  }
  if (empty($namespaces)) {
    // Still look at the un-namespaced children once.
    $namespaces = array('default' => NULL);
  }
  foreach ($feed_XML->item as $news) {

    // Initialization.
    $guid = $original_url = NULL;
    $title = $body = '';
    $pub_date = $dc_date = '';
    $additional_taxonomies = array('RDF Categories' => array());
    $original_author = $oa;
    foreach ($namespaces as $ns_link) {

      // Get about attribute as guid.
      foreach ($news->attributes($ns_link) as $name => $value) {
        if ($name == 'about') {
          $guid = "{$value}";
        }
      }

      // Get children for current namespace.
      if (version_compare(phpversion(), '5.1.2', '<')) {
        $ns = (array) $news;
      }
      else {
        $ns = (array) $news->children($ns_link);
      }

      // Title
      if (!empty($ns['title'])) {
        $title = "{$ns['title']}";
      }

      // Description or dc:description
      if (!empty($ns['description']) && $body == '') {
        $body = "{$ns['description']}";
      }

      // Link
      if (!empty($ns['link'])) {
        $original_url = "{$ns['link']}";
      }

      // dc:creator
      if (!empty($ns['creator'])) {
        $original_author = "{$ns['creator']}";
      }

      // content:encoded
      if (!empty($ns['encoded'])) {
        $body = "{$ns['encoded']}";
      }

      // pubDate and dc:date. They have to be remembered here, because $ns
      // only holds the children of the namespace currently iterated.
      if (!empty($ns['pubDate'])) {
        $pub_date = "{$ns['pubDate']}";
      }
      if (!empty($ns['date'])) {
        $dc_date = "{$ns['date']}";
      }

      // dc:subject
      if (!empty($ns['subject'])) {

        // There can be multiple category tags.
        if (is_array($ns['subject'])) {
          foreach ($ns['subject'] as $cat) {
            if (is_object($cat)) {
              $additional_taxonomies['RDF Categories'][] = trim(strip_tags($cat->asXML()));
            }
            else {
              $additional_taxonomies['RDF Categories'][] = $cat;
            }
          }
        }
        else {

          //or single tag
          $additional_taxonomies['RDF Categories'] = explode($category_splitter, "{$ns['subject']}");
        }
      }
    }

    // The description is not mandatory so use title if description not present.
    if (empty($body)) {
      $body = $title;
    }

    // If there are no link tag but rdf:about is provided.
    if (empty($original_url) && !empty($guid)) {
      $original_url = $guid;
    }
    $item = new stdClass();
    $item->title = _parser_common_syndication_title($title);
    $item->description = $body;
    $item->options = new stdClass();
    $item->options->original_author = $original_author;
    // Prefer pubDate, fall back to dc:date.
    $item->options->timestamp = _parser_common_syndication_parse_date($pub_date != '' ? $pub_date : $dc_date);
    $item->options->original_url = $original_url;
    $item->options->guid = $guid;
    $item->options->tags = $additional_taxonomies['RDF Categories'];
    $parsed_source->items[] = $item;
  }
  return $parsed_source;
}

/**
 * Parse RSS2.0 feeds.
 *
 * The dispatcher in this file also sends RSS 0.91 and 0.92 feeds here.
 *
 * @param $feed_XML
 *  The SimpleXMLElement of the feed root element.
 * @return
 *  stdClass with title, description, options->link and an items array. Each
 *  item has title, description and options (original_author, timestamp,
 *  original_url, guid, tags, domains).
 */
function _parser_common_syndication_RSS20_parse($feed_XML) {
  $parsed_source = new stdClass();

  // Detect the title.
  $parsed_source->title = isset($feed_XML->channel->title) ? _parser_common_syndication_title("{$feed_XML->channel->title}") : "";

  // Detect the description.
  $parsed_source->description = isset($feed_XML->channel->description) ? "{$feed_XML->channel->description}" : "";
  $parsed_source->options = new stdClass();

  // Detect the link.
  $parsed_source->options->link = isset($feed_XML->channel->link) ? "{$feed_XML->channel->link}" : "";
  $parsed_source->items = array();

  // The channel title is used as the author of every item.
  $original_author = !empty($feed_XML->channel->title) ? "{$feed_XML->channel->title}" : NULL;

  // xpath() returns FALSE on error, which cannot be iterated.
  $feed_items = $feed_XML->xpath('//item');
  if (!is_array($feed_items)) {
    $feed_items = array();
  }
  $namespaces_supported = version_compare(phpversion(), '5.1.2', '>');
  foreach ($feed_items as $news) {
    // Keep the category elements as objects; the domain attribute is read
    // from them further down.
    $categories = $news->xpath('category');
    if (!is_array($categories)) {
      $categories = array();
    }

    // for PHP > 5.1.2 get 'content' namespace
    $content = array();
    if ($namespaces_supported) {
      $content = (array) $news->children('http://purl.org/rss/1.0/modules/content/');
    }
    $news = (array) $news;
    $news['category'] = $categories;

    // Always store the guid as a string: an element left as SimpleXMLElement
    // by the array cast cannot be serialized when the feed is cached.
    $guid = NULL;
    if (isset($news['guid'])) {
      $raw_guid = is_array($news['guid']) ? reset($news['guid']) : $news['guid'];
      $guid = "{$raw_guid}";
    }
    $title = isset($news['title']) ? "{$news['title']}" : '';
    if (isset($news['description'])) {
      $body = "{$news['description']}";
    }
    elseif (isset($news['encoded'])) {

      // content:encoded for PHP < 5.1.2.
      $body = "{$news['encoded']}";
    }
    elseif (isset($content['encoded'])) {

      // content:encoded for PHP >= 5.1.2.
      $body = "{$content['encoded']}";
    }
    else {
      // No body at all: use the title.
      $body = $title;
    }
    $original_url = !empty($news['link']) ? "{$news['link']}" : NULL;

    $additional_taxonomies = array();
    $additional_taxonomies['RSS Categories'] = array();
    $additional_taxonomies['RSS Domains'] = array();
    foreach ($news['category'] as $category) {
      $additional_taxonomies['RSS Categories'][] = "{$category}";
      if (isset($category['domain'])) {
        $domain = "{$category['domain']}";
        if (!empty($domain)) {
          if (!isset($additional_taxonomies['RSS Domains'][$domain])) {
            $additional_taxonomies['RSS Domains'][$domain] = array();
          }
          // Domains hold the index of the category that belongs to them.
          $additional_taxonomies['RSS Domains'][$domain][] = count($additional_taxonomies['RSS Categories']) - 1;
        }
      }
    }
    $item = new stdClass();
    $item->title = _parser_common_syndication_title($title);
    $item->description = $body;
    $item->options = new stdClass();
    $item->options->original_author = $original_author;
    // pubDate is optional; an empty string makes the date parser fall back
    // to the current time.
    $item->options->timestamp = _parser_common_syndication_parse_date(isset($news['pubDate']) ? "{$news['pubDate']}" : '');
    $item->options->original_url = $original_url;
    $item->options->guid = $guid;
    $item->options->tags = $additional_taxonomies['RSS Categories'];
    $item->options->domains = $additional_taxonomies['RSS Domains'];
    $parsed_source->items[] = $item;
  }
  return $parsed_source;
}

/**
 * Determine the cache directory, creating it when that is possible.
 *
 * @return
 *  The path of the 'parser_common_syndication_cache' directory below the
 *  files directory, or FALSE if that location is not writable.
 */
function _parser_common_syndication_sanitize_cache() {
  $directory = file_directory_path() . '/parser_common_syndication_cache';

  // Already a writable directory: nothing to prepare.
  if (is_writable($directory) && is_dir($directory)) {
    return $directory;
  }

  $directory = file_create_path($directory);
  $missing = !file_exists($directory);
  if ($missing && is_writable(file_directory_path())) {
    mkdir($directory);
  }
  return is_writable($directory) ? $directory : FALSE;
}

/**
 * Turn a date string taken from a feed into a timestamp.
 *
 * strtotime() is tried first; if it cannot handle the string, the W3C
 * date/time parser of this file gets a chance.
 *
 * @param $date_str
 *  The date string in various formats.
 * @return
 *  The timestamp of the string or the current time if can't be parsed
 */
function _parser_common_syndication_parse_date($date_str) {
  $timestamp = strtotime($date_str);
  $strtotime_failed = ($timestamp === FALSE || $timestamp == -1);
  if ($strtotime_failed) {
    $timestamp = _parser_common_syndication_parse_w3cdtf($date_str);
  }
  if ($timestamp === FALSE) {
    return time();
  }
  return $timestamp;
}

/**
 * Parse the W3C date/time format, a subset of ISO 8601.
 *
 * PHP date parsing functions do not handle this format.
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * Only strings with a date and a time part (YYYY-MM-DDThh:mm, optionally
 * followed by :ss and a time zone designator) are recognized. A string
 * without a time zone designator is taken as GMT.
 *
 * @param $date_str
 *   A string with a potentially W3C DTF date.
 * @return
 *   A timestamp if parsed successfully or FALSE if not.
 */
function _parser_common_syndication_parse_w3cdtf($date_str) {
  // Groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":ss", 7 seconds,
  // 8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z".
  $pattern = '/(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2})(:(\\d{2}))?(?:([-+])(\\d{2}):?(\\d{2})|(Z))?/';
  if (!preg_match($pattern, (string) $date_str, $match)) {
    return FALSE;
  }

  // Trailing optional groups that did not take part in the match are absent
  // from $match, so each of them is checked before it is read. The seconds
  // are group 7; group 6 still carries the leading colon.
  $seconds = (isset($match[7]) && $match[7] !== '') ? (int) $match[7] : 0;

  // Calculate the epoch for current date assuming GMT.
  $epoch = gmmktime((int) $match[4], (int) $match[5], $seconds, (int) $match[2], (int) $match[3], (int) $match[1]);

  // A numeric offset is only present when its sign (group 8) matched; "Z"
  // (zulu time, aka GMT) and a missing designator need no correction.
  if (!empty($match[8])) {
    $offset_secs = ((int) $match[9] * 60 + (int) $match[10]) * 60;

    // Is timezone ahead of GMT?  If yes, subtract offset.
    if ($match[8] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }
  return $epoch;
}

/**
 * Extract the link that points to the original content (back to the site or
 * the original article).
 *
 * The href of the first "alternate" link is returned. A link without a rel
 * attribute counts as "alternate", as the Atom specification demands. If
 * there is no alternate link at all, the href of the last link is returned.
 *
 * @param $links
 *  Array of SimpleXML objects
 * @return
 *  The href of the chosen link, or an empty string if there is none.
 */
function _parser_common_syndication_link($links) {
  $to_link = '';
  if (!is_array($links) && !($links instanceof Traversable)) {
    // Nothing that could be iterated.
    return $to_link;
  }
  foreach ($links as $link) {
    $attributes = $link->attributes();
    $to_link = isset($attributes["href"]) ? "{$attributes["href"]}" : "";
    $rel = isset($attributes["rel"]) ? "{$attributes["rel"]}" : 'alternate';
    if ($rel == 'alternate') {
      break;
    }
  }
  return $to_link;
}

/**
 * Prepare raw data to be a title.
 *
 * Tags are stripped first, then HTML entities (both kinds of quotes
 * included) are decoded to UTF-8 characters.
 */
function _parser_common_syndication_title($title) {
  $without_tags = strip_tags($title);
  $decoded = html_entity_decode($without_tags, ENT_QUOTES, 'UTF-8');
  return $decoded;
}
Functions

Name	Description
parser_common_syndication_feedapi_feed	Implementation of hook_feedapi_feed().
parser_common_syndication_help	Implementation of hook_help().
parser_common_syndication_nodeapi	Delete cache validating functions when feed is deleted
parser_common_syndication_requirements	Implementation of hook_requirements().
_parser_common_syndication_atom10_parse	Parse atom feeds.
_parser_common_syndication_cache_get	Get the cached version of the <var>$url</var>
_parser_common_syndication_cache_set	Store the parsed feed into the cache
_parser_common_syndication_download	Call one of the possible feedapi_get hook and pass back the downloaded data
_parser_common_syndication_feedapi_get	Get the content from the given URL.
_parser_common_syndication_feedapi_parse	Parse the feed into a data structure.
_parser_common_syndication_feed_format_detect	Determine the feed format of a SimpleXML parsed object structure.
_parser_common_syndication_link	Extract the link that points to the original content (back to the site or the original article).
_parser_common_syndication_parse_date	Parse a date that comes from a feed.
_parser_common_syndication_parse_w3cdtf	Parse the W3C date/time format, a subset of ISO 8601.
_parser_common_syndication_RDF10_parse	Parse RSS1.0/RDF feeds.
_parser_common_syndication_RSS20_parse	Parse RSS2.0 feeds.
_parser_common_syndication_sanitize_cache	Set the default caching directory if the current setting is not useable
_parser_common_syndication_title	Prepare raw data to be a title
You are here

parser_common_syndication.module in FeedAPI 5

File

Functions

API Navigation