You are here

aggregator.module in Drupal 6

Used to aggregate syndicated content (RSS, RDF, and Atom).

File

modules/aggregator/aggregator.module
View source
<?php

/**
 * @file
 * Used to aggregate syndicated content (RSS, RDF, and Atom).
 */

/**
 * Implementation of hook_help().
 *
 * Supplies help text for the module help page, the feed overview page and
 * the "add feed" / "add category" forms.
 *
 * @param $path
 *   The path for which help is requested.
 * @param $arg
 *   Not used by this implementation.
 *
 * @return
 *   An HTML string of one or more paragraphs for the paths handled here;
 *   nothing for any other path.
 */
function aggregator_help($path, $arg) {
  // Placeholder values shared by the two texts that list the supported formats.
  $format_links = array(
    '@rss' => 'http://cyber.law.harvard.edu/rss/',
    '@rdf' => 'http://www.w3.org/RDF/',
    '@atom' => 'http://www.atomenabled.org',
  );

  // Translated paragraphs, collected without their <p> wrappers.
  $paragraphs = array();

  if ($path == 'admin/help#aggregator') {
    $paragraphs[] = t('The aggregator is a powerful on-site syndicator and news reader that gathers fresh content from RSS-, RDF-, and Atom-based feeds made available across the web. Thousands of sites (particularly news sites and blogs) publish their latest headlines and posts in feeds, using a number of standardized XML-based formats. Formats supported by the aggregator include <a href="@rss">RSS</a>, <a href="@rdf">RDF</a>, and <a href="@atom">Atom</a>.', $format_links);
    $paragraphs[] = t('Feeds contain feed items, or individual posts published by the site providing the feed. Feeds may be grouped in categories, generally by topic. Users view feed items in the <a href="@aggregator">main aggregator display</a> or by <a href="@aggregator-sources">their source</a>. Administrators can <a href="@feededit">add, edit and delete feeds</a> and choose how often to check each feed for newly updated items. The most recent items in either a feed or category can be displayed as a block through the <a href="@admin-block">blocks administration page</a>. A <a href="@aggregator-opml">machine-readable OPML file</a> of all feeds is available. A correctly configured <a href="@cron">cron maintenance task</a> is required to update feeds automatically.', array(
      '@aggregator' => url('aggregator'),
      '@aggregator-sources' => url('aggregator/sources'),
      '@feededit' => url('admin/content/aggregator'),
      '@admin-block' => url('admin/build/block'),
      '@aggregator-opml' => url('aggregator/opml'),
      '@cron' => url('admin/reports/status'),
    ));
    $paragraphs[] = t('For more information, see the online handbook entry for <a href="@aggregator">Aggregator module</a>.', array(
      '@aggregator' => 'http://drupal.org/handbook/modules/aggregator/',
    ));
  }
  elseif ($path == 'admin/content/aggregator') {
    $paragraphs[] = t('Thousands of sites (particularly news sites and blogs) publish their latest headlines and posts in feeds, using a number of standardized XML-based formats. Formats supported by the aggregator include <a href="@rss">RSS</a>, <a href="@rdf">RDF</a>, and <a href="@atom">Atom</a>.', $format_links);
    $paragraphs[] = t('Current feeds are listed below, and <a href="@addfeed">new feeds may be added</a>. For each feed or feed category, the <em>latest items</em> block may be enabled at the <a href="@block">blocks administration page</a>.', array(
      '@addfeed' => url('admin/content/aggregator/add/feed'),
      '@block' => url('admin/build/block'),
    ));
  }
  elseif ($path == 'admin/content/aggregator/add/feed') {
    $paragraphs[] = t('Add a feed in RSS, RDF or Atom format. A feed may only have one entry.');
  }
  elseif ($path == 'admin/content/aggregator/add/category') {
    $paragraphs[] = t('Categories allow feed items from different feeds to be grouped together. For example, several sport-related feeds may belong to a category named <em>Sports</em>. Feed items may be grouped automatically (by selecting a category when creating or editing a feed) or manually (via the <em>Categorize</em> page available from feed item listings). Each category provides its own feed page and block.');
  }

  // Unhandled paths collect nothing and therefore return nothing.
  if ($paragraphs) {
    return '<p>' . implode('</p><p>', $paragraphs) . '</p>';
  }
}

/**
 * Implementation of hook_theme().
 *
 * @return
 *   An array keyed by theme hook name. Each entry lists the hook's default
 *   'arguments' and, where given, the include 'file' and the 'template' name.
 */
function aggregator_theme() {
  // Include file named by every hook except 'aggregator_block_item'.
  $pages_inc = 'aggregator.pages.inc';

  $hooks = array();
  $hooks['aggregator_wrapper'] = array(
    'arguments' => array('content' => NULL),
    'file' => $pages_inc,
    'template' => 'aggregator-wrapper',
  );
  $hooks['aggregator_categorize_items'] = array(
    'arguments' => array('form' => NULL),
    'file' => $pages_inc,
  );
  $hooks['aggregator_feed_source'] = array(
    'arguments' => array('feed' => NULL),
    'file' => $pages_inc,
    'template' => 'aggregator-feed-source',
  );
  $hooks['aggregator_block_item'] = array(
    'arguments' => array('item' => NULL, 'feed' => 0),
  );
  $hooks['aggregator_summary_items'] = array(
    'arguments' => array('summary_items' => NULL, 'source' => NULL),
    'file' => $pages_inc,
    'template' => 'aggregator-summary-items',
  );
  $hooks['aggregator_summary_item'] = array(
    'arguments' => array('item' => NULL),
    'file' => $pages_inc,
    'template' => 'aggregator-summary-item',
  );
  $hooks['aggregator_item'] = array(
    'arguments' => array('item' => NULL),
    'file' => $pages_inc,
    'template' => 'aggregator-item',
  );
  $hooks['aggregator_page_opml'] = array(
    'arguments' => array('feeds' => NULL),
    'file' => $pages_inc,
  );
  $hooks['aggregator_page_rss'] = array(
    'arguments' => array('feeds' => NULL, 'category' => NULL),
    'file' => $pages_inc,
  );

  return $hooks;
}

/**
 * Implementation of hook_menu().
 *
 * @return
 *   An array of menu item definitions keyed by path: the administration pages
 *   under admin/content/aggregator and the public listing pages under
 *   aggregator.
 */
function aggregator_menu() {
  // Values repeated across most of the definitions below.
  $admin_only = array('administer news feeds');
  $readers = array('access news feeds');
  $admin_inc = 'aggregator.admin.inc';
  $pages_inc = 'aggregator.pages.inc';

  $items = array();

  // Administration pages.
  $items['admin/content/aggregator'] = array(
    'title' => 'Feed aggregator',
    'description' => "Configure which content your site aggregates from other sites, how often it polls them, and how they're categorized.",
    'page callback' => 'aggregator_admin_overview',
    'access arguments' => $admin_only,
    'file' => $admin_inc,
  );
  $items['admin/content/aggregator/add/feed'] = array(
    'title' => 'Add feed',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_feed'),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'parent' => 'admin/content/aggregator',
    'file' => $admin_inc,
  );
  $items['admin/content/aggregator/add/category'] = array(
    'title' => 'Add category',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_category'),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'parent' => 'admin/content/aggregator',
    'file' => $admin_inc,
  );
  $items['admin/content/aggregator/remove/%aggregator_feed'] = array(
    'title' => 'Remove items',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_admin_remove_feed', 4),
    'access arguments' => $admin_only,
    'type' => MENU_CALLBACK,
    'file' => $admin_inc,
  );
  $items['admin/content/aggregator/update/%aggregator_feed'] = array(
    'title' => 'Update items',
    'page callback' => 'aggregator_admin_refresh_feed',
    'page arguments' => array(4),
    'access arguments' => $admin_only,
    'type' => MENU_CALLBACK,
    'file' => $admin_inc,
  );
  $items['admin/content/aggregator/list'] = array(
    'title' => 'List',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => -10,
  );
  $items['admin/content/aggregator/settings'] = array(
    'title' => 'Settings',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_admin_settings'),
    'type' => MENU_LOCAL_TASK,
    'weight' => 10,
    'access arguments' => $admin_only,
    'file' => $admin_inc,
  );

  // Public listing pages and machine-readable outputs.
  $items['aggregator'] = array(
    'title' => 'Feed aggregator',
    'page callback' => 'aggregator_page_last',
    'access arguments' => $readers,
    'weight' => 5,
    'file' => $pages_inc,
  );
  $items['aggregator/sources'] = array(
    'title' => 'Sources',
    'page callback' => 'aggregator_page_sources',
    'access arguments' => $readers,
    'file' => $pages_inc,
  );
  $items['aggregator/categories'] = array(
    'title' => 'Categories',
    'page callback' => 'aggregator_page_categories',
    'access callback' => '_aggregator_has_categories',
    'file' => $pages_inc,
  );
  $items['aggregator/rss'] = array(
    'title' => 'RSS feed',
    'page callback' => 'aggregator_page_rss',
    'access arguments' => $readers,
    'type' => MENU_CALLBACK,
    'file' => $pages_inc,
  );
  $items['aggregator/opml'] = array(
    'title' => 'OPML feed',
    'page callback' => 'aggregator_page_opml',
    'access arguments' => $readers,
    'type' => MENU_CALLBACK,
    'file' => $pages_inc,
  );

  // Per-category pages and their tabs.
  $items['aggregator/categories/%aggregator_category'] = array(
    'title callback' => '_aggregator_category_title',
    'title arguments' => array(2),
    'page callback' => 'aggregator_page_category',
    'page arguments' => array(2),
    'access callback' => 'user_access',
    'access arguments' => $readers,
    'file' => $pages_inc,
  );
  $items['aggregator/categories/%aggregator_category/view'] = array(
    'title' => 'View',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => -10,
  );
  $items['aggregator/categories/%aggregator_category/categorize'] = array(
    'title' => 'Categorize',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_page_category', 2),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'file' => $pages_inc,
  );
  $items['aggregator/categories/%aggregator_category/configure'] = array(
    'title' => 'Configure',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_category', 2),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
    'file' => $admin_inc,
  );

  // Per-feed pages and their tabs.
  $items['aggregator/sources/%aggregator_feed'] = array(
    'page callback' => 'aggregator_page_source',
    'page arguments' => array(2),
    'access arguments' => $readers,
    'type' => MENU_CALLBACK,
    'file' => $pages_inc,
  );
  $items['aggregator/sources/%aggregator_feed/view'] = array(
    'title' => 'View',
    'type' => MENU_DEFAULT_LOCAL_TASK,
    'weight' => -10,
  );
  $items['aggregator/sources/%aggregator_feed/categorize'] = array(
    'title' => 'Categorize',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_page_source', 2),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'file' => $pages_inc,
  );
  $items['aggregator/sources/%aggregator_feed/configure'] = array(
    'title' => 'Configure',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_feed', 2),
    'access arguments' => $admin_only,
    'type' => MENU_LOCAL_TASK,
    'weight' => 1,
    'file' => $admin_inc,
  );

  // Administrative edit forms.
  $items['admin/content/aggregator/edit/feed/%aggregator_feed'] = array(
    'title' => 'Edit feed',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_feed', 5),
    'access arguments' => $admin_only,
    'type' => MENU_CALLBACK,
    'file' => $admin_inc,
  );
  $items['admin/content/aggregator/edit/category/%aggregator_category'] = array(
    'title' => 'Edit category',
    'page callback' => 'drupal_get_form',
    'page arguments' => array('aggregator_form_category', 5),
    'access arguments' => $admin_only,
    'type' => MENU_CALLBACK,
    'file' => $admin_inc,
  );

  return $items;
}

/**
 * Menu title callback for a category page.
 *
 * @param $category
 *   An associative array describing the category; its 'title' element is
 *   read.
 *
 * @return
 *   The category title.
 */
function _aggregator_category_title($category) {
  $title = $category['title'];
  return $title;
}

/**
 * Implementation of hook_init().
 *
 * Adds the module's aggregator.css stylesheet to the page.
 */
function aggregator_init() {
  $module_path = drupal_get_path('module', 'aggregator');
  drupal_add_css($module_path . '/aggregator.css');
}

/**
 * Access callback: checks that categories exist and may be viewed.
 *
 * @return
 *   TRUE if the current user has the 'access news feeds' permission and at
 *   least one row exists in {aggregator_category}, FALSE otherwise.
 */
function _aggregator_has_categories() {
  // Skip the query entirely for users who may not view feeds.
  if (!user_access('access news feeds')) {
    return FALSE;
  }
  $category_count = db_result(db_query('SELECT COUNT(*) FROM {aggregator_category}'));
  return (bool) $category_count;
}

/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names defined by this module.
 */
function aggregator_perm() {
  $permissions = array();
  $permissions[] = 'administer news feeds';
  $permissions[] = 'access news feeds';
  return $permissions;
}

/**
 * Implementation of hook_cron().
 *
 * Refreshes every feed whose last check time plus refresh interval lies
 * before the current time.
 */
function aggregator_cron() {
  $now = time();
  $due_feeds = db_query('SELECT * FROM {aggregator_feed} WHERE checked + refresh < %d', $now);
  while ($due_feed = db_fetch_array($due_feeds)) {
    aggregator_refresh($due_feed);
  }
}

/**
 * Implementation of hook_block().
 *
 * Generates blocks for the latest news items in each category and feed.
 * Block deltas have the form "category-<cid>" or "feed-<fid>".
 *
 * @param $op
 *   The operation to perform: 'list', 'configure', 'save' or 'view'.
 * @param $delta
 *   The delta of the block being configured, saved or viewed.
 * @param $edit
 *   For 'save', the submitted values; $edit['block'] is the number of items
 *   to show.
 *
 * @return
 *   - 'list': an array of block descriptions keyed by delta, or nothing when
 *     there are no categories and no feeds.
 *   - 'configure': a form array holding the item-count selector.
 *   - 'view': an array with 'subject' and, when there are items, 'content';
 *     nothing when access is denied or the feed/category cannot be found.
 *   - anything else: nothing.
 */
function aggregator_block($op = 'list', $delta = 0, $edit = array()) {
  switch ($op) {
    case 'list':
      $result = db_query('SELECT cid, title FROM {aggregator_category} ORDER BY title');
      while ($category = db_fetch_object($result)) {
        $block['category-' . $category->cid]['info'] = t('!title category latest items', array(
          '!title' => $category->title,
        ));
      }
      $result = db_query('SELECT fid, title FROM {aggregator_feed} ORDER BY fid');
      while ($feed = db_fetch_object($result)) {
        $block['feed-' . $feed->fid]['info'] = t('!title feed latest items', array(
          '!title' => $feed->title,
        ));
      }
      break;

    case 'configure':
      // Pad so that a delta without a "-" separator does not read a missing
      // index.
      list($type, $id) = array_pad(explode('-', $delta), 2, NULL);
      if ($type == 'category') {
        $value = db_result(db_query('SELECT block FROM {aggregator_category} WHERE cid = %d', $id));
      }
      else {
        $value = db_result(db_query('SELECT block FROM {aggregator_feed} WHERE fid = %d', $id));
      }
      $form['block'] = array(
        '#type' => 'select',
        '#title' => t('Number of news items in block'),
        '#default_value' => $value,
        '#options' => drupal_map_assoc(range(2, 20)),
      );
      return $form;

    case 'save':
      list($type, $id) = array_pad(explode('-', $delta), 2, NULL);
      if ($type == 'category') {
        db_query('UPDATE {aggregator_category} SET block = %d WHERE cid = %d', $edit['block'], $id);
      }
      else {
        db_query('UPDATE {aggregator_feed} SET block = %d WHERE fid = %d', $edit['block'], $id);
      }
      break;

    case 'view':
      if (!user_access('access news feeds')) {
        break;
      }
      list($type, $id) = array_pad(explode('-', $delta), 2, NULL);

      // These stay at their defaults when the delta names an unknown type or
      // a feed/category that no longer exists, so the code below never reads
      // an undefined variable.
      $result = NULL;
      $read_more = '';
      switch ($type) {
        case 'feed':
          if ($feed = db_fetch_object(db_query('SELECT fid, title, block FROM {aggregator_feed} WHERE fid = %d', $id))) {
            $block['subject'] = check_plain($feed->title);
            $result = db_query_range('SELECT * FROM {aggregator_item} WHERE fid = %d ORDER BY timestamp DESC, iid DESC', $feed->fid, 0, $feed->block);
            $read_more = theme('more_link', url('aggregator/sources/' . $feed->fid), t("View this feed's recent news."));
          }
          break;

        case 'category':
          if ($category = db_fetch_object(db_query('SELECT cid, title, block FROM {aggregator_category} WHERE cid = %d', $id))) {
            $block['subject'] = check_plain($category->title);
            $result = db_query_range('SELECT i.* FROM {aggregator_category_item} ci LEFT JOIN {aggregator_item} i ON ci.iid = i.iid WHERE ci.cid = %d ORDER BY i.timestamp DESC, i.iid DESC', $category->cid, 0, $category->block);
            $read_more = theme('more_link', url('aggregator/categories/' . $category->cid), t("View this category's recent news."));
          }
          break;
      }

      $items = array();
      if ($result) {
        while ($item = db_fetch_object($result)) {
          $items[] = theme('aggregator_block_item', $item);
        }
      }

      // Only display the block if there are items to show.
      if (count($items) > 0) {
        $block['content'] = theme('item_list', $items) . $read_more;
      }
      break;
  }

  if (isset($block)) {
    return $block;
  }
}

/**
 * Add/edit/delete aggregator categories.
 *
 * The operation is chosen from the contents of $edit:
 * - 'cid' and 'title' set: the category's title and description are updated.
 * - 'cid' set, 'title' empty: the category and its block are deleted.
 * - 'cid' empty, 'title' set: a new category is inserted.
 * - neither set: nothing happens.
 * Whenever an operation ran, the matching menu link under
 * aggregator/categories/ is maintained as well.
 *
 * @param $edit
 *   An associative array describing the category to be added/edited/deleted.
 *   The elements read are 'cid', 'title' and 'description'.
 */
function aggregator_save_category($edit) {
  $existing = !empty($edit['cid']);
  $titled = !empty($edit['title']);

  // A new category without a title is not saved.
  if (!$existing && !$titled) {
    return;
  }

  if (!$existing) {
    db_query("INSERT INTO {aggregator_category} (title, description, block) VALUES ('%s', '%s', 5)", $edit['title'], $edit['description']);
    $link_path = 'aggregator/categories/' . db_last_insert_id('aggregator_category', 'cid');
    $op = 'insert';
  }
  elseif ($titled) {
    $link_path = 'aggregator/categories/' . $edit['cid'];
    db_query("UPDATE {aggregator_category} SET title = '%s', description = '%s' WHERE cid = %d", $edit['title'], $edit['description'], $edit['cid']);
    $op = 'update';
  }
  else {
    $link_path = 'aggregator/categories/' . $edit['cid'];
    db_query('DELETE FROM {aggregator_category} WHERE cid = %d', $edit['cid']);

    // Make sure there is no active block for this category.
    db_query("DELETE FROM {blocks} WHERE module = '%s' AND delta = '%s'", 'aggregator', 'category-' . $edit['cid']);
    $edit['title'] = '';
    $op = 'delete';
  }

  menu_link_maintain('aggregator', $op, $link_path, $edit['title']);
}

/**
 * Add/edit/delete an aggregator feed.
 *
 * The operation is chosen from the contents of $edit:
 * - 'fid' and 'title' set: the feed's title, URL and refresh interval are
 *   updated.
 * - 'fid' set, 'title' empty: the feed, its items, their category
 *   assignments and its block are deleted.
 * - 'fid' empty, 'title' set: a new feed is inserted.
 * For an existing feed the feed-to-category links are always cleared first;
 * they are written again from $edit['category'] whenever a title is given.
 *
 * @param $edit
 *   An associative array describing the feed to be added/edited/deleted. The
 *   elements read are 'fid', 'title', 'url', 'refresh' and 'category' (an
 *   array keyed by category ID whose non-empty values select the category).
 */
function aggregator_save_feed($edit) {
  $existing = !empty($edit['fid']);
  $titled = !empty($edit['title']);

  if ($existing) {
    // An existing feed is being modified, delete the category listings.
    db_query('DELETE FROM {aggregator_category_feed} WHERE fid = %d', $edit['fid']);
  }

  if ($existing && $titled) {
    db_query("UPDATE {aggregator_feed} SET title = '%s', url = '%s', refresh = %d WHERE fid = %d", $edit['title'], $edit['url'], $edit['refresh'], $edit['fid']);
  }
  elseif ($existing) {
    // No title: the feed is being deleted. Drop the category assignments of
    // its items before removing the feed and the items themselves.
    $conditions = array();
    $item_rows = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d', $edit['fid']);
    while ($row = db_fetch_object($item_rows)) {
      $conditions[] = 'iid = ' . $row->iid;
    }
    if ($conditions) {
      db_query('DELETE FROM {aggregator_category_item} WHERE ' . implode(' OR ', $conditions));
    }
    db_query('DELETE FROM {aggregator_feed} WHERE fid = %d', $edit['fid']);
    db_query('DELETE FROM {aggregator_item} WHERE fid = %d', $edit['fid']);

    // Make sure there is no active block for this feed.
    db_query("DELETE FROM {blocks} WHERE module = '%s' AND delta = '%s'", 'aggregator', 'feed-' . $edit['fid']);
  }
  elseif ($titled) {
    db_query("INSERT INTO {aggregator_feed} (title, url, refresh, block, description, image) VALUES ('%s', '%s', %d, 5, '', '')", $edit['title'], $edit['url'], $edit['refresh']);

    // Remember the new feed ID for the category links written below.
    $edit['fid'] = db_last_insert_id('aggregator_feed', 'fid');
  }

  // Only a feed that is being saved gets category links.
  if (!$titled || empty($edit['category'])) {
    return;
  }
  foreach ($edit['category'] as $cid => $selected) {
    if ($selected) {
      db_query('INSERT INTO {aggregator_category_feed} (fid, cid) VALUES (%d, %d)', $edit['fid'], $cid);
    }
  }
}

/**
 * Removes all items from a feed.
 *
 * Deletes the feed's items and their category assignments, resets the feed's
 * checked time, ETag and modified time, and sets a confirmation message.
 *
 * @param $feed
 *   An associative array describing the feed to be cleared. The elements
 *   read are 'fid' and 'title'.
 */
function aggregator_remove($feed) {
  $fid = $feed['fid'];

  // One "iid = N" condition per item belonging to the feed.
  $conditions = array();
  $item_rows = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d', $fid);
  while ($row = db_fetch_object($item_rows)) {
    $conditions[] = 'iid = ' . $row->iid;
  }
  if ($conditions) {
    db_query('DELETE FROM {aggregator_category_item} WHERE ' . implode(' OR ', $conditions));
  }

  db_query('DELETE FROM {aggregator_item} WHERE fid = %d', $fid);
  db_query("UPDATE {aggregator_feed} SET checked = 0, etag = '', modified = 0 WHERE fid = %d", $fid);

  $message = t('The news items from %site have been removed.', array(
    '%site' => $feed['title'],
  ));
  drupal_set_message($message);
}

/**
 * Call-back function used by the XML parser: handles an opening tag.
 *
 * Tracks parser state in globals: $element is the enclosing element whose
 * character data is being collected, $tag is the most recently opened tag and
 * $item is the index of the current feed item. Links flagged rel="alternate"
 * are stored on the current item or, outside an item, on the channel.
 *
 * @param $parser
 *   The XML parser invoking the callback (unused).
 * @param $name
 *   The name of the opened element; the cases below expect it upper-cased.
 * @param $attributes
 *   An associative array of the element's attributes.
 */
function aggregator_element_start($parser, $name, $attributes) {
  global $item, $element, $tag, $items, $channel;
  switch ($name) {
    case 'IMAGE':
    case 'TEXTINPUT':
    case 'CONTENT':
    case 'SUMMARY':
    case 'TAGLINE':
    case 'SUBTITLE':
    case 'LOGO':
    case 'INFO':
      $element = $name;
      break;

    case 'ID':
      // An ID inside an item is collected as part of the item.
      if ($element != 'ITEM') {
        $element = $name;
      }
      // Stop here: an ID element is not a link and must not be run through
      // the LINK handling below.
      break;

    case 'LINK':
      // Require HREF as well, so that a link without a target is ignored
      // instead of reading a missing attribute.
      if (!empty($attributes['REL']) && $attributes['REL'] == 'alternate' && isset($attributes['HREF'])) {
        if ($element == 'ITEM') {
          $items[$item]['LINK'] = $attributes['HREF'];
        }
        else {
          $channel['LINK'] = $attributes['HREF'];
        }
      }
      break;

    case 'ITEM':
      $element = $name;
      $item += 1;
      break;

    case 'ENTRY':
      // ENTRY elements are handled exactly like ITEM elements.
      $element = 'ITEM';
      $item += 1;
      break;
  }
  $tag = $name;
}

/**
 * Call-back function used by the XML parser: handles a closing tag.
 *
 * Clears the global $element when one of the container elements listed below
 * closes, or when an ID element closes while ID is the current element.
 *
 * @param $parser
 *   The XML parser invoking the callback (unused).
 * @param $name
 *   The name of the closed element.
 */
function aggregator_element_end($parser, $name) {
  global $element;

  // NOTE(review): SUMMARY, TAGLINE, SUBTITLE and LOGO are set as $element by
  // the start handler but are not cleared here - confirm this is intended.
  $resetting = array('IMAGE', 'TEXTINPUT', 'ITEM', 'ENTRY', 'CONTENT', 'INFO');

  if (in_array($name, $resetting)) {
    $element = '';
  }
  elseif ($name == 'ID' && $element == 'ID') {
    $element = '';
  }
}

/**
 * Appends character data to an array element, creating it when missing.
 *
 * @param $target
 *   The array to modify, passed by reference.
 * @param $key
 *   The key of the element to append to.
 * @param $data
 *   The string to append.
 */
function _aggregator_element_append(&$target, $key, $data) {
  $target += array($key => '');
  $target[$key] .= $data;
}

/**
 * Call-back function used by the XML parser: handles character data.
 *
 * Appends the data to the current item, the image or the channel, depending
 * on the global $element. Data may arrive in several pieces per tag, so it is
 * always appended rather than assigned.
 *
 * @param $parser
 *   The XML parser invoking the callback (unused).
 * @param $data
 *   The character data to store.
 */
function aggregator_element_data($parser, $data) {
  global $channel, $element, $items, $item, $image, $tag;

  // Make sure an entry exists for the current item index.
  $items += array($item => array());

  switch ($element) {
    case 'ITEM':
      _aggregator_element_append($items[$item], $tag, $data);
      return;

    case 'IMAGE':
    case 'LOGO':
      _aggregator_element_append($image, $tag, $data);
      return;

    case 'LINK':
      if ($data) {
        _aggregator_element_append($items[$item], $tag, $data);
      }
      return;

    case 'CONTENT':
      _aggregator_element_append($items[$item], 'CONTENT', $data);
      return;

    case 'SUMMARY':
      _aggregator_element_append($items[$item], 'SUMMARY', $data);
      return;

    case 'TAGLINE':
    case 'SUBTITLE':
      // Both are stored as the channel description.
      _aggregator_element_append($channel, 'DESCRIPTION', $data);
      return;

    case 'INFO':
    case 'ID':
    case 'TEXTINPUT':
      // The sub-element is not supported. However, we must recognize
      // it or its contents will end up in the item array.
      return;

    default:
      // Anything outside a recognized element belongs to the channel.
      _aggregator_element_append($channel, $tag, $data);
  }
}

/**
 * Checks a news feed for new items.
 *
 * Requests the feed with conditional GET headers, hands the response to
 * aggregator_parse_feed() and, when parsing succeeds, stores the channel
 * information (link, description, image, ETag, modification time) on the
 * feed record. Outcomes are reported through watchdog() and
 * drupal_set_message().
 *
 * @param $feed
 *   An associative array describing the feed to be refreshed. The elements
 *   read are 'fid', 'title', 'url', 'etag' and 'modified'.
 */
function aggregator_refresh($feed) {
  global $channel, $image;

  // Generate conditional GET headers.
  $headers = array();
  if ($feed['etag']) {
    $headers['If-None-Match'] = $feed['etag'];
  }
  if ($feed['modified']) {
    $headers['If-Modified-Since'] = gmdate('D, d M Y H:i:s', $feed['modified']) . ' GMT';
  }

  // Request feed.
  $result = drupal_http_request($feed['url'], $headers);

  // Process HTTP response code.
  switch ($result->code) {
    case 304:
      // Not modified: only record that the feed was checked.
      db_query('UPDATE {aggregator_feed} SET checked = %d WHERE fid = %d', time(), $feed['fid']);
      drupal_set_message(t('There is no new syndicated content from %site.', array(
        '%site' => $feed['title'],
      )));
      break;

    case 301:
      // Moved permanently: remember the new URL, then process the response
      // like a regular one.
      $feed['url'] = $result->redirect_url;
      watchdog('aggregator', 'Updated URL for feed %title to %url.', array(
        '%title' => $feed['title'],
        '%url' => $feed['url'],
      ));

    // Deliberate no break.
    case 200:
    case 302:
    case 307:

      // Filter the input data:
      if (aggregator_parse_feed($result->data, $feed)) {
        $modified = empty($result->headers['Last-Modified']) ? 0 : strtotime($result->headers['Last-Modified']);

        // Prepare the channel data.
        foreach ($channel as $key => $value) {
          $channel[$key] = trim($value);
        }

        // Prepare the image data (if any).
        foreach ($image as $key => $value) {
          $image[$key] = trim($value);
        }
        if (!empty($image['LINK']) && !empty($image['URL']) && !empty($image['TITLE'])) {

          // The markup is built by hand rather than with theme_image(); the
          // original note says theme_image() only works with local images.
          $image = '<a href="' . check_url($image['LINK']) . '" class="feed-image"><img src="' . check_url($image['URL']) . '" alt="' . check_plain($image['TITLE']) . '" /></a>';
        }
        else {
          $image = NULL;
        }
        $etag = empty($result->headers['ETag']) ? '' : $result->headers['ETag'];

        // The parser only creates these keys when the feed contains the
        // corresponding elements, so fall back to empty strings.
        $link = isset($channel['LINK']) ? $channel['LINK'] : '';
        $description = isset($channel['DESCRIPTION']) ? $channel['DESCRIPTION'] : '';

        // Update the feed data.
        db_query("UPDATE {aggregator_feed} SET url = '%s', checked = %d, link = '%s', description = '%s', image = '%s', etag = '%s', modified = %d WHERE fid = %d", $feed['url'], time(), $link, $description, $image, $etag, $modified, $feed['fid']);

        // Clear the cache.
        cache_clear_all();
        watchdog('aggregator', 'There is new syndicated content from %site.', array(
          '%site' => $feed['title'],
        ));
        drupal_set_message(t('There is new syndicated content from %site.', array(
          '%site' => $feed['title'],
        )));
      }
      break;

    default:
      // Any other response code is reported as a broken feed.
      watchdog('aggregator', 'The feed from %site seems to be broken, due to "%error".', array(
        '%site' => $feed['title'],
        '%error' => $result->code . ' ' . $result->error,
      ), WATCHDOG_WARNING);
      drupal_set_message(t('The feed from %site seems to be broken, because of error "%error".', array(
        '%site' => $feed['title'],
        '%error' => $result->code . ' ' . $result->error,
      )));
  }
}

/**
 * Parse the W3C date/time format, a subset of ISO 8601.
 *
 * See http://www.w3.org/TR/NOTE-datetime for more information.
 * Originally from MagpieRSS (http://magpierss.sourceforge.net/).
 *
 * Accepted input looks like YYYY-MM-DDThh:mm[:ss][TZD], where TZD is "Z" or
 * an offset such as "+02:00" or "-0530". A date without a time zone
 * designator is treated as GMT.
 *
 * @param $date_str
 *   A string with a potentially W3C DTF date.
 * @return
 *   A timestamp if parsed successfully or FALSE if not.
 */
function aggregator_parse_w3cdtf($date_str) {
  // Capture groups: 1 year, 2 month, 3 day, 4 hours, 5 minutes, 6 ":ss",
  // 7 seconds, 8 offset sign, 9 offset hours, 10 offset minutes, 11 "Z".
  $pattern = '/(\\d{4})-(\\d{2})-(\\d{2})T(\\d{2}):(\\d{2})(:(\\d{2}))?(?:([-+])(\\d{2}):?(\\d{2})|(Z))?/';
  if (!preg_match($pattern, $date_str, $match)) {
    return FALSE;
  }

  // Trailing optional groups that did not take part in the match are left
  // out of $match altogether, so make every index available.
  $match += array_fill(0, 12, '');

  // Seconds are group 7; group 6 still carries the leading colon. Missing
  // seconds count as zero.
  $seconds = (int) $match[7];

  // Calculate the epoch for the given date assuming GMT.
  $epoch = gmmktime((int) $match[4], (int) $match[5], $seconds, (int) $match[2], (int) $match[3], (int) $match[1]);

  // "Z" is zulu time, aka GMT, and needs no correction. Otherwise apply the
  // numeric offset if one was given.
  if ($match[11] != 'Z' && $match[8] !== '') {
    $offset_secs = ((int) $match[9] * 60 + (int) $match[10]) * 60;

    // Is the timezone ahead of GMT? Then subtract the offset.
    if ($match[8] == '+') {
      $offset_secs *= -1;
    }
    $epoch += $offset_secs;
  }
  return $epoch;
}

/**
 * Parse a feed and store its items.
 *
 * The XML in $data is run through an XML parser that uses the
 * aggregator_element_* callbacks. The global $items array is then read back,
 * each item is normalized (title, link, GUID, description, timestamp, author)
 * and handed to aggregator_save_item(). Finally, items of this feed that are
 * older than the 'aggregator_clear' setting are deleted.
 *
 * @param $data
 *   The feed data.
 * @param $feed
 *   An associative array describing the feed to be parsed. The keys 'title',
 *   'link', 'url' and 'fid' are read.
 * @return
 *   0 on error, 1 otherwise.
 */
function aggregator_parse_feed(&$data, $feed) {
  global $items, $image, $channel;

  // Unset the global variables before we use them:
  unset($GLOBALS['element'], $GLOBALS['item'], $GLOBALS['tag']);
  $items = array();
  $image = array();
  $channel = array();

  // Parse the data:
  $xml_parser = drupal_xml_parser_create($data);
  xml_set_element_handler($xml_parser, 'aggregator_element_start', 'aggregator_element_end');
  xml_set_character_data_handler($xml_parser, 'aggregator_element_data');
  if (!xml_parse($xml_parser, $data, 1)) {
    // Collect the error details once, while the parser is still available, so
    // that the parser can be released before returning.
    $error_args = array(
      '%site' => $feed['title'],
      '%error' => xml_error_string(xml_get_error_code($xml_parser)),
      '%line' => xml_get_current_line_number($xml_parser),
    );

    // NOTE(review): the guard assumes drupal_xml_parser_create() may hand back
    // a falsy value when no parser could be created - confirm against its
    // implementation.
    if ($xml_parser) {
      xml_parser_free($xml_parser);
    }
    watchdog('aggregator', 'The feed from %site seems to be broken, due to an error "%error" on line %line.', $error_args, WATCHDOG_WARNING);
    drupal_set_message(t('The feed from %site seems to be broken, because of error "%error" on line %line.', $error_args), 'error');
    return 0;
  }
  xml_parser_free($xml_parser);

  // We reverse the array such that we store the first item last, and the last
  // item first. In the database, the newest item should be at the top.
  $items = array_reverse($items);

  foreach ($items as $item) {
    // Prepare the item:
    foreach ($item as $key => $value) {
      $item[$key] = trim($value);
    }

    // Resolve the item's title. If no title is found, we use up to 40
    // characters of the description ending at a word boundary but not
    // splitting potential entities.
    if (!empty($item['TITLE'])) {
      $title = $item['TITLE'];
    }
    elseif (!empty($item['DESCRIPTION'])) {
      $title = preg_replace('/^(.*)[^\\w;&].*?$/', "\\1", truncate_utf8($item['DESCRIPTION'], 40));
    }
    else {
      $title = '';
    }

    // Resolve the item's link, falling back to the link of the feed itself.
    if (!empty($item['LINK'])) {
      $link = $item['LINK'];
    }
    else {
      $link = $feed['link'];
    }

    // Atom feeds use ID rather than GUID.
    if (isset($item['GUID'])) {
      $guid = $item['GUID'];
    }
    elseif (isset($item['ID'])) {
      $guid = $item['ID'];
    }
    else {
      $guid = '';
    }

    // Atom feeds have a CONTENT and/or SUMMARY tag instead of a DESCRIPTION
    // tag. Precedence: CONTENT:ENCODED, then SUMMARY, then CONTENT; if none is
    // present an existing DESCRIPTION is left untouched.
    if (!empty($item['CONTENT:ENCODED'])) {
      $item['DESCRIPTION'] = $item['CONTENT:ENCODED'];
    }
    elseif (!empty($item['SUMMARY'])) {
      $item['DESCRIPTION'] = $item['SUMMARY'];
    }
    elseif (!empty($item['CONTENT'])) {
      $item['DESCRIPTION'] = $item['CONTENT'];
    }

    // Try to resolve and parse the item's publication date. The first
    // non-empty tag in this list wins. If no date is found, we use the current
    // date instead.
    $date = 'now';
    foreach (array(
      'PUBDATE',
      'DC:DATE',
      'DCTERMS:ISSUED',
      'DCTERMS:CREATED',
      'DCTERMS:MODIFIED',
      'ISSUED',
      'CREATED',
      'MODIFIED',
      'PUBLISHED',
      'UPDATED',
    ) as $key) {
      if (!empty($item[$key])) {
        $date = $item[$key];
        break;
      }
    }
    $timestamp = strtotime($date);

    // As of PHP 5.1.0, strtotime returns FALSE on failure instead of -1. Both
    // cases are caught by this comparison.
    if ($timestamp <= 0) {
      $timestamp = aggregator_parse_w3cdtf($date);

      // aggregator_parse_w3cdtf() returns FALSE on failure; the current time
      // is better than nothing.
      if (!$timestamp) {
        $timestamp = time();
      }
    }

    // Resolve dc:creator tag as the item author if author tag is not set.
    if (empty($item['AUTHOR']) && !empty($item['DC:CREATOR'])) {
      $item['AUTHOR'] = $item['DC:CREATOR'];
    }

    // Save this item. Try to avoid duplicate entries as much as possible. If
    // we find a duplicate entry, we resolve it and pass along its ID such
    // that we can update it if needed. Items are matched by GUID when there is
    // one, otherwise by link when the link is specific to the item, and by
    // title as a last resort.
    if (!empty($guid)) {
      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND guid = '%s'", $feed['fid'], $guid));
    }
    elseif ($link && $link != $feed['link'] && $link != $feed['url']) {
      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND link = '%s'", $feed['fid'], $link));
    }
    else {
      $entry = db_fetch_object(db_query("SELECT iid FROM {aggregator_item} WHERE fid = %d AND title = '%s'", $feed['fid'], $title));
    }

    // Make sure the optional keys read below exist.
    $item += array(
      'AUTHOR' => '',
      'DESCRIPTION' => '',
    );
    aggregator_save_item(array(
      'iid' => isset($entry->iid) ? $entry->iid : '',
      'fid' => $feed['fid'],
      'timestamp' => $timestamp,
      'title' => $title,
      'link' => $link,
      'author' => $item['AUTHOR'],
      'description' => $item['DESCRIPTION'],
      'guid' => $guid,
    ));
  }

  // Remove all items that are older than flush item timer (the
  // 'aggregator_clear' variable, in seconds). Note that this reuses the
  // global $items array to collect the expired item IDs.
  $age = time() - variable_get('aggregator_clear', 9676800);
  $result = db_query('SELECT iid FROM {aggregator_item} WHERE fid = %d AND timestamp < %d', $feed['fid'], $age);
  $items = array();
  while ($item = db_fetch_object($result)) {
    $items[] = $item->iid;
  }
  if ($items) {
    // Drop the category assignments of the expired items first, then the
    // items themselves.
    db_query('DELETE FROM {aggregator_category_item} WHERE iid IN (' . implode(', ', $items) . ')');
    db_query('DELETE FROM {aggregator_item} WHERE fid = %d AND timestamp < %d', $feed['fid'], $age);
  }
  return 1;
}

/**
 * Add/edit/delete an aggregator item.
 *
 * The action is derived from the contents of $edit:
 * - 'iid' and 'title' set: the existing item is updated.
 * - 'iid' set, 'title' empty: the item and its category assignments are
 *   deleted.
 * - 'iid' empty, 'title' and 'link' set: a new item is inserted and filed
 *   under every category assigned to its feed.
 * - anything else: nothing happens.
 *
 * @param $edit
 *   An associative array describing the item to be added/edited/deleted.
 */
function aggregator_save_item($edit) {
  // Existing item: either update it or, when the title is gone, remove it.
  if ($edit['iid']) {
    if ($edit['title']) {
      db_query("UPDATE {aggregator_item} SET title = '%s', link = '%s', author = '%s', description = '%s', guid = '%s', timestamp = %d WHERE iid = %d", $edit['title'], $edit['link'], $edit['author'], $edit['description'], $edit['guid'], $edit['timestamp'], $edit['iid']);
      return;
    }
    db_query('DELETE FROM {aggregator_item} WHERE iid = %d', $edit['iid']);
    db_query('DELETE FROM {aggregator_category_item} WHERE iid = %d', $edit['iid']);
    return;
  }

  // New item: both a title and a link are required.
  if (!$edit['title'] || !$edit['link']) {
    return;
  }
  db_query("INSERT INTO {aggregator_item} (fid, title, link, author, description, timestamp, guid) VALUES (%d, '%s', '%s', '%s', '%s', %d, '%s')", $edit['fid'], $edit['title'], $edit['link'], $edit['author'], $edit['description'], $edit['timestamp'], $edit['guid']);
  $new_iid = db_last_insert_id('aggregator_item', 'iid');

  // File the item in the categories indicated by the feed.
  $feed_categories = db_query('SELECT cid FROM {aggregator_category_feed} WHERE fid = %d', $edit['fid']);
  while ($row = db_fetch_object($feed_categories)) {
    db_query('INSERT INTO {aggregator_category_item} (cid, iid) VALUES (%d, %d)', $row->cid, $new_iid);
  }
}

/**
 * Load an aggregator feed.
 *
 * Results are kept in a static variable, so a feed that was already found is
 * queried only once per request.
 *
 * @param $fid
 *   The feed id.
 * @return
 *   An associative array describing the feed.
 */
function aggregator_feed_load($fid) {
  static $loaded_feeds;

  // Serve from the per-request cache when possible.
  if (isset($loaded_feeds[$fid])) {
    return $loaded_feeds[$fid];
  }

  $result = db_query('SELECT * FROM {aggregator_feed} WHERE fid = %d', $fid);
  $loaded_feeds[$fid] = db_fetch_array($result);
  return $loaded_feeds[$fid];
}

/**
 * Load an aggregator category.
 *
 * Results are kept in a static variable, so a category that was already found
 * is queried only once per request.
 *
 * @param $cid
 *   The category id.
 * @return
 *   An associative array describing the category.
 */
function aggregator_category_load($cid) {
  static $loaded_categories;

  // Serve from the per-request cache when possible.
  if (isset($loaded_categories[$cid])) {
    return $loaded_categories[$cid];
  }

  $result = db_query('SELECT * FROM {aggregator_category} WHERE cid = %d', $cid);
  $loaded_categories[$cid] = db_fetch_array($result);
  return $loaded_categories[$cid];
}

/**
 * Format an individual feed item for display in the block.
 *
 * Logged-in users who may create blog entries (and only when the blog module
 * is enabled) additionally get a "blog it" icon that links to the blog node
 * form with the item id in the query string.
 *
 * @param $item
 *   The item to be displayed. Its iid, link and title properties are read.
 * @param $feed
 *   Not used.
 * @return
 *   The item HTML.
 * @ingroup themeable
 */
function theme_aggregator_block_item($item, $feed = 0) {
  global $user;
  $output = '';

  $may_blog = $user->uid && module_exists('blog') && user_access('create blog entries');
  if ($may_blog) {
    $image = theme('image', 'misc/blog.png', t('blog it'), t('blog it'));

    // Only emit the icon wrapper when the image could actually be themed.
    if ($image) {
      $link_options = array(
        'attributes' => array(
          'title' => t('Comment on this news item in your personal blog.'),
          'class' => 'blog-it',
        ),
        'query' => "iid={$item->iid}",
        'html' => TRUE,
      );
      $blog_link = l($image, 'node/add/blog', $link_options);
      $output .= '<div class="icon">' . $blog_link . '</div>';
    }
  }

  // Display the external link to the item; URL and title are escaped.
  $href = check_url($item->link);
  $label = check_plain($item->title);
  $output .= '<a href="' . $href . '">' . $label . "</a>\n";
  return $output;
}

/**
 * Safely render HTML content, as allowed.
 *
 * The allowed tags come from the 'aggregator_allowed_html_tags' variable, a
 * string such as "<a> <b>", which is split into bare tag names before being
 * passed to filter_xss().
 *
 * @param $value
 *   The content to be filtered.
 * @return
 *   The filtered content.
 */
function aggregator_filter_xss($value) {
  $tag_setting = variable_get('aggregator_allowed_html_tags', '<a> <b> <br> <dd> <dl> <dt> <em> <i> <li> <ol> <p> <strong> <u> <ul>');

  // Split on whitespace and angle brackets, dropping the empty pieces, so
  // that only the tag names remain.
  $allowed_tags = preg_split('/\\s+|<|>/', $tag_setting, -1, PREG_SPLIT_NO_EMPTY);

  return filter_xss($value, $allowed_tags);
}

/**
 * Helper function for drupal_map_assoc.
 *
 * Turns an item count into a human-readable label.
 *
 * @param $count
 *   Items count.
 * @return
 *   Plural-formatted "@count items"
 */
function _aggregator_items($count) {
  $label = format_plural($count, '1 item', '@count items');
  return $label;
}

Functions

Name	Description
aggregator_block Implementation of hook_block().
aggregator_category_load Load an aggregator category.
aggregator_cron Implementation of hook_cron().
aggregator_element_data Call-back function used by the XML parser.
aggregator_element_end Call-back function used by the XML parser.
aggregator_element_start Call-back function used by the XML parser.
aggregator_feed_load Load an aggregator feed.
aggregator_filter_xss Safely render HTML content, as allowed.
aggregator_help Implementation of hook_help().
aggregator_init Implementation of hook_init().
aggregator_menu Implementation of hook_menu().
aggregator_parse_feed Parse a feed and store its items.
aggregator_parse_w3cdtf Parse the W3C date/time format, a subset of ISO 8601. PHP date parsing functions do not handle this format. See http://www.w3.org/TR/NOTE-datetime for more information. Originally from MagpieRSS (http://magpierss.sourceforge.net/).
aggregator_perm Implementation of hook_perm().
aggregator_refresh Checks a news feed for new items.
aggregator_remove Removes all items from a feed.
aggregator_save_category Add/edit/delete aggregator categories.
aggregator_save_feed Add/edit/delete an aggregator feed.
aggregator_save_item Add/edit/delete an aggregator item.
aggregator_theme Implementation of hook_theme().
theme_aggregator_block_item Format an individual feed item for display in the block.
_aggregator_category_title Menu callback.
_aggregator_has_categories Find out whether there are any aggregator categories.
_aggregator_items Helper function for drupal_map_assoc.