You are here

htmlpurifier.module in HTML Purifier 6

Implements HTML Purifier as a Drupal filter.

File

htmlpurifier.module
View source
<?php

/**
 * @file
 * Implements HTML Purifier as a Drupal filter.
 */

// -- HOOK IMPLEMENTATIONS -------------------------------------------------- //

/**
 * Implementation of hook_help().
 *
 * @param string $path
 *    Path for which help text is being requested.
 * @param array $arg
 *    Unused by this implementation.
 * @return
 *    Translated one-line module description when $path is
 *    'admin/modules#htmlpurifier', NULL for every other path.
 */
function htmlpurifier_help($path, $arg) {
  // Only the modules listing entry has help text; loose comparison mirrors
  // the matching semantics of a switch statement.
  if ($path == 'admin/modules#htmlpurifier') {
    return t('Filter that removes malicious HTML and ensures standards compliant output.');
  }
  return NULL;
}

/**
 * Implementation of hook_cron().
 *
 * Fetches the latest published HTML Purifier version number and stores it in
 * the 'htmlpurifier_version_current' variable.
 *
 * @note
 *    Checks for updates to the HTML Purifier library. If the request fails,
 *    or the response body does not look like a version number, the stored
 *    value is left untouched so that a previously recorded version is not
 *    overwritten with an empty string or an error page.
 */
function htmlpurifier_cron() {

  // Maybe this should be changed in the future:
  $result = drupal_http_request('http://htmlpurifier.org/live/VERSION');

  // Bail out when the request reported an error or carried no body.
  if (!is_object($result) || !empty($result->error) || !isset($result->data)) {
    return;
  }
  $version = trim($result->data);

  // Only accept something shaped like a version string (starts with a digit,
  // contains only version-ish characters); this rejects HTML error pages.
  if (!preg_match('/^\d[0-9A-Za-z.+\-]*$/', $version)) {
    return;
  }
  variable_set('htmlpurifier_version_current', $version);
}

/**
 * Implementation of hook_init().
 *
 * For users with the 'access administration pages' permission, compares the
 * latest known HTML Purifier version against the bundled one and shows a
 * warning message when the bundled library is older.
 *
 * @note
 *    Displays any need to update the HTML Purifier library.
 */
function htmlpurifier_init() {
  // Nothing to report to users who cannot administer the site.
  if (!user_access('access administration pages')) {
    return;
  }

  // Latest published version; fetched on demand if cron has not stored it.
  $latest = variable_get('htmlpurifier_version_current', FALSE);
  if (!$latest) {
    htmlpurifier_cron();
    $latest = variable_get('htmlpurifier_version_current', FALSE);
  }

  // Version of the bundled library. Loading the library re-registers it, so
  // do that when it is unknown or appears to be behind.
  $installed = variable_get('htmlpurifier_version_ours', FALSE);
  if (!$installed || version_compare($latest, $installed, '>')) {
    _htmlpurifier_load();
    $installed = variable_get('htmlpurifier_version_ours', FALSE);
  }

  // Without both version numbers there is nothing to compare.
  if (!$latest || !$installed) {
    return;
  }
  if (version_compare($latest, $installed, '>')) {
    $message = t('Your version of HTML Purifier is currently out of date; ' . 'the most recent version is %version. You can download the latest ' . 'version at <a href="http://htmlpurifier.org">htmlpurifier.org</a>. ' . 'To update, place the contents of HTML Purifier\'s <code>library/</code> folder in ' . '<code>modules/htmlpurifier/library/</code>.', array(
      '%version' => $latest,
    ));
    drupal_set_message($message, 'warning', FALSE);
  }
}

/**
 * Implementation of hook_filter().
 *
 * @param string $op
 *    Operation being requested: 'list', 'no cache', 'description',
 *    'prepare', 'process' or 'settings'.
 * @param int $delta
 *    Which of the two listed filters is meant: 0 (basic) or 1 (advanced).
 * @param int $format
 *    Input format the operation applies to.
 * @param string $text
 *    Text to prepare or process.
 * @return
 *    Depends on $op: the filter list, a boolean for 'no cache', a
 *    description string, the (un)processed text, or a settings form array.
 *    NULL for an unknown $op, and for 'description' with an unknown $delta.
 */
function htmlpurifier_filter($op, $delta = 0, $format = -1, $text = '') {
  switch ($op) {
    case 'list':
      return array(
        0 => t('HTML Purifier'),
        1 => t('HTML Purifier (advanced)'),
      );
    case 'no cache':

      // Since HTML Purifier implements its own caching layer, having filter
      // cache it again is wasteful. Returns FALSE if double caching is permitted.
      return !variable_get("htmlpurifier_doublecache_{$format}", FALSE);
    case 'description':
      $common = t('Removes malicious HTML code and ensures that the output ' . 'is standards compliant. <strong>Warning:</strong> For performance ' . 'reasons, please ensure that there are no highly dynamic filters before HTML Purifier. ');
      switch ($delta) {
        case 0:
          return $common;
        case 1:
          return $common . t('<em>This version has advanced configuration options, do not enable both at the same time.</em>');
        default:

          // Without this, an unknown delta would fall through into the
          // 'prepare' case below and hand back $text as the description.
          return NULL;
      }
    case 'prepare':

      // No preparation step is needed; the text is passed through untouched.
      return $text;
    case 'process':
      return _htmlpurifier_process($text, $format);
    case 'settings':
      return _htmlpurifier_settings($delta, $format);
    default:
      return NULL;
  }
}

/**
 * Implementation of hook_filter_tips().
 *
 * @param int $delta
 *    Unused by this implementation.
 * @param int $format
 *    Input format whose "display help text" setting is consulted.
 * @param boolean $long
 *    Unused by this implementation.
 * @return
 *    Translated tip string when the 'htmlpurifier_help_N' variable for the
 *    format is truthy (it defaults to TRUE), otherwise NULL.
 */
function htmlpurifier_filter_tips($delta, $format, $long = FALSE) {
  $show_tip = variable_get("htmlpurifier_help_{$format}", TRUE);
  if (!$show_tip) {
    return NULL;
  }
  return t('HTML tags will be transformed to conform to HTML standards.');
}

// -- INTERNAL FUNCTIONS ---------------------------------------------------- //

/**
 * Purifies HTML using the configuration of an input format, consulting and
 * populating the 'cache_htmlpurifier' cache bin when caching is enabled.
 *
 * @param string $text
 *    Text to purify
 * @param int $format
 *    Input format corresponding to HTML Purifier's configuration.
 * @param boolean $cache
 *    Whether or not to check (and afterwards fill) the cache.
 * @return
 *    The cached result if one exists, otherwise the value returned by
 *    HTMLPurifier::purify() for $text.
 *
 * @note
 *    We ignore $delta because the only difference it makes is in the configuration
 *    screen.
 */
function _htmlpurifier_process($text, $format, $cache = TRUE) {
  $cid = NULL;
  if ($cache) {
    // Entries are keyed by format and a hash of the input text.
    $cid = $format . ':' . md5($text);
    $cached = cache_get($cid, 'cache_htmlpurifier');
    if ($cached) {
      return $cached->data;
    }
  }

  // Cache miss (or caching disabled): run the purifier.
  _htmlpurifier_load();
  $format_config = _htmlpurifier_get_config($format);
  $purifier = new HTMLPurifier($format_config);
  $purified = $purifier->purify($text);

  if ($cache) {
    cache_set($cid, $purified, 'cache_htmlpurifier', CACHE_PERMANENT);
  }
  return $purified;
}

/**
 * Loads the HTML Purifier library, and performs global initialization.
 *
 * Does nothing when the HTMLPurifier class already exists. Otherwise it
 * includes the library and the Drupal definition cache, registers that cache
 * under the name 'Drupal', and records the library's version in the
 * 'htmlpurifier_version_ours' variable.
 */
function _htmlpurifier_load() {
  if (!class_exists('HTMLPurifier')) {
    $base = drupal_get_path('module', 'htmlpurifier');
    require_once $base . '/library/HTMLPurifier.auto.php';
    require_once $base . '/HTMLPurifier_DefinitionCache_Drupal.php';

    // Make the Drupal-backed definition cache available by name.
    $cache_factory = HTMLPurifier_DefinitionCacheFactory::instance();
    $cache_factory->register('Drupal', 'HTMLPurifier_DefinitionCache_Drupal');

    // Register the version as a variable. Use the class constant when the
    // library defines one; otherwise read it from an instance property.
    if (defined('HTMLPurifier::VERSION')) {
      $library_version = HTMLPurifier::VERSION;
    }
    else {
      $instance = new HTMLPurifier();
      $library_version = $instance->version;
    }
    variable_set('htmlpurifier_version_ours', $library_version);
  }
}

/**
 * Builds the HTMLPurifier_Config object for an input format.
 *
 * Starts from the library defaults, applies this module's baseline
 * directives, and then lets either a per-format configuration function or
 * the stored web-form settings customise the result.
 *
 * @param int $format
 *    Input format.
 * @return
 *    Instance of HTMLPurifier_Config.
 */
function _htmlpurifier_get_config($format) {
  $config = HTMLPurifier_Config::createDefault();

  // Baseline directives as (namespace, directive, value), applied in order.
  $baseline = array(
    array('AutoFormat', 'AutoParagraph', TRUE),
    array('AutoFormat', 'Linkify', TRUE),
    array('HTML', 'Doctype', 'XHTML 1.0 Transitional'),
    // Probably
    array('Core', 'AggressivelyFixLt', TRUE),
    array('Cache', 'DefinitionImpl', 'Drupal'),
    // Filter HTML doesn't allow external images, so neither will we...
    // for now. This can be configured off.
    array('URI', 'DisableExternalResources', TRUE),
  );
  foreach ($baseline as $entry) {
    $config->set($entry[0], $entry[1], $entry[2]);
  }

  // SERVER_NAME is more reliable than HTTP_HOST
  if (!empty($_SERVER['SERVER_NAME'])) {
    $config->set('URI', 'Host', $_SERVER['SERVER_NAME']);
  }

  // Follow the site language's text direction when it is right-to-left.
  if (defined('LANGUAGE_RTL') && $GLOBALS['language']->direction === LANGUAGE_RTL) {
    $config->set('Attr', 'DefaultTextDir', 'rtl');
  }

  // A per-format configuration function takes precedence over stored
  // web-form settings.
  $customizer = _htmlpurifier_config_load($format);
  if ($customizer) {
    $customizer($config);
    return $config;
  }

  $stored = variable_get("htmlpurifier_config_{$format}", FALSE);

  // {FALSE, TRUE, FALSE} = {no index, everything is allowed, don't do mq fix}
  $config->mergeArrayFromForm($stored, FALSE, TRUE, FALSE);
  return $config;
}

/**
 * Finds the configuration function for $format, including its file from the
 * module's config/ directory first if the function is not yet defined and
 * that file exists. Function name will be htmlpurifier_config_N.
 *
 * @param int $format
 *    Integer format to check function for.
 * @return
 *    String function name for format, or FALSE if none.
 */
function _htmlpurifier_config_load($format) {
  $callback = 'htmlpurifier_config_' . $format;
  $file = drupal_get_path('module', 'htmlpurifier') . '/config/' . $format . '.php';

  $defined = function_exists($callback);
  if (!$defined && file_exists($file)) {
    // The file is expected to declare the function; re-check afterwards.
    include_once $file;
    $defined = function_exists($callback);
  }

  if ($defined) {
    return $callback;
  }
  return FALSE;
}

/**
 * Builds the filter settings form for HTML Purifier.
 *
 * The form always carries the "Display help text" checkbox. When a
 * configuration function exists for the format, only a notice is added;
 * otherwise the rendered HTML Purifier configuration form is embedded,
 * restricted to a fixed list of directives in the basic variant.
 *
 * @param int $delta
 *    Whether or not to use advanced form (1) or not (0).
 * @param int $format
 *    Input format being configured.
 * @return
 *    Form API array.
 */
function _htmlpurifier_settings($delta, $format) {
  _htmlpurifier_load();

  // Dry run, testing for errors:
  _htmlpurifier_process('', $format, FALSE);

  $base = drupal_get_path('module', 'htmlpurifier');
  drupal_add_css($base . '/config-form.css');

  // Makes all configuration links open in new windows; can save lots of grief!
  drupal_add_js('$(function(){$(".hp-config a").click(function(){window.open(this.href);return false;});});', 'inline');
  drupal_add_js(HTMLPurifier_Printer_ConfigForm::getJavaScript(), 'inline');

  $help_key = 'htmlpurifier_help_' . $format;
  $form = array(
    'htmlpurifier' => array(
      '#type' => 'fieldset',
      '#title' => t('HTML Purifier'),
      '#collapsible' => TRUE,
    ),
  );
  $form['htmlpurifier'][$help_key] = array(
    '#type' => 'checkbox',
    '#title' => t('Display help text'),
    '#default_value' => variable_get($help_key, TRUE),
    '#description' => t('If enabled, a short note will be added to the filter tips explaining that HTML will be transformed to conform with HTML standards. You may want to disable this option when the HTML Purifier is used to check the output of another filter like BBCode.'),
  );

  // A configuration function overrides the web form, so only explain that.
  $customizer = _htmlpurifier_config_load($format);
  if ($customizer) {
    $form['htmlpurifier']['notice'] = array(
      '#type' => 'markup',
      '#value' => t('<div>Configuration function <code>!function()</code> is already defined. To edit HTML Purifier\'s configuration, edit the corresponding configuration file, which is usually <code>htmlpurifier/config/!format.php</code>. To restore the web configuration form, delete or rename this file.</div>', array(
        '!function' => $customizer,
        '!format' => $format,
      )),
    );
    return $form;
  }

  if ($delta != 0) {
    // Advanced variant: every directive is editable, plus the cache toggle.
    $heading = t('Advanced configuration options');
    $directives = TRUE;
    $doublecache_key = 'htmlpurifier_doublecache_' . $format;
    $form['htmlpurifier'][$doublecache_key] = array(
      '#type' => 'checkbox',
      '#title' => t('Allow double caching'),
      '#default_value' => variable_get($doublecache_key, FALSE),
      '#description' => t('If enabled, HTML Purifier will tell filter that its output is cacheable. This is not usually necessary, because HTML Purifier maintains its own cache, but may be helpful if you have later filters that need to be cached.'),
    );
  }
  else {
    // Basic variant: expose only this fixed list of directives.
    $heading = t('Configure HTML Purifier');
    $directives = array(
      'URI.DisableExternalResources',
      'URI.DisableResources',
      'URI.Munge',
      'Filter.YouTube',
      'Attr.EnableID',
      'HTML.Allowed',
      'HTML.ForbiddenElements',
      'HTML.ForbiddenAttributes',
      'AutoFormat.Linkify',
      'AutoFormat.AutoParagraph',
    );
  }

  $intro = '<div class="form-item"><h3>' . $heading . '</h3><div class="description">' . t('Please click on a directive name for more information on what it does before enabling or changing anything!') . '</div></div>';

  $config_key = 'htmlpurifier_config_' . $format;
  $config = _htmlpurifier_get_config($format);
  $printer = new HTMLPurifier_Printer_ConfigForm($config_key, 'http://htmlpurifier.org/live/configdoc/plain.html#%s');
  $rendered = $printer->render($config, $directives, FALSE);

  // The rendered form is plain markup, so an #after_build callback copies
  // its posted values into the form state.
  $form['htmlpurifier'][$config_key] = array(
    '#value' => $intro . $rendered,
    '#after_build' => array(
      '_htmlpurifier_config_hack',
    ),
  );
  return $form;
}

/**
 * Copies the raw post data for this element into the form state. This is an
 * #after_build hook function, used for the HTML Purifier configuration form.
 *
 * The key used is the first entry of the element's #parents. Nothing is
 * copied when the element has no post data or none under that key.
 *
 * @param array $form_element
 *    Form element being built; read for '#parents' and '#post'.
 * @param array $form_state
 *    Form state, modified by reference: 'values' gains the posted data.
 * @return
 *    The form element, unchanged.
 *
 * @warning
 *    If someone ever gets the smart idea of changing the parameters to
 *    this function, I'm SOL! ;-)
 */
function _htmlpurifier_config_hack($form_element, &$form_state) {
  $name = $form_element['#parents'][0];

  // Leave the form state alone when nothing was posted for this element.
  if (empty($form_element['#post']) || !isset($form_element['#post'][$name])) {
    return $form_element;
  }

  $form_state['values'][$name] = $form_element['#post'][$name];
  return $form_element;
}

Functions

Name (sorted descending) | Description
htmlpurifier_cron Implementation of hook_cron(). @note Checks for updates to the HTML Purifier library.
htmlpurifier_filter Implementation of hook_filter().
htmlpurifier_filter_tips Implementation of hook_filter_tips().
htmlpurifier_help Implementation of hook_help().
htmlpurifier_init Implementation of hook_init(). @note Displays any need to update the HTML Purifier library.
_htmlpurifier_config_hack Fills out the form state with extra post data originating from the HTML Purifier configuration form. This is an #after_build hook function.
_htmlpurifier_config_load Returns the name of the configuration function for $format, or FALSE if none exists. Function name will be htmlpurifier_config_N.
_htmlpurifier_get_config Returns the HTMLPurifier_Config object corresponding to an input format.
_htmlpurifier_load Loads the HTML Purifier library, and performs global initialization.
_htmlpurifier_process Processes HTML according to a format and returns purified HTML. Makes a cache pass if possible.
_htmlpurifier_settings Generates a settings form for configuring HTML Purifier.