You are here

htmlpurifier.module in HTML Purifier 6.2

Implements HTML Purifier as a Drupal filter.

File

htmlpurifier.module
View source
<?php

/**
 * @file
 * Implements HTML Purifier as a Drupal filter.
 */

// -- HOOK IMPLEMENTATIONS -------------------------------------------------- //

/**
 * Implementation of hook_flush_caches().
 *
 * @return
 *   Array holding the name of the cache table owned by this module.
 */
function htmlpurifier_flush_caches() {
  $tables = array();
  $tables[] = 'cache_htmlpurifier';
  return $tables;
}

/**
 * Implementation of hook_help().
 *
 * @param string $path
 *    Help path being requested.
 * @param array $arg
 *    Path arguments; not used by this implementation.
 * @return
 *    Translated one-line module description for the modules page path,
 *    NULL for every other path.
 */
function htmlpurifier_help($path, $arg) {
  // Only the module listing entry has help text.
  if ($path == 'admin/modules#htmlpurifier') {
    return t('Filter that removes malicious HTML and ensures standards compliant output.');
  }
  return NULL;
}

/**
 * Implementation of hook_cron().
 *
 * Triggers a forced check for a newer HTML Purifier release.
 */
function htmlpurifier_cron() {
  // A forced check ignores any earlier failure flag. Doing it from cron is
  // safe because a slow timeout here will not degrade the user experience.
  $force = TRUE;
  htmlpurifier_check_version($force);
}

/**
 * Checks for updates to the HTML Purifier library.
 *
 * @param boolean $force
 *    When TRUE, performs the check even if a previous check failed.
 * @return
 *    The latest version string reported by htmlpurifier.org on success;
 *    NULL when the check was skipped or the request did not return 200.
 */
function htmlpurifier_check_version($force = FALSE) {
  // Unless forced, do not retry after a recorded failure.
  if (!$force && variable_get('htmlpurifier_version_check_failed', FALSE)) {
    return;
  }

  // Maybe this should be changed in the future:
  $response = drupal_http_request('http://htmlpurifier.org/live/VERSION');

  if ($response->code != 200) {
    variable_set('htmlpurifier_version_check_failed', TRUE);

    // Forget any previously known "latest" version so that people can be
    // alerted if a problem appears on a previously working site.
    variable_del('htmlpurifier_version_current');
    return;
  }

  $latest = trim($response->data);
  variable_set('htmlpurifier_version_check_failed', FALSE);
  variable_set('htmlpurifier_version_current', $latest);
  return $latest;
}

/**
 * Implementation of hook_filter().
 *
 * @param string $op
 *    Operation requested by the filter system: 'list', 'no cache',
 *    'description', 'prepare', 'process' or 'settings'.
 * @param int $delta
 *    Which filter variant is meant: 0 is the regular filter, 1 is the
 *    advanced one.
 * @param int $format
 *    Input format the operation applies to.
 * @param string $text
 *    Text to prepare or process.
 * @return
 *    Depends on $op; NULL for unrecognized operations.
 */
function htmlpurifier_filter($op, $delta = 0, $format = -1, $text = '') {
  switch ($op) {
    case 'list':
      return array(
        0 => t('HTML Purifier'),
        1 => t('HTML Purifier (advanced)'),
      );

    case 'no cache':
      // Since HTML Purifier implements its own caching layer, having filter
      // cache it again is wasteful. Returns FALSE if double caching is permitted.
      return !variable_get("htmlpurifier_doublecache", FALSE);

    case 'description':
      // A single literal (rather than concatenated pieces) yields the same
      // string while keeping it extractable for translation.
      $common = t('Removes malicious HTML code and ensures that the output is standards compliant. <strong>Warning:</strong> For performance reasons, please ensure that there are no highly dynamic filters before HTML Purifier. ');
      if ($delta == 1) {
        return $common . t('<em>This version has advanced configuration options, do not enable both at the same time.</em>');
      }
      // Every other delta gets the common description. Previously an
      // unexpected delta fell through into 'prepare' and returned the raw
      // $text as the description.
      return $common;

    case 'prepare':
      // No preparation step is needed; purification happens in 'process'.
      return $text;

    case 'process':
      return _htmlpurifier_process($text, $format);

    case 'settings':
      return _htmlpurifier_settings($delta, $format);

    default:
      return NULL;
  }
}

/**
 * Implementation of hook_filter_tips().
 *
 * @param int $delta
 *    Filter variant; not used.
 * @param int $format
 *    Input format whose "display help text" setting is consulted.
 * @param boolean $long
 *    Whether a long tip is requested; the same text is returned either way.
 * @return
 *    Translated tip, or NULL when help text is disabled for the format.
 */
function htmlpurifier_filter_tips($delta, $format, $long = FALSE) {
  $show_help = variable_get("htmlpurifier_help_{$format}", TRUE);
  if (!$show_help) {
    return;
  }
  return t('HTML tags will be transformed to conform to HTML standards.');
}

/**
 * Implementation of hook_nodeapi().
 *
 * On the 'view' operation, hands the rendered teaser or body to
 * _htmlpurifier_add_css() so that extracted style blocks are added to the
 * document head and the cache marker is stripped from the output.
 *
 * @param object &$node
 *    Node being acted on.
 * @param string $op
 *    Operation; only 'view' is handled.
 * @param mixed $a3
 *    For 'view': whether the teaser is being displayed.
 * @param mixed $a4
 *    Not used.
 */
function htmlpurifier_nodeapi(&$node, $op, $a3, $a4) {
  if ($op != 'view') {
    return;
  }

  // Should we load CSS cache data from teaser or body?
  $key = ($a3 == TRUE) ? 'teaser' : 'body';

  // Drupal 6 core's node_prepare() stores the filtered teaser under
  // content['body'], so a separate 'teaser' element usually does not exist.
  // Fall back to 'body' in that case instead of silently skipping teasers.
  if ($key == 'teaser' && !isset($node->content['teaser']['#value'])) {
    $key = 'body';
  }

  // Check before passing by reference: referencing a missing element would
  // create an empty one inside $node->content as a side effect.
  if (isset($node->content[$key]['#value'])) {
    _htmlpurifier_add_css($node->content[$key]['#value'], $node->nid);
  }

  // @todo: Deal with CCK fields - probably needs to go in op alter?
}

/**
 * Helper function for hook_nodeapi.
 *
 * Looks for the cache marker comment that the filter left in a rendered
 * field, loads the style blocks cached under that marker, adds them to the
 * document head in one <style> element, and strips the marker from the field.
 *
 * @param string &$field
 *    Field to process, this should be the actual field value
 *      ex. $node->content['body']['#value']
 *
 * @param int $nid
 *    Node ID of the node to which these stylesheets belong.
 *    Since filters don't know their node context, a token stands in for the
 *    node ID in the stylesheet scope and is replaced here.
 */
function _htmlpurifier_add_css(&$field, $nid) {
  // Only a rendered (string) field can carry the marker.
  if (!is_string($field)) {
    return;
  }

  $marker = array();
  $found = preg_match('#<!-- HTML Purifier Cache \\#([-\\w]*:[\\w]*) -->#', $field, $marker);

  // No marker means there are no style blocks to load.
  if ($found != 1) {
    return;
  }

  $cid = 'css:' . $marker[1];
  $cached = cache_get($cid, 'cache_htmlpurifier');

  // There should always be cached style blocks for a marker; bail if not.
  if (!$cached) {
    return;
  }

  $resolved = array();
  $css = '';
  foreach ($cached->data as $index => $block) {
    // Swap the node ID token for the real ID where it occurs.
    // NOTE: This token is forgeable, but we expect that if the user
    // is able to invoke this transformation, it will be relatively
    // harmless.
    $has_token = strpos($block, '[%HTMLPURIFIER:NID%]') !== FALSE;
    $resolved[$index] = $has_token
      ? str_replace('[%HTMLPURIFIER:NID%]', (int) $nid, $block)
      : $block;

    // Non-empty blocks are collected for the document head.
    if (!empty($block)) {
      $css .= $resolved[$index] . "\n";
    }
  }

  // Emit the collected styles into the document header.
  if ($css != '') {
    drupal_set_html_head('<style type="text/css">' . "\n" . '<!--' . "\n" . $css . '--></style>');
  }

  // Strip the cache marker from the caller's field.
  $field = str_replace($marker[0], '', $field);

  // Store the blocks again if token replacement changed any of them.
  if ($cached->data != $resolved) {
    cache_set($cid, $resolved, 'cache_htmlpurifier', CACHE_PERMANENT);
  }
}

// -- INTERNAL FUNCTIONS ---------------------------------------------------- //

/**
 * Purifies HTML according to an input format, consulting this module's
 * cache first when allowed.
 *
 * @param string $text
 *    Text to purify
 * @param int $format
 *    Input format corresponding to HTML Purifier's configuration.
 * @param boolean $cache
 *    Whether or not to check (and populate) the cache.
 * @return
 *    Purified HTML. When Filter.ExtractStyleBlocks is enabled and caching is
 *    on, the result is prefixed with a marker comment naming the cache entry
 *    that holds the extracted style blocks.
 *
 * @note
 *    We ignore $delta because the only difference it makes is in the configuration
 *    screen.
 */
function _htmlpurifier_process($text, $format, $cache = TRUE) {
  if ($cache) {
    $cid = $format . ':' . md5($text);
    $hit = cache_get($cid, 'cache_htmlpurifier');
    if ($hit) {
      return $hit->data;
    }
  }

  _htmlpurifier_load();
  $config = _htmlpurifier_get_config($format);

  // If ExtractStyleBlocks is enabled, CSSTidy has to be loaded as well.
  $extract_styles = ($config->get('Filter.ExtractStyleBlocks') == true);
  if ($extract_styles) {
    _htmlpurifier_load_csstidy();
  }

  $purifier = new HTMLPurifier($config);
  $purified = $purifier->purify($text);

  // Without caching there is nothing more to do: no caching, no style blocks.
  if (!$cache) {
    return $purified;
  }

  if ($extract_styles) {
    // Cache the extracted style blocks and leave a marker so that
    // hook_nodeapi can find them.
    $blocks = $purifier->context->get('StyleBlocks');
    cache_set('css:' . $cid, $blocks, 'cache_htmlpurifier', CACHE_PERMANENT);
    $purified = '<!-- HTML Purifier Cache #' . $cid . ' -->' . $purified;
  }

  cache_set($cid, $purified, 'cache_htmlpurifier', CACHE_PERMANENT);
  return $purified;
}

/**
 * Loads the HTML Purifier library, and performs global initialization.
 *
 * Does its work only on the first call; later calls return immediately.
 * Resolves where the library lives, includes the HTML Purifier autoloader and
 * this module's HTMLPurifier_DefinitionCache_Drupal class, registers that
 * class with HTML Purifier under the name 'Drupal', and records the loaded
 * library version in the 'htmlpurifier_version_ours' variable.
 *
 * Prints a message and ends the request when the library is found directly in
 * the libraries folder instead of its library/ subfolder, or when PHP is
 * older than version 5.
 */
function _htmlpurifier_load() {
  // Static flag so that the initialization below happens only once.
  static $done = false;
  if ($done) {
    return;
  }
  $done = true;
  $module_path = drupal_get_path('module', 'htmlpurifier');

  // Default: the library is bundled inside the module folder.
  $library_path = $module_path;
  if (function_exists('libraries_get_path')) {
    $library_path = libraries_get_path('htmlpurifier');

    // This may happen if the user has HTML Purifier installed under the
    // old configuration, but also installed libraries and forgot to
    // move it over.  There is code for emitting errors in
    // htmlpurifier.install when this is the case.
    if (!file_exists("{$library_path}/library/HTMLPurifier.auto.php")) {

      // Check for an alternate phrasing and error about it
      if (file_exists("{$library_path}/HTMLPurifier.auto.php") && !file_exists("{$module_path}/library/HTMLPurifier.auto.php")) {
        echo "HTML Purifier was installed improperly; move contents of folder {$library_path} to {$library_path}/library";
        exit;
      }

      // Nothing usable under the libraries path: fall back to the module folder.
      $library_path = $module_path;
    }
  }
  if (version_compare(phpversion(), '5') < 0) {

    // If your version of PHP is too old, you're going to fail anyway
    // when you attempt to include the HTML Purifier library, so we
    // might as well try to give a useful error message.
    echo 'Your version of PHP is too old to run HTML Purifier, needs PHP 5 or later';
    exit;
  }
  require_once "{$library_path}/library/HTMLPurifier.auto.php";
  require_once "{$module_path}/HTMLPurifier_DefinitionCache_Drupal.php";

  // Make the 'Drupal' definition cache implementation available by name.
  $factory = HTMLPurifier_DefinitionCacheFactory::instance();
  $factory
    ->register('Drupal', 'HTMLPurifier_DefinitionCache_Drupal');

  // Register the version as a variable (written only when it has changed):
  $current_version = variable_get('htmlpurifier_version_ours', FALSE);
  if ($current_version != HTMLPurifier::VERSION) {
    variable_set('htmlpurifier_version_ours', HTMLPurifier::VERSION);
  }
}

/**
 * Builds the HTMLPurifier_Config object for an input format.
 *
 * Starts from HTML Purifier's defaults, applies this module's own defaults,
 * then either runs the format's configuration function (if one exists) or
 * merges the settings saved through the web form.
 *
 * @param int $format
 *    Input format.
 * @return
 *    Instance of HTMLPurifier_Config.
 */
function _htmlpurifier_get_config($format) {
  $config = HTMLPurifier_Config::createDefault();

  // Module defaults, applied in this order.
  $defaults = array(
    'AutoFormat.AutoParagraph' => TRUE,
    'AutoFormat.Linkify' => TRUE,
    'HTML.Doctype' => 'XHTML 1.0 Transitional',
    // Probably
    'Core.AggressivelyFixLt' => TRUE,
    'Cache.DefinitionImpl' => 'Drupal',
    // Filter HTML doesn't allow external images, so neither will we...
    // for now. This can be configured off.
    'URI.DisableExternalResources' => TRUE,
  );
  foreach ($defaults as $directive => $value) {
    $config->set($directive, $value);
  }

  // SERVER_NAME is more reliable than HTTP_HOST
  if (!empty($_SERVER['SERVER_NAME'])) {
    $config->set('URI.Host', $_SERVER['SERVER_NAME']);
  }

  if (defined('LANGUAGE_RTL') && $GLOBALS['language']->direction === LANGUAGE_RTL) {
    $config->set('Attr.DefaultTextDir', 'rtl');
  }

  // A configuration function for the format takes precedence over the
  // settings stored from the web form.
  $config_function = _htmlpurifier_config_load($format);
  if ($config_function) {
    $config_function($config);
    return $config;
  }

  $config_data = variable_get("htmlpurifier_config_{$format}", FALSE);
  $wants_style_blocks = !empty($config_data['Filter.ExtractStyleBlocks']);
  if ($wants_style_blocks && !_htmlpurifier_load_csstidy()) {
    // CSSTidy could not be loaded: switch the directive off and tell the user.
    $config_data['Filter.ExtractStyleBlocks'] = '0';
    drupal_set_message("Could not enable ExtractStyleBlocks because CSSTidy was not installed.  You can download CSSTidy module from <a href='http://drupal.org/project/csstidy'>http://drupal.org/project/csstidy</a>", 'error', FALSE);
  }

  // {FALSE, TRUE, FALSE} = {no index, everything is allowed, don't do mq fix}
  $config->mergeArrayFromForm($config_data, FALSE, TRUE, FALSE);
  return $config;
}
/**
 * Locates and loads the CSSTidy library.
 *
 * A copy under the libraries path is preferred when libraries_get_path() is
 * available and class.csstidy.php exists there; otherwise the copy shipped
 * inside the CSSTidy module's csstidy/ folder is used.
 *
 * @return
 *    TRUE if class.csstidy.php was found and included, FALSE otherwise.
 */
function _htmlpurifier_load_csstidy() {
  $candidates = array();

  // Some future-proofing for library path
  if (function_exists('libraries_get_path')) {
    $csstidy_library = libraries_get_path('csstidy');
    if ($csstidy_library) {
      $candidates[] = $csstidy_library;
    }
  }

  // If CSSTidy module is installed, it should have a copy we can use.
  // drupal_get_path() gives an empty path when the module is not present;
  // skip it then, otherwise the lookup below would probe "/csstidy" at the
  // root of the filesystem.
  $csstidy_module = drupal_get_path('module', 'csstidy');
  if ($csstidy_module) {
    $candidates[] = $csstidy_module . '/csstidy';
  }

  // Load CSSTidy from the first location that has it
  foreach ($candidates as $csstidy_path) {
    if (file_exists("{$csstidy_path}/class.csstidy.php")) {
      require_once "{$csstidy_path}/class.csstidy.php";
      return TRUE;
    }
  }
  return FALSE;
}

/**
 * Looks up the configuration function for $format, including its file from
 * the module's config/ folder first if necessary. The function is named
 * htmlpurifier_config_N and its file N.php.
 *
 * @param int $format
 *    Integer format to check function for.
 * @return
 *    String function name for format, or FALSE if none.
 */
function _htmlpurifier_config_load($format) {
  $module_path = drupal_get_path('module', 'htmlpurifier');
  $file = $module_path . "/config/{$format}.php";
  $function = "htmlpurifier_config_{$format}";

  // Include the file only while the function is still undefined.
  if (!function_exists($function)) {
    if (file_exists($file)) {
      include_once $file;
    }
  }

  if (function_exists($function)) {
    return $function;
  }
  return FALSE;
}

/**
 * Generates a settings form for configuring HTML Purifier.
 *
 * The form has a dashboard fieldset (with a clear-cache button) and an
 * HTML Purifier fieldset. The latter holds the help-text checkbox and either
 * a notice that a configuration function is in charge of the format, or the
 * configuration form rendered by HTML Purifier itself.
 *
 * @param int $delta
 *    Whether or not to use advanced form (1) or not (0).
 * @param int $format
 *    Input format being configured.
 * @return
 *    Form API array.
 */
function _htmlpurifier_settings($delta, $format) {
  _htmlpurifier_load();

  // Dry run, testing for errors (purifies an empty string, bypassing the cache):
  _htmlpurifier_process('', $format, FALSE);
  $module_path = drupal_get_path('module', 'htmlpurifier');
  drupal_add_css("{$module_path}/config-form.css");

  // Makes all configuration links open in new windows; can save lots of grief!
  drupal_add_js('$(function(){$(".hp-config a").click(function(){window.open(this.href);return false;});});', 'inline');

  // JavaScript supplied by HTML Purifier's own configuration form printer.
  drupal_add_js(HTMLPurifier_Printer_ConfigForm::getJavaScript(), 'inline');
  $form = array();
  $form['dashboard'] = array(
    '#type' => 'fieldset',
    '#title' => t('HTML Purifier Dashboard'),
    '#collapsible' => true,
  );
  $form['dashboard']["enter_hack"] = array(
    // Hack: a hidden submit button placed ahead of the clear-cache button so
    // that pressing <ENTER> performs the normal form submission.
    '#value' => '<input type="submit" name="op" id="edit-submit" value="Save configuration"  class="form-submit" style="display:none;" />',
  );
  $form['dashboard']["htmlpurifier_clear_cache"] = array(
    '#type' => 'submit',
    '#value' => t('Clear cache (Warning: Can result in performance degradation)'),
    '#submit' => array(
      '_htmlpurifier_clear_cache',
    ),
  );
  $form['htmlpurifier'] = array(
    '#type' => 'fieldset',
    '#title' => t('HTML Purifier'),
    '#collapsible' => TRUE,
  );

  // Per-format switch read by htmlpurifier_filter_tips().
  $form['htmlpurifier']["htmlpurifier_help_{$format}"] = array(
    '#type' => 'checkbox',
    '#title' => t('Display help text'),
    '#default_value' => variable_get("htmlpurifier_help_{$format}", TRUE),
    '#description' => t('If enabled, a short note will be added to the filter tips explaining that HTML will be transformed to conform with HTML standards. You may want to disable this option when the HTML Purifier is used to check the output of another filter like BBCode.'),
  );
  if ($config_function = _htmlpurifier_config_load($format)) {
    // A configuration function exists for this format, so the web form is
    // replaced by a notice pointing at the configuration file.
    $form['htmlpurifier']['notice'] = array(
      '#type' => 'markup',
      '#value' => t('<div>Configuration function <code>!function()</code> is already defined. To edit HTML Purifier\'s configuration, edit the corresponding configuration file, which is usually <code>htmlpurifier/config/!format.php</code>. To restore the web configuration form, delete or rename this file.</div>', array(
        '!function' => $config_function,
        '!format' => $format,
      )),
    );
  }
  else {
    if ($delta == 0) {
      // Regular filter: only this list of directives is passed to the form
      // renderer.
      $title = t('Configure HTML Purifier');
      $allowed = array(
        'URI.DisableExternalResources',
        'URI.DisableResources',
        'URI.Munge',
        'Attr.EnableID',
        'HTML.Allowed',
        'HTML.ForbiddenElements',
        'HTML.ForbiddenAttributes',
        'HTML.SafeObject',
        'Output.FlashCompat',
        'AutoFormat.RemoveEmpty',
        'AutoFormat.Linkify',
        'AutoFormat.AutoParagraph',
      );
    }
    else {
      // Advanced filter: TRUE is passed instead of a list, and the double
      // caching option is offered as well.
      $title = t('Advanced configuration options');
      $allowed = TRUE;
      $form['htmlpurifier']["htmlpurifier_doublecache"] = array(
        '#type' => 'checkbox',
        '#title' => t('Allow double caching'),
        '#default_value' => variable_get("htmlpurifier_doublecache", FALSE),
        '#description' => t('If enabled, HTML Purifier will tell filter that its output is cacheable. This is not usually necessary, because HTML Purifier maintains its own cache, but may be helpful if you have later filters that need to be cached. Warning: this applies to ALL filters, not just this one'),
      );
    }
    $intro = '<div class="form-item"><h3>' . $title . '</h3><div class="description">' . t('Please click on a directive name for more information on what it does before enabling or changing anything!  Changes will not apply to old entries until you clear the cache (see the dashboard)') . '</div></div>';
    $config = _htmlpurifier_get_config($format);

    // The rendered form is plain markup rather than Form API elements; the
    // #after_build callback copies its posted values into the form state.
    $config_form = new HTMLPurifier_Printer_ConfigForm("htmlpurifier_config_{$format}", 'http://htmlpurifier.org/live/configdoc/plain.html#%s');
    $form['htmlpurifier']["htmlpurifier_config_{$format}"] = array(
      '#value' => $intro . $config_form
        ->render($config, $allowed, FALSE),
      '#after_build' => array(
        '_htmlpurifier_config_hack',
      ),
    );
  }
  return $form;
}

/**
 * #after_build callback for the HTML Purifier configuration element.
 *
 * Copies the raw post data of the configuration form into the form state,
 * then warns about ExtractStyleBlocks settings whose scope is unset or is
 * not the node ID token.
 *
 * @param array $form_element
 *    The element being built; '#parents' and '#post' are read.
 * @param array &$form_state
 *    Form state; the element's posted values are stored in 'values'.
 * @return
 *    The unchanged $form_element.
 *
 * @warning
 *    If someone ever gets the smart idea of changing the parameters to
 *    this function, I'm SOL! ;-)
 */
function _htmlpurifier_config_hack($form_element, &$form_state) {
  $key = $form_element['#parents'][0];
  $was_posted = !empty($form_element['#post']) && isset($form_element['#post'][$key]);
  if ($was_posted) {
    $form_state['values'][$key] = $form_element['#post'][$key];
  }

  foreach ($form_state['values'] as $name => $values) {
    // Only array values with ExtractStyleBlocks switched on are of interest.
    if (!is_array($values) || empty($values['Filter.ExtractStyleBlocks'])) {
      continue;
    }

    // The "Null_" entry being set means that no scope was given at all.
    if (!empty($values['Null_Filter.ExtractStyleBlocks.Scope'])) {
      drupal_set_message("You have not set <code>Filter.ExtractStyleBlocks.Scope</code>; this means that users can add CSS that affects all of your Drupal theme and not just their content block.  It is recommended to set this to <code>#node-[%HTMLPURIFIER:NID%]</code> (including brackets) which will automatically ensure that CSS directives only apply to their node.", 'warning', FALSE);
      continue;
    }

    $uses_node_scope = isset($values['Filter.ExtractStyleBlocks.Scope'])
      && $values['Filter.ExtractStyleBlocks.Scope'] === '#node-[%HTMLPURIFIER:NID%]';
    if (!$uses_node_scope) {
      drupal_set_message("You have enabled Filter.ExtractStyleBlocks.Scope, but you did not set it to <code>#node-[%HTMLPURIFIER:NID%]</code>; CSS may not work unless you have special theme support.", 'warning', FALSE);
    }
  }
  return $form_element;
}

/**
 * Submit handler that clears the HTML Purifier internal Drupal cache.
 *
 * Empties the module's own cache bin and removes every entry in the general
 * cache bin whose ID starts with "htmlpurifier:".
 *
 * Uses the cache API instead of direct DELETE queries so that the entries
 * are also cleared when a cache backend other than the database is in use.
 *
 * @param array $form
 *    The submitted form; not used.
 * @param array &$form_state
 *    Form state; not used.
 */
function _htmlpurifier_clear_cache($form, &$form_state) {
  // '*' with the wildcard flag empties the whole bin.
  cache_clear_all('*', 'cache_htmlpurifier', TRUE);

  // With the wildcard flag, the ID is treated as a prefix.
  cache_clear_all('htmlpurifier:', 'cache', TRUE);

  // Report success only after the entries are gone.
  drupal_set_message("Cache cleared");
}

Functions

Name (sort descending) | Description
htmlpurifier_check_version Checks for updates to the HTML Purifier library.
htmlpurifier_cron Implementation of hook_cron().
htmlpurifier_filter Implementation of hook_filter().
htmlpurifier_filter_tips Implementation of hook_filter_tips().
htmlpurifier_flush_caches Implementation of hook_flush_caches().
htmlpurifier_help Implementation of hook_help().
htmlpurifier_nodeapi Implementation of hook_nodeapi().
_htmlpurifier_add_css Helper function for hook_nodeapi Finds extracted style blocks based on a cache link left by hook_filter Aggregates the extracted style blocks and adds them to the document head Also removes the cache link left in hook_filter to the CSS cache
_htmlpurifier_clear_cache Clears the HTML Purifier internal Drupal cache.
_htmlpurifier_config_hack Fills out the form state with extra post data originating from the HTML Purifier configuration form. This is an #after_build hook function.
_htmlpurifier_config_load Returns the name of the configuration function for $format, or FALSE if none exists. Function name will be htmlpurifier_config_N.
_htmlpurifier_get_config Returns the HTMLPurifier_Config object corresponding to an input format.
_htmlpurifier_load Loads the HTML Purifier library, and performs global initialization.
_htmlpurifier_load_csstidY Looks for the CSSTidy library and includes it; returns TRUE if it was found, FALSE otherwise.
_htmlpurifier_process Processes HTML according to a format and returns purified HTML. Makes a cache pass if possible.
_htmlpurifier_settings Generates a settings form for configuring HTML Purifier.