You are here

htmltidy.module in HTML Tidy 7

Same filename and directory in other branches
  1. 5 htmltidy.module
  2. 6 htmltidy.module

The htmltidy module uses Tidy (http://tidy.sf.net) to properly format HTML for saving and display.

File

htmltidy.module
View source
<?php

/**
 * @file
 * The htmltidy module uses Tidy (http://tidy.sf.net) to properly format HTML
 * for saving and display.
 */

/****************************************************************************
 * Drupal hooks
 ****************************************************************************/

/**
 * Implements hook_hook_info().
 *
 * Maps each hook name declared by this module to the group it belongs to:
 * the filter-related hooks go to the 'filter' group and the node hooks go to
 * the 'node' group.
 *
 * @return
 *   An array keyed by hook name, each value being an array with a 'group' key.
 */
function htmltidy_hook_info() {
  $filter_group = array(
    'group' => 'filter',
  );
  $node_group = array(
    'group' => 'node',
  );

  return array(
    // Filter hooks.
    'filter_info' => $filter_group,
    'filter_htmltidy_process' => $filter_group,
    'filter_htmltidy_settings' => $filter_group,
    'filter_htmltidy_tips' => $filter_group,

    // Node hooks. These should really be entity hooks, but that is not
    // possible until entity versions of these hooks exist (for example
    // hook_entity_prepare()).
    'node_prepare' => $node_group,
    'node_validate' => $node_group,
  );
}

/**
 * Implements hook_permission().
 *
 * Declares both permissions this module relies on. The debug output in
 * htmltidy_string() is guarded by user_access('use htmltidy debug mode'), so
 * that permission has to be declared here; otherwise it can never be granted
 * to a role.
 *
 * @return
 *   An array of permission definitions keyed by permission name.
 */
function htmltidy_permission() {
  return array(
    'administer htmltidy' => array(
      'title' => t('Administer htmltidy'),
      'description' => t('Administer the HTML Tidy module.'),
    ),
    'use htmltidy debug mode' => array(
      'title' => t('Use htmltidy debug mode'),
      'description' => t('See the warnings reported by HTML Tidy appended to the page.'),
    ),
  );
}

/**
 * Implements hook_help().
 *
 * @param $path
 *   The path for which help is requested.
 * @param $arg
 *   Path arguments; not used by this implementation.
 *
 * @return
 *   The translated help text for the module help page or the module
 *   description path; nothing for any other path.
 */
function htmltidy_help($path, $arg) {
  // Full help page of the module.
  if ($path == 'admin/help/htmltidy') {
    $replacements = array(
      '@help' => url('admin/config/content/formats'),
    );
    return t("\n        <p>\n          This module uses <a href='http://tidy.sourceforge.net/'>HTML Tidy</a>\n          to properly format HTML files. It can be used at any of several stages.\n          <ul>\n            <li>An input validator - to tidy user input as it's entered (Most efficient)</li>\n            <li>An output filter - (normal Drupal filter) which validates content just before displaying it. (cached, so pretty good)</li>\n          </ul>\n          Options accepted include:\n          <ul>\n            <li>Word wrap - Specify line length (0 to disable).</li>\n            <li>Indentation - Makes HTML human-readable.</li>\n            <li>Append warnings - Outputs any feedback from Tidy to the webpage.</li>\n            <ul>\n              <li>Verbose mode - Tidy will attempt to describe warnings in detail (this is not actually\n                very helpful).</li>\n              <li>Run twice - Runs Tidy twice to get the line numbers on the warnings right.</li>\n            </ul>\n          </ul>\n        </p><p>\n          These settings are configured under the \"Configure\" menu of any <a href='@help'>Input Format</a> that you enable the filter on.\n          The full range of HTMLTidy Options as documented\n          <a href='http://tidy.sourceforge.net/docs/quickref.html'>on the download site</a>\n          can be used if you create your own htmltidy.conf file.\n        </p><p>\n          Several permissions are also settable in the access control panel:\n          <ul>\n            <li>administer htmltidy - Self-explanatory.</li>\n            <li>use htmltidy debug mode - Append warnings as mentioned above.</li>\n          </ul>\n        </p><p>\n          There appear to be issues with the input validator conflicting with\n          other rewrite filters, this hasn't been fully investigated yet.\n        </p><p>\n          Due to forking (or lack of it) under Windows platforms, you may see flickers of\n          DOS boxes as the application 
is run. This depends a lot on how your server was configured\n          (service or commandline app). This can be ignored.\n        </p>\n      ", $replacements);
  }

  // Short description of the module.
  if ($path == 'admin/modules/description') {
    return t("\n        Repairs, indents and wraps HTML. Also gives debugging information about\n        spec-conformance. Can be used as a complete site-wrapper, input\n        validator, or an output filter.\n      ");
  }
}

/****************************************************************************
 * Helper functions
 ****************************************************************************/

/**
 * Process whatever we are given and return the htmltidy response.
 *
 * Builds the tidy command-line arguments from the filter settings, runs the
 * text through htmltidy_run() and returns the tidied markup. Errors and
 * warnings are handed back through the by-reference arrays.
 *
 * @param $text
 *   HTML string to be tidied
 * @param $input
 *   FALSE if text is for output; TRUE if text is for input
 * @param $settings
 *   Filter settings for the string. Missing groups and missing keys inside a
 *   group are filled in from htmltidy_default_settings().
 * @param $errors
 *   An array to be filled with error info
 * @param $warnings
 *   An array to be filled with warning info
 * @return
 *   The tidied string. The text is returned unchanged when it is input and
 *   input processing is disabled; an empty string is returned when the tidy
 *   executable cannot be found.
 */
function htmltidy_string($text, $input, $settings, &$errors, &$warnings) {

  // Fill in any missing configuration with default settings. The merge is
  // done per group because a plain "+=" on the outer array would leave a
  // partially filled group (e.g. 'format' without 'indent') incomplete.
  if (!is_array($settings)) {
    $settings = array();
  }
  foreach (htmltidy_default_settings() as $group => $defaults) {
    if (isset($settings[$group]) && is_array($settings[$group])) {
      $settings[$group] += $defaults;
    }
    else {
      $settings[$group] = $defaults;
    }
  }

  // If the string is for input, and the option to process input is off, simply
  // return the string unmolested.
  if ($input && !$settings['format']['process_input']) {
    return $text;
  }

  // Make sure that we can find the executable.
  if (!file_exists($settings['paths']['app'])) {
    $message = "Failed to find htmltidy executable at '%htmltidy_apppath', not using tidy.";
    $strings = array(
      '%htmltidy_apppath' => $settings['paths']['app'],
    );
    watchdog('htmltidy', $message, $strings, WATCHDOG_WARNING);
    $errors[] = t($message, $strings);
    return '';
  }

  /*
   * Do not pass the parameters their default values as defined in the
   * documentation for tidy (http://www.w3.org/People/Raggett/tidy/), or weird
   * stuff starts to happen.
   */
  $args = array();
  if ($settings['format']['indent']) {
    $args[] = '--indent auto';
  }
  if (!$settings['debug']['verbose']) {
    $args[] = '-q';
  }
  if (!$settings['format']['wrapphp']) {
    $args[] = '--wrap-php no';
  }
  if (!$settings['format']['tidymark']) {
    $args[] = '--tidy-mark no';
  }
  if ($settings['format']['clean']) {
    $args[] = '--clean yes';
  }
  if ($settings['format']['xhtml']) {
    $args[] = '--output-xhtml yes';
  }
  if ($settings['format']['enclosetext']) {
    $args[] = '--enclose-text yes';
  }
  if ($settings['format']['encloseblocktext']) {
    $args[] = '--enclose-block-text yes';
  }
  if ($settings['format']['wordcleanup']) {
    $args[] = '--bare yes';
    $args[] = '--word-2000 yes';
    $args[] = '--drop-proprietary-attributes yes';
  }
  if (htmltidy_empty($settings['format']['process_input'], FALSE) && !module_exists('htmltidy_output')) {
    $args[] = '--show-body-only yes';
  }

  // User specified configuration file. The path and the doctype come from
  // configuration and end up on a shell command line, so they are quoted.
  $htmltidy_confpath = $settings['paths']['config'];
  if (!empty($htmltidy_confpath) && file_exists($htmltidy_confpath)) {
    $args[] = '-config ' . escapeshellarg($htmltidy_confpath);
  }
  if (!empty($settings['format']['doctype'])) {
    $args[] = '--doctype ' . escapeshellarg($settings['format']['doctype']);
  }
  $args[] = '-wrap ' . intval($settings['format']['wordwrap']);
  $args[] = '-utf8';
  $args[] = '-modify';

  // The text itself is piped to tidy by htmltidy_run().
  $output = '';
  htmltidy_run($text, $settings['paths']['app'], $args, $output, $errors, $warnings);

  // Output debugging info: the command that was run plus whatever tidy
  // reported, for users that are allowed to see it.
  if ($settings['debug']['warnings'] && user_access('use htmltidy debug mode')) {
    $css = ".htmltidy { border: 1px dashed #aaa; background-color: #eee; padding: 1em;\n" . "margin: 1em; float: left; font-family: \"courier new\", sans-serif; font-size: 8pt; color: #050; }";
    // drupal_set_html_head() is the Drupal 6 API and does not exist in
    // Drupal 7; inline CSS is added through drupal_add_css() instead.
    drupal_add_css($css, array('type' => 'inline'));

    $feedback = array_merge(
      is_array($errors) ? $errors : array(),
      is_array($warnings) ? $warnings : array()
    );
    if (!empty($feedback)) {
      $command = check_plain($settings['paths']['app'] . ' ' . implode(' ', $args));
      $report = implode('<br />', array_map('check_plain', $feedback));
      drupal_set_message("<h3>HTMLTidy Debug</h3><kbd>{$command}</kbd><div class=\"htmltidy\">{$report}</div>");
    }
  }
  return $output;
}

/**
 * Tidies an incomplete fragment of HTML by passing it through htmltidy full,
 * then stripping back down to the 'body'.
 *
 * @param $text
 *   HTML string to be tidied
 * @param $input
 *   FALSE if text is for output; TRUE if text is for input
 * @param $settings
 *   Filter settings for the fragment
 * @param $errors
 *   An array to be filled with error info
 * @param $warnings
 *   An array to be filled with warning info
 * @return
 *   The tidied string; an empty string when there is no text to tidy.
 */
function htmltidy_fragment($text, $input, $settings, &$errors, &$warnings) {
  // Nothing to tidy. Compare against the empty string explicitly so that the
  // text "0" is still processed, and always return a string.
  if (!is_scalar($text) || (string) $text === '') {
    return '';
  }
  $text = (string) $text;

  // Pretend it's a full document unless the text already carries an <html>
  // element (matched case-insensitively).
  $wrapped = (stripos($text, '<html') === FALSE);
  if ($wrapped) {
    // This declaration just suppresses one of the warnings.
    $html = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';

    // Put a new line after the fake headers so our content starts at the
    // beginning of a line. This way we can get correct line/column info by
    // just subtracting one from the line number.
    $html .= "<html><head><title></title></head><body>\n";
    $html .= $text;
    $html .= '</body></html>';
  }
  else {
    $html = $text;
  }
  $output = htmltidy_string($html, $input, $settings, $errors, $warnings);

  // Remove the html wrapper
  if (preg_match('|<body[^>]*>([\\s\\S]*)</body>|', $output, $matches)) {
    $output = $matches[1];
  }

  // Fix the line numbers on both errors and warnings arrays (subtract 1 from
  // each). Only needed when the fake header line was prepended; a document
  // passed through as-is already has the right line numbers.
  if ($wrapped) {
    htmltidy_fix_linenums($errors, -1);
    htmltidy_fix_linenums($warnings, -1);
  }
  return $output;
}

/**
 * Adjust the line numbers in an array of htmltidy errors or warnings.
 *
 * Every occurrence of "line <number>" in each string entry is rewritten with
 * the adjusted number. The array is modified in place; nothing is returned.
 * A value that is not an array is left untouched.
 *
 * @param $array
 *   Array of warning or error strings, passed by reference.
 * @param $adjustment
 *   Integer to add to each line number (negative values are allowed).
 */
function htmltidy_fix_linenums(&$array, $adjustment) {
  if (!is_array($array)) {
    return;
  }
  $adjustment = (int) $adjustment;

  foreach ($array as $key => $message) {
    if (!is_string($message)) {
      continue;
    }

    // Split around each "line N" token. With PREG_SPLIT_DELIM_CAPTURE and a
    // single capture group the tokens land on the odd indexes. This avoids
    // create_function(), which no longer exists in PHP 8.
    $parts = preg_split('|(line \\d+)|', $message, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($parts === FALSE) {
      continue;
    }
    $total = count($parts);
    for ($i = 1; $i < $total; $i += 2) {
      // Skip the leading "line " (5 characters) to get at the number.
      $parts[$i] = 'line ' . ((int) substr($parts[$i], 5) + $adjustment);
    }
    $array[$key] = implode('', $parts);
  }
}

/**
 * Return an array of the expected HTML tidy options.
 *
 * Having one place that supplies sane values for every option cuts down on
 * PHP notices and on repeated "is this set?" checks elsewhere.
 *
 * @return
 *   The default settings, grouped under the keys 'paths', 'format' and
 *   'debug'.
 */
function htmltidy_default_settings() {
  // Location of the tidy binary and of an optional configuration file.
  $paths = array(
    'app' => '/usr/bin/tidy',
    'config' => '',
  );

  // Formatting options.
  $format = array(
    'process_input' => FALSE,
    'indent' => 1,
    'wordwrap' => 80,
    'wrapphp' => 1,
    'tidymark' => 0,
    'clean' => 1,
    'xhtml' => 1,
    'doctype' => 'auto',
    'enclosetext' => 0,
    'encloseblocktext' => 0,
    'wordcleanup' => 1,
  );

  // Debugging options.
  $debug = array(
    'warnings' => 0,
    'verbose' => 0,
    'runtwice' => 1,
  );

  return array(
    'paths' => $paths,
    'format' => $format,
    'debug' => $debug,
  );
}

/**
 * Sets the htmltidy_apppath Drupal variable to a valid value.
 *
 * Uses the stored path if it exists; otherwise looks in a list of likely
 * locations and stores the first one found. The binary is then run with "-v"
 * to check that it responds.
 *
 * The PHP tidy extension is not used; only the command-line binary is.
 *
 * @param $message
 *   Assigned to an explanation when the test fails.
 * @param $version
 *   Assigned to the first line printed by "tidy -v" when the test succeeds.
 * @return
 *   TRUE if ok, FALSE on error.
 */
function htmltidy_test(&$message, &$version) {
  $tidypath = variable_get('htmltidy_apppath', '/usr/bin/tidy');
  if (!file_exists($tidypath)) {
    // Directory of this module, with backslashes normalised to slashes.
    $module_dir = preg_replace('|\\\\+|', '/', dirname(__FILE__));

    // windows specific paths
    if (substr(PHP_OS, 0, 3) == 'WIN') {
      $maybepaths = array(
        $module_dir . '/bin/tidy.exe',
      );
    }
    else {
      $maybepaths = array(
        '/bin/tidy',
        '/usr/bin/tidy',
        '/usr/local/bin/tidy',
        $module_dir . '/bin/tidy',
      );
    }
    foreach ($maybepaths as $tidypath) {
      drupal_set_message('Looking for tidy at ' . $tidypath);
      if (file_exists($tidypath)) {
        break;
      }
    }
    if (!file_exists($tidypath)) {
      $message = "Couldn't find tidy binary anywhere!";
      return FALSE;
    }
    variable_set('htmltidy_apppath', $tidypath);
  }

  // Now test it. The path is quoted as a single shell argument so that a
  // binary living under a directory with spaces can still be run;
  // escapeshellcmd() on the whole command does not quote the path.
  $command = escapeshellarg($tidypath) . ' -v';
  $response = array();
  if (exec($command, $response) && isset($response[0])) {
    $version = $response[0];
    return TRUE;
  }

  $message = "Found a 'tidy' binary, but it didn't run right. \n{$command}\nfailed to respond correctly";
  return FALSE;
}

/**
 * Process the input through tidy engine.
 *
 * The input is written to the standard input of the tidy process; the tidied
 * markup is read from its standard output and the diagnostics from its
 * standard error.
 *
 * @param $input
 *   The raw html/xml
 * @param $tidypath
 *   Full system path of tidy binary
 * @param $args
 *   Array of ready-made command-line arguments to run tidy with. They are
 *   joined with spaces as-is, so callers must quote any values themselves.
 * @param $output
 *   Set to the tidied markup, to the untouched input when tidy reports
 *   errors, or to an empty string when tidy cannot be run at all.
 * @param $errors
 *   errors to add to
 * @param $warnings
 *   warnings to add to
 * @return
 *   Return value of tidy
 *     0 - All input files were processed successfully.
 *     1 - There were warnings.
 *     2 - There were errors (also returned when tidy could not be run).
 */
function htmltidy_run($input, $tidypath, $args, &$output, &$errors, &$warnings) {
  if (!file_exists($tidypath)) {
    watchdog('htmltidy', 'Failed to find htmltidy executable at %htmltidy_apppath, not using tidy', array(
      '%htmltidy_apppath' => $tidypath,
    ), WATCHDOG_WARNING);
    $output = '';
    return 2;
  }

  // Run Tidy with the right options. The binary path is quoted so that a
  // path containing spaces still resolves to a single command.
  $command = escapeshellarg($tidypath) . ' ' . implode(' ', $args);
  $descriptorspec = array(
    // stdin is a pipe that the child will read from
    0 => array(
      "pipe",
      "r",
    ),
    // stdout is a pipe that the child will write to
    1 => array(
      "pipe",
      "w",
    ),
    // stderr is a pipe that the child will write its diagnostics to
    2 => array(
      "pipe",
      "w",
    ),
  );
  $pipes = array();
  $process = proc_open($command, $descriptorspec, $pipes);
  if (!is_resource($process)) {
    watchdog('htmltidy', 'Failed to start htmltidy executable at %htmltidy_apppath, not using tidy', array(
      '%htmltidy_apppath' => $tidypath,
    ), WATCHDOG_WARNING);
    $output = '';
    return 2;
  }

  // NOTE(review): the pipes are drained one after the other; a very large
  // amount of diagnostics on stderr could in theory block the child while
  // stdout is being read. Not changed here.
  fwrite($pipes[0], $input);
  fclose($pipes[0]);
  $stdout = stream_get_contents($pipes[1]);
  $stderr = stream_get_contents($pipes[2]);

  // Close the remaining pipes before proc_close(), as the PHP manual advises.
  fclose($pipes[1]);
  fclose($pipes[2]);
  $return_value = proc_close($process);

  // return_value 0 means success. 1 means warning. 2 means error, in which
  // case the input is handed back untouched.
  switch ($return_value) {
    case 0:
      $warnings = $errors = array();
      $output = $stdout;
      break;

    case 1:
      $errors = array();
      // explode() replaces split(), which was removed in PHP 7.
      foreach (array_filter(explode("\n", $stderr)) as $line) {
        $warnings[] = trim($line);
      }
      $output = $stdout;
      break;

    case 2:
      // Separate errors and warnings into two different arrays. The
      // diagnostics are on stderr, just as in the warnings-only case.
      foreach (array_filter(explode("\n", $stderr)) as $line) {
        $line = trim($line);
        if (preg_match('|^line \\d+ column \\d+ - Warning:|', $line)) {
          $warnings[] = $line;
        }
        else {
          $errors[] = $line;
        }
      }
      $output = $input;
      break;

    default:
      // Unexpected exit status: do not lose the content, hand the input back
      // untouched like in the error case.
      watchdog('htmltidy', 'htmltidy executable at %htmltidy_apppath ended with unexpected status %status', array(
        '%htmltidy_apppath' => $tidypath,
        '%status' => $return_value,
      ), WATCHDOG_WARNING);
      $output = $input;
      break;
  }
  return $return_value;
}

/**
 * Helper function for defaults in settings.
 *
 * Despite the name, this tests with isset(): only an unset or NULL value is
 * replaced by the default. Other "empty" values such as 0, '' or FALSE are
 * returned as they are.
 *
 * @param $var
 *   The requested variable
 * @param $default
 *   The default value
 * @return
 *   The requested variable if set, otherwise the default
 */
function htmltidy_empty($var = NULL, $default = NULL) {
  return isset($var) ? $var : $default;
}

/**
 * Get all of the formats that have the HTML Tidy filter enabled.
 *
 * Queries the {filter} table for enabled (status = 1) rows belonging to the
 * 'htmltidy' filter of the 'htmltidy' module.
 *
 * @return
 *   An array keyed by the 'format' column of each matching row, where the
 *   values are the corresponding 'settings' column as returned by the query.
 */
function htmltidy_get_formats() {
  $placeholders = array(
    'module' => 'htmltidy',
    'name' => 'htmltidy',
  );
  $result = db_query('SELECT format, settings FROM {filter}
     WHERE module = :module AND name = :name AND status = 1', $placeholders);

  return $result->fetchAllKeyed();
}

Functions

Namesort descending Description
htmltidy_default_settings Return an array of the expected HTML tidy options.
htmltidy_empty Helper function for defaults in settings
htmltidy_fix_linenums Adjust the line numbers in an array of htmltidy errors or warnings.
htmltidy_fragment Tidies an incomplete fragment of HTML by passing it through htmltidy full, then stripping back down to the 'body'.
htmltidy_get_formats Get all of the formats that have the HTML Tidy filter enabled.
htmltidy_help Implementation of hook_help().
htmltidy_hook_info Implementation of hook_hook_info().
htmltidy_permission Implementation of hook_permission().
htmltidy_run Process the input through tidy engine
htmltidy_string Process whatever we are given and return the htmltidy response The output and warnings will be returned as arrays by reference.
htmltidy_test Sets the htmltidy_apppath Drupal variable to a valid value.