htmltidy.module in HTML Tidy 7
Same filename and directory in other branches
The htmltidy module uses Tidy (http://tidy.sf.net) to properly format HTML for saving and display.
File
htmltidy.module (View source)
<?php
/**
* @file
* The htmltidy module uses Tidy (http://tidy.sf.net) to properly format HTML
* for saving and display.
*/
/****************************************************************************
* Drupal hooks
****************************************************************************/
/**
 * Implements hook_hook_info().
 *
 * Assigns each hook name listed below to a group: the filter-related hooks go
 * to the 'filter' group and the node-related hooks to the 'node' group.
 *
 * @return
 *   An array keyed by hook name, each value being array('group' => GROUP).
 */
function htmltidy_hook_info() {
  // Filter hooks.
  $filter_hooks = array(
    'filter_info',
    'filter_htmltidy_process',
    'filter_htmltidy_settings',
    'filter_htmltidy_tips',
  );
  // Node hooks.
  // These should really be Entity hooks, but that won't be possible until
  // the Entity versions of these functions exist (e.g. hook_entity_prepare()).
  $node_hooks = array(
    'node_prepare',
    'node_validate',
  );

  $hooks = array();
  foreach ($filter_hooks as $hook_name) {
    $hooks[$hook_name] = array('group' => 'filter');
  }
  foreach ($node_hooks as $hook_name) {
    $hooks[$hook_name] = array('group' => 'node');
  }
  return $hooks;
}
/**
 * Implements hook_permission().
 *
 * Declares both permissions named in this module's help text. The
 * 'use htmltidy debug mode' permission must be declared here because
 * htmltidy_string() checks it with user_access(); an undeclared permission
 * cannot be granted to any role.
 *
 * @return
 *   An array keyed by permission name, each with a 'title' and 'description'.
 */
function htmltidy_permission() {
  return array(
    'administer htmltidy' => array(
      'title' => t('Administer htmltidy'),
      'description' => t('Administer the HTML Tidy module.'),
    ),
    'use htmltidy debug mode' => array(
      'title' => t('Use htmltidy debug mode'),
      'description' => t('Append warnings reported by Tidy to the page.'),
    ),
  );
}
/**
 * Implements hook_help().
 *
 * @param $path
 *   The help path being requested.
 * @param $arg
 *   The path arguments (unused here).
 * @return
 *   Translated help markup for the paths handled below, or nothing for any
 *   other path.
 */
function htmltidy_help($path, $arg) {
  switch ($path) {
    // Drupal 7 asks for a module's main help text with the
    // "admin/help#modulename" path; this is also what gets the module listed
    // on the help index page.
    case 'admin/help#htmltidy':
      return t("\n <p>\n This module uses <a href='http://tidy.sourceforge.net/'>HTML Tidy</a>\n to properly format HTML files. It can be used at any of several stages.\n <ul>\n <li>An input validator - to tidy user input as it's entered (Most efficient)</li>\n <li>An output filter - (normal Drupal filter) which validates content just before displaying it. (cached, so pretty good)</li>\n </ul>\n Options accepted include:\n <ul>\n <li>Word wrap - Specify line length (0 to disable).</li>\n <li>Indentation - Makes HTML human-readable.</li>\n <li>Append warnings - Outputs any feedback from Tidy to the webpage.</li>\n <ul>\n <li>Verbose mode - Tidy will attempt to describe warnings in detail (this is not actually\n very helpful).</li>\n <li>Run twice - Runs Tidy twice to get the line numbers on the warnings right.</li>\n </ul>\n </ul>\n </p><p>\n These settings are configured under the \"Configure\" menu of any <a href='@help'>Input Format</a> that you enable the filter on.\n The full range of HTMLTidy Options as documented\n <a href='http://tidy.sourceforge.net/docs/quickref.html'>on the download site</a>\n can be used if you create your own htmltidy.conf file.\n </p><p>\n Several permissions are also settable in the access control panel:\n <ul>\n <li>administer htmltidy - Self-explanatory.</li>\n <li>use htmltidy debug mode - Append warnings as mentioned above.</li>\n </ul>\n </p><p>\n There appear to be issues with the input validator conflicting with\n other rewrite filters, this hasn't been fully investigated yet.\n </p><p>\n Due to forking (or lack of it) under Windows platforms, you may see flickers of\n DOS boxes as the application is run. This depends a lot on how your server was configured\n (service or commandline app). This can be ignored.\n </p>\n ", array(
        '@help' => url('admin/config/content/formats'),
      ));

    // NOTE(review): this path looks like a leftover from an older Drupal
    // release; kept as-is because it is harmless. Confirm before removing.
    case 'admin/modules/description':
      return t("\n Repairs, indents and wraps HTML. Also gives debugging information about\n spec-conformance. Can be used as a complete site-wrapper, input\n validator, or an output filter.\n ");
  }
}
/****************************************************************************
* Helper functions
****************************************************************************/
/**
 * Runs a complete HTML string through the tidy executable.
 *
 * Builds the tidy command-line options from $settings and delegates the
 * actual execution to htmltidy_run(). Errors and warnings are handed back
 * through the by-reference arrays.
 *
 * @param $text
 *   HTML string to be tidied
 * @param $input
 *   FALSE if text is for output; TRUE if text is for input
 * @param $settings
 *   Filter settings for the string, grouped under the 'paths', 'format' and
 *   'debug' keys. Any missing group or key falls back to the value from
 *   htmltidy_default_settings().
 * @param $errors
 *   An array to be filled with error info
 * @param $warnings
 *   An array to be filled with warning info
 * @return
 *   The tidied string. $text is returned untouched when $input is TRUE and
 *   input processing is switched off. An empty string is returned when the
 *   tidy executable cannot be found.
 */
function htmltidy_string($text, $input, $settings, &$errors, &$warnings) {
  // Fill in any missing configuration with default settings. The merge is
  // done per group: a plain "$settings += defaults" only adds whole missing
  // groups and would leave individual keys of a partial group undefined.
  if (!is_array($settings)) {
    $settings = array();
  }
  foreach (htmltidy_default_settings() as $group => $group_defaults) {
    $given = (isset($settings[$group]) && is_array($settings[$group])) ? $settings[$group] : array();
    $settings[$group] = $given + $group_defaults;
  }

  // If the string is for input, and the option to process input is off, simply
  // return the string unmolested.
  if ($input && !$settings['format']['process_input']) {
    return $text;
  }

  // Make sure that we can find the executable.
  if (!file_exists($settings['paths']['app'])) {
    $message = "Failed to find htmltidy executable at '%htmltidy_apppath', not using tidy.";
    $strings = array(
      '%htmltidy_apppath' => $settings['paths']['app'],
    );
    watchdog('htmltidy', $message, $strings, WATCHDOG_WARNING);
    $errors[] = t($message, $strings);
    return '';
  }

  /*
   * Do not pass the parameters their default values as defined in the
   * documentation for tidy (http://www.w3.org/People/Raggett/tidy/), or weird
   * stuff starts to happen.
   */
  $args = array();
  if ($settings['format']['indent']) {
    $args[] = '--indent auto';
  }
  if (!$settings['debug']['verbose']) {
    $args[] = '-q';
  }
  if (!$settings['format']['wrapphp']) {
    $args[] = '--wrap-php no';
  }
  if (!$settings['format']['tidymark']) {
    $args[] = '--tidy-mark no';
  }
  if ($settings['format']['clean']) {
    $args[] = '--clean yes';
  }
  if ($settings['format']['xhtml']) {
    $args[] = '--output-xhtml yes';
  }
  if ($settings['format']['enclosetext']) {
    $args[] = '--enclose-text yes';
  }
  if ($settings['format']['encloseblocktext']) {
    $args[] = '--enclose-block-text yes';
  }
  if ($settings['format']['wordcleanup']) {
    $args[] = '--bare yes';
    $args[] = '--word-2000 yes';
    $args[] = '--drop-proprietary-attributes yes';
  }
  if (htmltidy_empty($settings['format']['process_input'], FALSE) && !module_exists('htmltidy_output')) {
    $args[] = '--show-body-only yes';
  }

  // User specified configuration file. The path is quoted because the
  // arguments end up in a shell command line and the path may contain spaces
  // or shell metacharacters.
  $htmltidy_confpath = $settings['paths']['config'];
  if (!empty($htmltidy_confpath) && file_exists($htmltidy_confpath)) {
    $args[] = '-config ' . escapeshellarg($htmltidy_confpath);
  }
  // The doctype is a configurable value as well, so quote it too.
  if (!empty($settings['format']['doctype'])) {
    $args[] = '--doctype ' . escapeshellarg($settings['format']['doctype']);
  }
  $args[] = '-wrap ' . intval($settings['format']['wordwrap']);
  $args[] = '-utf8';
  // Original intent: modify the input file instead of outputting to stdout.
  // NOTE(review): htmltidy_run() feeds the text through stdin and reads the
  // result from stdout, so confirm this flag is still wanted.
  $args[] = '-modify';

  $output = '';
  htmltidy_run($text, $settings['paths']['app'], $args, $output, $errors, $warnings);

  // Output debugging info: add the styling for the ".htmltidy" debug box.
  // drupal_add_css() with the 'inline' type is used because
  // drupal_set_html_head() is not available in Drupal 7. The former
  // drupal_set_message() branch was removed: it depended on variables that
  // were never defined in this function and therefore could never run.
  if ($settings['debug']['warnings'] && user_access('use htmltidy debug mode')) {
    $css = ".htmltidy { border: 1px dashed #aaa; background-color: #eee; padding: 1em;\n" . "margin: 1em; float: left; font-family: \"courier new\", sans-serif; font-size: 8pt; color: #050; }";
    drupal_add_css($css, array('type' => 'inline'));
  }
  return $output;
}
/**
 * Tidies an incomplete fragment of HTML by passing it through htmltidy full,
 * then stripping back down to the 'body'.
 *
 * Text that does not already contain an "<html" tag is wrapped in a minimal
 * document before being passed to htmltidy_string(). The wrapper adds exactly
 * one line in front of the content, so the line numbers reported in $errors
 * and $warnings are shifted back by one in that case only.
 *
 * @param $text
 *   HTML string to be tidied
 * @param $input
 *   FALSE if text is for output; TRUE if text is for input
 * @param $settings
 *   Filter settings for the fragment
 * @param $errors
 *   An array to be filled with error info
 * @param $warnings
 *   An array to be filled with warning info
 * @return
 *   The tidied string. Empty input is returned unchanged without running
 *   tidy.
 */
function htmltidy_fragment($text, $input, $settings, &$errors, &$warnings) {
  // Nothing to tidy: hand the value straight back instead of falling off the
  // end of the function and returning NULL.
  if (!$text) {
    return $text;
  }

  // Pretend it's a full document unless the text already has an <html> tag
  // (matched case-insensitively).
  $wrapped = (stripos($text, '<html') === FALSE);
  if ($wrapped) {
    // This declaration just suppresses one of the warnings.
    $html = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
    // Put a new line after the fake headers so our content starts at the
    // beginning of a line. This way we can get correct line/column info by
    // just subtracting one from the line number.
    $html .= "<html><head><title></title></head><body>\n";
    $html .= $text;
    $html .= '</body></html>';
  }
  else {
    $html = $text;
  }

  $output = htmltidy_string($html, $input, $settings, $errors, $warnings);

  // Remove the html wrapper.
  if (preg_match('|<body[^>]*>([\\s\\S]*)</body>|', $output, $matches)) {
    $output = $matches[1];
  }

  // Fix the line numbers on both the errors and warnings arrays (subtract 1
  // from each), but only when the one-line wrapper was really added; an
  // unwrapped document already has correct line numbers.
  if ($wrapped) {
    htmltidy_fix_linenums($errors, -1);
    htmltidy_fix_linenums($warnings, -1);
  }
  return $output;
}
/**
 * Adjust the line numbers in an array of htmltidy errors or warnings.
 *
 * Every "line N" occurrence in each string is rewritten to
 * "line (N + $adjustment)". The array is modified in place; nothing is
 * returned. A value that is not an array is left untouched.
 *
 * @param $array
 *   Array of warning or error strings, passed by reference.
 * @param $adjustment
 *   Integer to add to each line number (negative values are allowed).
 */
function htmltidy_fix_linenums(&$array, $adjustment) {
  // Callers may hand over a variable that was never filled in.
  if (!is_array($array)) {
    return;
  }
  $adjustment = (int) $adjustment;
  // A closure replaces create_function(), which no longer exists in PHP 8.
  $shift_line = function ($matches) use ($adjustment) {
    return $matches[1] . ' ' . ((int) $matches[2] + $adjustment);
  };
  foreach ($array as $key => $line) {
    $array[$key] = preg_replace_callback('|(line) (\\d+)|', $shift_line, $line);
  }
}
/**
 * Return an array of the expected HTML tidy options.
 *
 * Keeping the fallback values in one place means callers can merge them into
 * whatever settings they were given instead of testing every key before use,
 * which in turn keeps PHP notices out of the log.
 *
 * @return
 *   An array with three groups: 'paths' (executable and config file),
 *   'format' (formatting switches) and 'debug' (diagnostic switches).
 */
function htmltidy_default_settings() {
  // Location of the tidy binary and of an optional configuration file.
  $paths = array(
    'app' => '/usr/bin/tidy',
    'config' => '',
  );
  // Formatting behaviour.
  $format = array(
    'process_input' => FALSE,
    'indent' => 1,
    'wordwrap' => 80,
    'wrapphp' => 1,
    'tidymark' => 0,
    'clean' => 1,
    'xhtml' => 1,
    'doctype' => 'auto',
    'enclosetext' => 0,
    'encloseblocktext' => 0,
    'wordcleanup' => 1,
  );
  // Diagnostics.
  $debug = array(
    'warnings' => 0,
    'verbose' => 0,
    'runtwice' => 1,
  );
  return array(
    'paths' => $paths,
    'format' => $format,
    'debug' => $debug,
  );
}
/**
 * Locates the tidy binary and checks that it can be run.
 *
 * Starts from the path stored in the 'htmltidy_apppath' Drupal variable. If
 * no file exists there, a list of likely locations is probed and the first
 * hit is saved back to that variable. The binary is then run with "-v" to
 * confirm that it responds. Only the command-line binary is probed; the PHP
 * tidy extension is not used.
 *
 * @param $message
 *   Assigned to an explanation when the test fails.
 * @param $version
 *   Assigned to the first line printed by "tidy -v" on success.
 * @return
 *   TRUE if ok, FALSE on error.
 */
function htmltidy_test(&$message, &$version) {
  $tidypath = variable_get('htmltidy_apppath', '/usr/bin/tidy');
  if (!file_exists($tidypath)) {
    // Directory of this module, with backslashes normalised to slashes.
    $module_dir = preg_replace('|\\\\+|', '/', dirname(__FILE__));
    // Windows specific paths.
    if (substr(PHP_OS, 0, 3) == 'WIN') {
      $maybepaths = array(
        $module_dir . '/bin/tidy.exe',
      );
    }
    else {
      $maybepaths = array(
        '/bin/tidy',
        '/usr/bin/tidy',
        '/usr/local/bin/tidy',
        $module_dir . '/bin/tidy',
      );
    }
    foreach ($maybepaths as $tidypath) {
      drupal_set_message('Looking for tidy at ' . $tidypath);
      if (file_exists($tidypath)) {
        break;
      }
    }
    // When nothing matched, $tidypath still holds the last candidate.
    if (!file_exists($tidypath)) {
      $message = "Couldn't find tidy binary anywhere!";
      return FALSE;
    }
    variable_set('htmltidy_apppath', $tidypath);
  }

  // Now test it. The path is quoted as a single shell argument so that
  // locations containing spaces (common on Windows) still work;
  // escapeshellcmd() on the whole command would not quote them.
  $command = escapeshellarg($tidypath) . ' -v';
  $response = array();
  if (exec($command, $response)) {
    $version = $response[0];
    return TRUE;
  }
  else {
    $message = "Found a 'tidy' binary, but it didn't run right. \n{$command}\nfailed to respond correctly";
    return FALSE;
  }
}
/**
 * Process the input through tidy engine.
 *
 * The input is written to the stdin of the tidy process; the tidied markup is
 * read from its stdout and the diagnostics from its stderr.
 *
 * @param $input
 *   The raw html/xml
 * @param $tidypath
 *   Full system path of tidy binary
 * @param $args
 *   Array of ready-made command-line fragments to run tidy with. They are
 *   joined with spaces and used verbatim, so any value inside them must
 *   already be shell-safe.
 * @param $output
 *   Set to the tidied text on status 0 or 1, to the unchanged $input on any
 *   other status, and to '' when the binary is missing.
 * @param $errors
 *   Errors to add to
 * @param $warnings
 *   Warnings to add to
 * @return
 *   Return value of tidy
 *   0 - All input files were processed successfully.
 *   1 - There were warnings.
 *   2 - There were errors (also returned when the binary is missing or the
 *       process cannot be started).
 *   Any other value is whatever proc_close() reported and is handled like an
 *   error.
 */
function htmltidy_run($input, $tidypath, $args, &$output, &$errors, &$warnings) {
  // Callers may pass variables that were never initialised.
  if (!is_array($errors)) {
    $errors = array();
  }
  if (!is_array($warnings)) {
    $warnings = array();
  }

  if (!file_exists($tidypath)) {
    watchdog('htmltidy', 'Failed to find htmltidy executable at %htmltidy_apppath, not using tidy', array(
      '%htmltidy_apppath' => $tidypath,
    ), WATCHDOG_WARNING);
    $output = '';
    return 2;
  }

  // Run Tidy with the right options. The binary path is quoted so that a
  // location containing spaces still works.
  $command = escapeshellarg($tidypath) . ' ' . implode(' ', $args);
  $descriptorspec = array(
    // stdin is a pipe that the child will read from
    0 => array('pipe', 'r'),
    // stdout is a pipe that the child will write to
    1 => array('pipe', 'w'),
    // stderr is a pipe that the child will write its diagnostics to
    2 => array('pipe', 'w'),
  );
  $process = proc_open($command, $descriptorspec, $pipes);
  if ($process === FALSE) {
    // The process could not be started: report it and leave the text alone.
    $errors[] = 'Failed to start the tidy process.';
    $output = $input;
    return 2;
  }

  fwrite($pipes[0], $input);
  fclose($pipes[0]);
  $stdout = stream_get_contents($pipes[1]);
  $stderr = stream_get_contents($pipes[2]);
  // Close the remaining pipes before proc_close(), as the PHP manual advises.
  fclose($pipes[1]);
  fclose($pipes[2]);
  $return_value = proc_close($process);

  // Non-empty diagnostic lines. explode() replaces split(), which was
  // removed in PHP 7.
  $diagnostics = array();
  foreach (array_filter(explode("\n", $stderr)) as $line) {
    $diagnostics[] = trim($line);
  }

  // return_value 0 means success. 1 means warning. 2 means error, the text
  // will be handed back, but not have been touched.
  switch ($return_value) {
    case 0:
      $warnings = $errors = array();
      $output = $stdout;
      break;

    case 1:
      $errors = array();
      foreach ($diagnostics as $line) {
        $warnings[] = $line;
      }
      $output = $stdout;
      break;

    case 2:
    default:
      // Separate errors and warnings into two different arrays. The
      // diagnostics are taken from stderr, as in the warning case above.
      // Unexpected statuses are treated the same way, so the text is never
      // silently dropped.
      foreach ($diagnostics as $line) {
        if (preg_match('|^line \\d+ column \\d+ - Warning:|', $line)) {
          $warnings[] = $line;
        }
        else {
          $errors[] = $line;
        }
      }
      $output = $input;
      break;
  }
  return $return_value;
}
/**
 * Helper function for defaults in settings.
 *
 * Despite the name, the check is isset(), not empty(): values such as 0, ''
 * or FALSE are returned as they are, and only NULL triggers the default.
 *
 * @param $var
 *   The requested variable
 * @param $default
 *   The default value
 * @return
 *   The requested variable if set (not NULL), otherwise the default
 */
function htmltidy_empty($var = NULL, $default = NULL) {
  return isset($var) ? $var : $default;
}
/**
 * Get all of the formats that have the HTML Tidy filter enabled.
 *
 * Reads the {filter} table for rows of this module's 'htmltidy' filter whose
 * status is 1.
 *
 * @return
 *   An array keyed by the 'format' column of each matching row, where the
 *   values are the contents of the 'settings' column.
 *   NOTE(review): the settings value is returned exactly as stored;
 *   presumably it is serialized and has to be decoded by the caller - confirm.
 */
function htmltidy_get_formats() {
  $sql = 'SELECT format, settings FROM {filter}' . "\n" . 'WHERE module = :module AND name = :name AND status = 1';
  $placeholders = array(
    'module' => 'htmltidy',
    'name' => 'htmltidy',
  );
  $result = db_query($sql, $placeholders);
  return $result->fetchAllKeyed();
}
Functions
Name | Description |
---|---|
htmltidy_default_settings | Return an array of the expected HTML tidy options. |
htmltidy_empty | Helper function for defaults in settings |
htmltidy_fix_linenums | Adjust the line numbers in an array of htmltidy errors or warnings. |
htmltidy_fragment | Tidies an incomplete fragment of HTML by passing it through htmltidy full, then stripping back down to the 'body'. |
htmltidy_get_formats | Get all of the formats that have the HTML Tidy filter enabled. |
htmltidy_help | Implementation of hook_help(). |
htmltidy_hook_info | Implementation of hook_hook_info(). |
htmltidy_permission | Implementation of hook_permission(). |
htmltidy_run | Process the input through tidy engine |
htmltidy_string | Process whatever we are given and return the htmltidy response. The errors and warnings will be returned as arrays by reference. |
htmltidy_test | Sets the htmltidy_apppath Drupal variable to a valid value. |