View source
<?php
/**
 * Implementation of hook_menu().
 *
 * Declares the HTML Tidy settings page at admin/settings/htmltidy.
 *
 * @param $may_cache
 *   When TRUE the (cacheable) settings page item is declared; otherwise no
 *   items are added.
 *
 * @return
 *   An array of menu item definitions; empty when $may_cache is FALSE.
 */
function htmltidy_menu($may_cache) {
  $items = array();
  if ($may_cache) {
    $items[] = array(
      'path' => 'admin/settings/htmltidy',
      'title' => t('HTML Tidy'),
      'description' => t('Configure settings for HTML Tidy.'),
      'callback' => 'drupal_get_form',
      // Must be an array: the menu system hands this value to
      // call_user_func_array(), which rejects a bare string.
      'callback arguments' => array('htmltidy_settings'),
      'access' => user_access('administer htmltidy'),
    );
  }
  return $items;
}
/**
 * Implementation of hook_help().
 *
 * @param $section
 *   The help section being requested. Recognised values are
 *   'admin/help#htmltidy', 'admin/modules#description' and
 *   'admin/settings/htmltidy'.
 *
 * @return
 *   The translated help text for a recognised section, or an empty string
 *   for any other section.
 */
function htmltidy_help($section) {
  switch ($section) {
    case 'admin/help#htmltidy':
      return t("\n <p>\n This module uses <a href='http://tidy.sourceforge.net/'>HTML Tidy</a>\n to properly format HTML files. It can be used at any of several stages.\n <ul>\n <li>An input validator - to tidy user input as it's entered (Most efficient)</li>\n <li>An output filter - (normal Drupal filter) which validates content just before displaying it. (cached, so pretty good)</li>\n <li>Format the entire output of every page displayed, This will ensure your site is always 100% HTML or XHTML strict, but may be inefficient. This is the default setting</li>\n </ul>\n Options accepted include:\n <ul>\n <li>Word wrap - Specify line length (0 to disable).</li>\n <li>Indentation - Makes HTML human-readable.</li>\n <li>Append warnings - Outputs any feedback from Tidy to the webpage.</li>\n <ul>\n <li>Verbose mode - Tidy will attempt to describe warnings in detail (this is not actually\n very helpful).</li>\n <li>Run twice - Runs Tidy twice to get the line numbers on the warnings right.</li>\n </ul>\n </ul>\n </p><p>\n These settings are configured under the Drupal\n <a href='?q=admin/settings/htmltidy'>Administer > Settings > htmltidy</a> page,\n but the full range of HTMLTidy Options as documented\n <a href='http://tidy.sourceforge.net/docs/quickref.html'>on the download site</a>\n can be used if you create your own htmltidy.conf file.\n </p><p>\n Several permissions are also settable in the access control panel:\n <ul>\n <li>administer htmltidy - Self-explanatory.</li>\n <li>use htmltidy debug mode - Append warnings as mentioned above.</li>\n <li>use htmltidy on output - This lets you turn htmltidy off to save proc time for user\n who will probably not be looking at source and to save bandwidth and processor time.</li>\n </ul>\n </p><p>\n There appear to be issues with the input validator conflicting with\n other rewrite filters, this hasn't been fully investigated yet.\n </p><p>\n Due to forking (or lack of it) under Windows platforms, you may see flickers of\n DOS boxes 
as the application is run. This depends a lot on how your server was configured\n (service or commandline app). This can be ignored.\n </p>\n ");

    case 'admin/modules#description':
      return t("\n Repairs, indents and wraps HTML. Also gives debugging information about\n spec-conformance. Can be used as a complete site-wrapper, input\n validator, or an output filter.\n ");

    case 'admin/settings/htmltidy':
      return t("\n <p>\n Here you can set up HTML tidying options.\n This allows you to clean up the HTML that Drupal emits,\n with indenting and word-wrapping options.\n </p><p>\n The recommended way of using the HTMLTidy module is to apply it as a\n <em>validator</em> over <em>input</em>.\n This means that invalid HTML never even makes it into the system.\n To force compliance on an existing website however,\n you may need to enable the <em>output</em> filter instead.\n This approach will tidy the entire page every time.\n An alternative solution is to use HTMLTidy as an output filter.\n This means that the contents of nodes will be validated and cached\n before display, but not the entire page.\n </p>\n ");
  }

  // No help for this section. The previous fall-through passed an undefined
  // variable to t(), raising a notice on every unrelated help lookup.
  return '';
}
/**
 * Runs a complete HTML document through tidy using the configured options.
 *
 * Builds the tidy command-line arguments from the module's stored settings
 * and delegates execution to htmltidy_run().
 *
 * @param $input
 *   The HTML document to tidy.
 * @param $errors
 *   By reference; receives error messages. When the tidy executable is
 *   missing, the failure message is appended here.
 * @param $warnings
 *   By reference; receives warning messages.
 *
 * @return
 *   The output produced by htmltidy_run(), or an empty string when the tidy
 *   executable cannot be found.
 */
function htmltidy_string($input, &$errors, &$warnings) {
  // Callers frequently pass undefined variables by reference; make sure both
  // lists are arrays before anything is appended to or counted on them.
  if (!is_array($errors)) {
    $errors = array();
  }
  if (!is_array($warnings)) {
    $warnings = array();
  }

  $apppath = variable_get('htmltidy_apppath', '/usr/bin/tidy');
  if (!file_exists($apppath)) {
    $message = t("Failed to find htmltidy executable at '%htmltidy_apppath', not using tidy.", array(
      '%htmltidy_apppath' => $apppath,
    ));
    // Log the message that was just built (the variable name used to be
    // misspelled here, so an empty entry was logged).
    watchdog('htmltidy', $message, WATCHDOG_WARNING);
    $errors[] = $message;
    return '';
  }

  $args = array();
  if (variable_get('htmltidy_indent', 1)) {
    $args[] = '--indent auto';
  }
  if (!variable_get('htmltidy_verbose', 0)) {
    $args[] = '-q';
  }
  if (!variable_get('htmltidy_wrapphp', 1)) {
    $args[] = '--wrap-php no';
  }
  if (!variable_get('htmltidy_tidymark', 1)) {
    $args[] = '--tidy-mark no';
  }
  if (variable_get('htmltidy_clean', 1)) {
    $args[] = '--clean yes';
  }
  if (variable_get('htmltidy_xhtml', 0)) {
    $args[] = '--output-xhtml yes';
  }
  if (variable_get('htmltidy_enclosetext', 0)) {
    $args[] = '--enclose-text yes';
  }
  if (variable_get('htmltidy_encloseblocktext', 0)) {
    $args[] = '--enclose-block-text yes';
  }
  if (variable_get('htmltidy_wordcleanup', 0)) {
    $args[] = '--bare yes';
    $args[] = '--word-2000 yes';
    $args[] = '--drop-proprietary-attributes yes';
  }
  if (variable_get('htmltidy_process_input', FALSE) && !module_exists('htmltidy_output')) {
    $args[] = '--show-body-only yes';
  }

  // The remaining values are free-form settings that end up on a shell
  // command line, so each one is quoted or coerced before use.
  $htmltidy_confpath = variable_get('htmltidy_confpath', '');
  if (file_exists($htmltidy_confpath)) {
    $args[] = '--config ' . escapeshellarg($htmltidy_confpath);
  }

  // A doctype may be a formal public identifier containing spaces and
  // slashes; unquoted it would be split into several arguments. An empty
  // value falls back to 'auto' so the next switch is not consumed as the
  // doctype.
  $doctype = trim((string) variable_get('htmltidy_doctype', 'auto'));
  if ($doctype === '') {
    $doctype = 'auto';
  }
  $args[] = '--doctype ' . escapeshellarg($doctype);
  $args[] = '-wrap ' . (int) variable_get('htmltidy_wordwrap', 0);
  $args[] = '-utf8';
  $args[] = '-modify';

  $output = '';
  htmltidy_run($input, $args, $output, $errors, $warnings);
  return $output;
}
/**
 * Implementation of hook_filter().
 *
 * Exposes a single "HTML Tidy" filter. Processing tidies the text as an HTML
 * fragment and records the outcome in the global $htmltidy_filter so that
 * htmltidy_nodeapi() can report it without running tidy again.
 *
 * @param $op
 *   The filter operation: 'list', 'description' or 'process'. Any other
 *   operation returns $text unchanged.
 * @param $delta
 *   Unused; only one filter is provided.
 * @param $format
 *   Unused.
 * @param $text
 *   The text to be filtered.
 *
 * @return
 *   Depends on $op: the list of filter names, the filter description, or the
 *   (possibly tidied) text.
 */
function htmltidy_filter($op, $delta = 0, $format = NULL, $text = '') {
  switch ($op) {
    case 'list':
      return array(
        0 => t('HTML Tidy'),
      );

    case 'description':
      // '@' escapes the value without decoration. A '%' placeholder is
      // themed as emphasised text, which injects markup into the href.
      return t('Corrects faulty and invalid HTML according to <a href="@admin-settings-htmltidy">htmltidy configuration rules</a>.', array(
        '@admin-settings-htmltidy' => url('admin/settings/htmltidy'),
      ));

    case 'process':
      global $htmltidy_filter;
      $errors = array();
      $warnings = array();
      $cleaned = htmltidy_fragment($text, $errors, $warnings);
      // Remember the result for htmltidy_nodeapi('validate').
      $htmltidy_filter['filtered'] = TRUE;
      $htmltidy_filter['errors'] = $errors;
      $htmltidy_filter['warnings'] = $warnings;
      return $cleaned;

    default:
      return $text;
  }
}
/**
 * Tidies a fragment of HTML (for example a node body).
 *
 * The fragment is wrapped in a minimal XHTML document, passed through
 * htmltidy_string(), and the contents of the resulting <body> element are
 * returned. Line numbers in the reported messages are shifted back by one to
 * account for the wrapper line placed before the fragment.
 *
 * @param $input
 *   The HTML fragment.
 * @param $errors
 *   By reference; receives error messages.
 * @param $warnings
 *   By reference; receives warning messages.
 *
 * @return
 *   The tidied fragment. Empty (falsy) input is returned as a string without
 *   invoking tidy.
 */
function htmltidy_fragment($input, &$errors, &$warnings) {
  // Callers often pass undefined variables by reference; normalise them so
  // the line-number fix-up below always receives arrays.
  if (!is_array($errors)) {
    $errors = array();
  }
  if (!is_array($warnings)) {
    $warnings = array();
  }

  // Nothing worth tidying. Hand the value back instead of falling off the
  // end of the function, which turned '' and '0' into NULL.
  if (!$input) {
    return (string) $input;
  }

  $html = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
  $html .= "<html><head><title></title></head><body>\n";
  $html .= $input;
  $html .= '</body></html>';

  $output = htmltidy_string($html, $errors, $warnings);
  if (preg_match('|<body[^>]*>([\\s\\S]*)</body>|', $output, $matches)) {
    $output = $matches[1];
  }

  // The wrapper adds one line above the fragment.
  htmltidy_fix_linenums($errors, -1);
  htmltidy_fix_linenums($warnings, -1);
  return $output;
}
/**
 * Shifts every "line N" reference in a list of messages by a fixed amount.
 *
 * @param $array
 *   By reference; the list of message strings to rewrite in place. Anything
 *   that is not an array is left untouched.
 * @param $adjustment
 *   The amount added to each line number (may be negative).
 */
function htmltidy_fix_linenums(&$array, $adjustment) {
  // Callers may pass a message list that was never populated.
  if (!is_array($array)) {
    return;
  }

  // An anonymous function replaces create_function(), which was removed in
  // PHP 8 and assembled executable code from a string.
  $shift = function ($matches) use ($adjustment) {
    return $matches[1] . ' ' . (int) ($matches[2] + $adjustment);
  };

  foreach (array_keys($array) as $key) {
    $array[$key] = preg_replace_callback('|(line) (\\d+)|', $shift, $array[$key]);
  }
}
/**
 * Implementation of hook_nodeapi().
 *
 * When input validation is enabled, tidies the submitted body on 'prepare'
 * and reports tidy's errors and warnings on 'validate'.
 *
 * @param $node
 *   The node being operated on.
 * @param $op
 *   The node operation; only 'prepare' and 'validate' are handled.
 * @param $a3
 *   Unused.
 * @param $page
 *   Unused.
 */
function htmltidy_nodeapi(&$node, $op, $a3 = NULL, $page = NULL) {
  // Default to FALSE, matching the settings form and htmltidy_string();
  // otherwise input is tidied while the form shows the option as disabled.
  if (!variable_get('htmltidy_process_input', FALSE)) {
    return;
  }

  $errors = array();
  $warnings = array();

  switch ($op) {
    case 'prepare':
      // Only touch the posted body when one was actually submitted.
      if (isset($_POST['body'])) {
        $_POST['body'] = htmltidy_fragment($_POST['body'], $errors, $warnings);
        if ($errors) {
          // check_plain() escapes as UTF-8, which is what tidy is run with.
          $errors = array_map('check_plain', $errors);
          form_set_error('body', theme('item_list', $errors));
        }
      }
      break;

    case 'validate':
      global $htmltidy_filter;
      if ($node->body) {
        // Running the input format may invoke htmltidy_filter(), which
        // records its result in $htmltidy_filter.
        check_markup($node->body, $node->format);
        if (isset($htmltidy_filter['filtered']) && $htmltidy_filter['filtered']) {
          $errors = $htmltidy_filter['errors'];
          $warnings = $htmltidy_filter['warnings'];
        }
        else {
          // The filter did not run for this format: tidy the body directly
          // and store the cleaned text back into the form.
          $clean = htmltidy_fragment($node->body, $errors, $warnings);
          form_set_value(array(
            '#parents' => array(
              'body',
            ),
          ), $clean);
        }
        if ($errors || $warnings) {
          $message = '<p>Original body:</p><pre>' . check_plain($node->body) . '</pre>';
          if ($errors) {
            $message .= theme('item_list', array_map('check_plain', $errors));
            form_set_error('body', $message);
          }
          if ($warnings) {
            drupal_set_message(theme('item_list', array_map('check_plain', $warnings)));
          }
        }
      }
      break;
  }
}
/**
 * Implementation of hook_perm().
 *
 * @return
 *   The list of permission names this module defines.
 */
function htmltidy_perm() {
  $permissions = array();
  $permissions[] = 'use htmltidy debug mode';
  $permissions[] = 'administer htmltidy';
  return $permissions;
}
/**
 * Form builder for the HTML Tidy settings page.
 *
 * Reports whether the tidy executable is usable (via htmltidy_test()) and
 * exposes the module's stored settings: executable and configuration file
 * paths, input validation, formatting options and debug options.
 *
 * @return
 *   The form array, passed through system_settings_form().
 */
function htmltidy_settings() {
  $form = array();
  $message = '';
  $version = '';

  if (!htmltidy_test($message, $version)) {
    // $message may quote the configured executable path, so escape it before
    // it is printed as part of the page.
    drupal_set_message(t('HTMLTidy executable is not available.') . ' ' . check_plain($message), 'error');
    $form['check_tidy'] = array(
      '#type' => 'fieldset',
      '#value' => t('We require the HTML Tidy binary to be available on the
server. Please <a href="http://tidy.sourceforge.net/">download and
install it</a> wherever you can, then tell me where to find it.'),
    );
  }
  else {
    $form['check_tidy'] = array(
      '#value' => t('<p>HTMLTidy is present and correct: <pre>%tidy_version</pre></p>', array(
        '%tidy_version' => $version,
      )),
    );
  }

  $form['htmltidy_apppath'] = array(
    '#type' => 'textfield',
    '#title' => t('Path to htmltidy executable'),
    '#default_value' => variable_get('htmltidy_apppath', '/usr/bin/tidy'),
    '#description' => t('Enter the full path to htmltidy. e.g. /usr/local/bin/tidy'),
  );
  $form['htmltidy_confpath'] = array(
    '#type' => 'textfield',
    '#title' => t('Path to htmltidy.conf'),
    '#default_value' => variable_get('htmltidy_confpath', ''),
    '#description' => t("For options more advanced than those shown here, you can use an <a href='http://tidy.sourceforge.net/docs/quickref.html'>HTMLTidy configuration file</a>. Enter the full path here ( eg <code>%path</code> ), or leave it blank for none. The explicit options here usually take precedence over the conf file.", array(
      '%path' => preg_replace('|\\\\|', '/', dirname(__FILE__)) . "/htmltidy.conf",
    )),
  );

  $form['formatting_paths'] = array(
    '#type' => 'fieldset',
    '#title' => t('Paths'),
    '#tree' => FALSE,
    'htmltidy_process_input' => array(
      '#type' => 'checkbox',
      '#title' => t('Validate input text'),
      '#default_value' => variable_get('htmltidy_process_input', FALSE),
      '#description' => t("More efficient than processing the output, we can instead run tidy over all text <em>entered</em> as node content. HTML will be corrected at 'Preview' time and only good HTML will ever be saved. Depending on the tidy options however, this may conflict slightly with the other output filters."),
    ),
  );

  // Descriptions are printed as raw HTML, so tag names that should be shown
  // literally are written with entities.
  $form['formatting_options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Formatting Options'),
    'htmltidy_indent' => array(
      '#type' => 'checkbox',
      '#title' => t('Indent output'),
      '#default_value' => variable_get('htmltidy_indent', 1),
      '#description' => t('When checked, htmltidy will indent HTML blocks. (&lt;div&gt;, &lt;p&gt;, etc.)'),
    ),
    // htmltidy_string() passes this variable to tidy as -wrap, and the
    // wrap-php description below refers to it, but it had no form element.
    'htmltidy_wordwrap' => array(
      '#type' => 'textfield',
      '#title' => t('Word wrap'),
      '#default_value' => variable_get('htmltidy_wordwrap', 0),
      '#size' => 3,
      '#maxlength' => 3,
      '#description' => t('Column at which htmltidy wraps lines. Enter 0 to disable wrapping.'),
    ),
    'htmltidy_wrapphp' => array(
      '#type' => 'checkbox',
      '#title' => t('wrap-php'),
      '#default_value' => variable_get('htmltidy_wrapphp', 1),
      '#description' => t('When checked, htmltidy will wrap php pseudo-elements at the column entered above. Naturally, you must set the wrap column before this will do anything.'),
    ),
    'htmltidy_tidymark' => array(
      '#type' => 'checkbox',
      '#title' => 'tidy-mark',
      '#default_value' => variable_get('htmltidy_tidymark', 1),
      '#description' => t('When checked, htmltidy will include a &lt;meta&gt; tag specifying that htmltidy was used to generate the HTML. This has no effect if the &lt;meta&gt; tag is already specified.'),
    ),
    'htmltidy_clean' => array(
      '#type' => 'checkbox',
      '#title' => 'clean',
      '#default_value' => variable_get('htmltidy_clean', 1),
      '#description' => t('Removes surplus tags and attributes, eliminating FONT tags and other, replacing them with style rules and structual markup. Be cautioned that turning this setting on will most likely break parts of Drupal (most notably the book module), and the automatically named style rules may simply not work.'),
    ),
    'htmltidy_xhtml' => array(
      '#type' => 'checkbox',
      '#title' => 'output-xhtml',
      '#default_value' => variable_get('htmltidy_xhtml', 0),
      '#description' => t('Generate XHTML content. This will set the doctype and namespace to the appropriate XHTML spec. Note that you need to set the doctype below to actually validate against an XHTML DTD.'),
    ),
    'htmltidy_doctype' => array(
      '#type' => 'textfield',
      '#title' => 'doctype',
      '#default_value' => variable_get('htmltidy_doctype', 'auto'),
      '#size' => 25,
      '#maxlength' => 25,
      '#description' => t('Enter the doctype declaration that tidy will generate and validate against (if generating XHTML). Valid options include: omit, auto, strict, loose, and any valid formal public identifier (don\'t try this if you are unsure what that means).'),
    ),
    'htmltidy_enclosetext' => array(
      '#type' => 'checkbox',
      '#title' => 'enclose-text',
      '#default_value' => variable_get('htmltidy_enclosetext', 0),
      '#description' => t('Tidy will enclose any text found in the body element with &lt;p&gt; tags. This lets you use stylesheets with greater control, fixes margins, and is required if you want valid XHTML.'),
    ),
    'htmltidy_encloseblocktext' => array(
      '#type' => 'checkbox',
      '#title' => 'enclose-block-text',
      '#default_value' => variable_get('htmltidy_encloseblocktext', 0),
      '#description' => t('Just like the above option, but applies to any text found in an element that allows mixed content for HTML Transitional but not HTML Strict.'),
    ),
    'htmltidy_wordcleanup' => array(
      '#type' => 'checkbox',
      '#title' => 'word-2000',
      // Same default as htmltidy_string(), so an unsaved form shows what
      // tidy is actually being run with.
      '#default_value' => variable_get('htmltidy_wordcleanup', 0),
      '#description' => t('This option specifies if Tidy should go to great pains to strip out all the surplus stuff Microsoft Word 2000 inserts when you save Word documents as "Web pages".'),
    ),
  );

  $form['debug_options'] = array(
    '#type' => 'fieldset',
    '#title' => t('Debug Options'),
    '#collapsible' => TRUE,
    '#collapsed' => TRUE,
    'htmltidy_warnings' => array(
      '#type' => 'checkbox',
      '#title' => t('Append errors and warnings'),
      '#default_value' => variable_get('htmltidy_warnings', 0),
      // '@' placeholders keep the URLs free of the emphasis markup that '%'
      // placeholders are themed with.
      // NOTE(review): confirm these two paths match the roles and permission
      // pages of the core version in use.
      '#description' => t('When checked, errors, warnings and info from htmltidy will be appended to the end of pages, but only for users in <a href="@admin-user-role">roles</a> with the <strong>use htmltidy debug mode</strong> <a href="@admin-user-permission">permission flag</a> set.<br />This is useful for catching non-XHTML compliant document errors, for example.', array(
        '@admin-user-role' => url('admin/user/role'),
        '@admin-user-permission' => url('admin/user/permission'),
      )),
    ),
    'htmltidy_verbose' => array(
      '#type' => 'checkbox',
      '#title' => t('Verbose'),
      '#default_value' => variable_get('htmltidy_verbose', 0),
      '#description' => t('Be more verbose (describe what warnings/errors mean in footer).'),
    ),
    'htmltidy_runtwice' => array(
      '#type' => 'checkbox',
      '#title' => t('Run twice'),
      '#default_value' => variable_get('htmltidy_runtwice', 1),
      '#description' => t('This gets the line numbers on the warnings right, but is slower. This applies only if debug mode is on.'),
    ),
  );

  return system_settings_form($form);
}
/**
 * Checks that a working tidy executable is available.
 *
 * If the configured path does not exist, a few conventional locations are
 * probed and the first hit is stored in the 'htmltidy_apppath' variable. The
 * executable is then run with -v to obtain its version string.
 *
 * @param $message
 *   By reference; set to a description of the problem on failure.
 * @param $version
 *   By reference; set to the first line printed by "tidy -v" on success.
 *
 * @return
 *   TRUE if tidy was found and responded, FALSE otherwise.
 */
function htmltidy_test(&$message, &$version) {
  $tidypath = variable_get('htmltidy_apppath', '/usr/bin/tidy');

  if (!file_exists($tidypath)) {
    // Module directory with backslashes normalised to forward slashes.
    $module_dir = preg_replace('|\\\\+|', '/', dirname(__FILE__));
    if (substr(PHP_OS, 0, 3) == 'WIN') {
      $maybepaths = array(
        $module_dir . '/bin/tidy.exe',
      );
    }
    else {
      $maybepaths = array(
        '/bin/tidy',
        '/usr/bin/tidy',
        '/usr/local/bin/tidy',
        $module_dir . '/bin/tidy',
      );
    }

    $found = NULL;
    foreach ($maybepaths as $candidate) {
      drupal_set_message('Looking for tidy at ' . $candidate);
      if (file_exists($candidate)) {
        $found = $candidate;
        break;
      }
    }
    if ($found === NULL) {
      $message = "Couldn't find tidy binary anywhere!";
      return FALSE;
    }

    // Remember the discovered location for subsequent runs.
    $tidypath = $found;
    variable_set('htmltidy_apppath', $tidypath);
  }

  // Quote the path as a single argument so that locations containing spaces
  // (e.g. under "Program Files") still resolve to the executable.
  $command = escapeshellarg($tidypath) . ' -v';
  $response = array();
  if (exec($command, $response) && isset($response[0])) {
    $version = $response[0];
    return TRUE;
  }

  $message = "Found a 'tidy' binary, but it didn't run right. \n{$command}\nfailed to respond correctly";
  return FALSE;
}
/**
 * Executes the tidy binary over a string.
 *
 * The input is written to a temporary file which tidy is asked to process
 * with the supplied arguments; tidy's messages are collected in a second
 * temporary file (passed with -f). Both files are removed afterwards.
 *
 * @param $input
 *   The text to process.
 * @param $args
 *   An array of command-line argument strings for tidy.
 * @param $output
 *   By reference; receives the contents of the processed file for exit
 *   status 0 or 1, the unmodified $input for any other status or when the
 *   temporary files cannot be prepared, and an empty string when the
 *   executable is missing.
 * @param $errors
 *   By reference; receives error lines.
 * @param $warnings
 *   By reference; receives warning lines.
 *
 * @return
 *   tidy's exit status. This code treats 0 as clean, 1 as warnings only and
 *   2 as errors; 2 is also returned when tidy could not be run at all.
 */
function htmltidy_run($input, $args, &$output, &$errors, &$warnings) {
  $tidypath = variable_get('htmltidy_apppath', '/usr/bin/tidy');
  if (!file_exists($tidypath)) {
    watchdog('htmltidy', t('Failed to find htmltidy executable at %htmltidy_apppath, not using tidy', array(
      '%htmltidy_apppath' => $tidypath,
    )), WATCHDOG_WARNING);
    $output = '';
    return 2;
  }

  // Messages are appended below; make sure there is something to append to.
  if (!is_array($errors)) {
    $errors = array();
  }
  if (!is_array($warnings)) {
    $warnings = array();
  }

  $dirtyFilename = tempnam(file_directory_temp(), 'drup');
  $warningsFilename = tempnam(file_directory_temp(), 'warn');
  $f = $dirtyFilename ? fopen($dirtyFilename, 'w') : FALSE;
  if (!$f || !$warningsFilename) {
    // Could not prepare the working files: pass the input through untouched
    // and report it the same way a tidy failure is reported.
    if ($f) {
      fclose($f);
    }
    if ($dirtyFilename && file_exists($dirtyFilename)) {
      unlink($dirtyFilename);
    }
    if ($warningsFilename && file_exists($warningsFilename)) {
      unlink($warningsFilename);
    }
    $errors[] = t('Unable to create temporary files for htmltidy.');
    $output = $input;
    return 2;
  }
  fwrite($f, $input);
  fclose($f);

  // Quote every path that goes onto the command line; temporary directories
  // and install locations may contain spaces.
  $args[] = '-f ' . escapeshellarg($warningsFilename);
  $command = escapeshellarg($tidypath) . ' ' . implode(' ', $args) . ' ' . escapeshellarg($dirtyFilename);

  // exec() is used rather than system() so that anything tidy prints is
  // captured here instead of being echoed into the page being built.
  $ignored = array();
  $return_value = NULL;
  exec($command, $ignored, $return_value);

  switch ($return_value) {
    case 0:
      // Clean run: nothing to report.
      $warnings = $errors = array();
      $output = file_get_contents($dirtyFilename);
      break;

    case 1:
      // Warnings only: the file was still processed.
      $errors = array();
      $lines = file($warningsFilename);
      $warnings = $lines ? array_map('trim', $lines) : array();
      $output = file_get_contents($dirtyFilename);
      break;

    case 2:
      // Errors: sort the reported lines into warnings and errors and hand
      // back the input unmodified.
      $lines = file($warningsFilename);
      foreach ($lines ? $lines : array() as $line) {
        $line = trim($line);
        if (preg_match('|^line \\d+ column \\d+ - Warning:|', $line)) {
          $warnings[] = $line;
        }
        else {
          $errors[] = $line;
        }
      }
      $output = $input;
      break;

    default:
      // Any other status (for instance the shell failing to start tidy):
      // leave the text untouched rather than leaving $output unset.
      $errors[] = t('htmltidy exited with unexpected status %status.', array(
        '%status' => $return_value,
      ));
      $output = $input;
      break;
  }

  unlink($dirtyFilename);
  unlink($warningsFilename);
  return $return_value;
}