You are here

pathologic.module in Pathologic 5

File

pathologic.module
View source
<?php

/**
 * Pathologic text filter for Drupal
 * by Garrett Albright
 * 
 * Buy these CDs now:
 * Back to Winnipeg by Fatblueman
 *   http://www.fatblueman.net/
 * A Terrible Flood by esselfortium
 *   http://sl4.poned.com/
 */

/**
 * Implementation of hook_filter().
 *
 * Dispatches on $op:
 * - 'list': returns the single filter this module provides.
 * - 'description': returns the short description of the filter.
 * - 'settings': returns the per-format settings fieldset.
 * - 'process': rewrites href and/or src attribute values in $text, depending
 *   on the per-format settings, and returns the result.
 * Any other operation (and 'process' on an empty string) returns $text as is.
 */
function pathologic_filter($op, $delta = 0, $format = -1, $text = '') {
  if ($op === 'list') {
    return array(
      t('Pathologic'),
    );
  }

  if ($op === 'description') {
    return t('Corrects paths in content which reference pages or media on this server.');
  }

  if ($op === 'settings') {
    $fieldset = array(
      '#type' => 'fieldset',
      '#title' => t('Pathologic'),
      '#collapsible' => TRUE,
    );
    $fieldset["filter_pathologic_href_{$format}"] = array(
      '#type' => 'checkbox',
      '#title' => t('Transform values of <em>href</em> attributes'),
      '#default_value' => intval(variable_get("filter_pathologic_href_{$format}", 1)),
      '#description' => t('<em>href</em> attributes are used in link tags.'),
    );
    $fieldset["filter_pathologic_src_{$format}"] = array(
      '#type' => 'checkbox',
      '#title' => t('Transform values of <em>src</em> attributes'),
      '#default_value' => intval(variable_get("filter_pathologic_src_{$format}", 1)),
      '#description' => t('<em>src</em> attributes are used in image tags and tags for other types of embedded media.'),
    );
    $fieldset["filter_pathologic_abs_paths_{$format}"] = array(
      '#type' => 'textarea',
      '#title' => t('Additional paths to be considered local'),
      '#default_value' => variable_get("filter_pathologic_abs_paths_{$format}", ''),
      '#description' => t('Enter URIs of other Drupal installations which should be considered local in addition to the one for this particular Drupal installation (which is %path). If in doubt, enter the path to the Drupal installation&rsquo;s front page. Enter one path per line.', array(
        '%path' => _pathologic_url('<front>'),
      )),
    );
    return array(
      'filter_pathologic' => $fieldset,
    );
  }

  // Nothing to rewrite unless we are processing non-empty text.
  if ($op !== 'process' || $text === '') {
    return $text;
  }

  // For each attribute: the pattern that picks out local-looking values, and
  // the callback that rewrites them. HREF is handled before SRC.
  $passes = array(
    'href' => array('|href="([^/#][^:"#]+)#?([^:"]*)"|', '_pathologic_do_href'),
    'src' => array('|src="([^/][^:"]+)"|', '_pathologic_do_src'),
  );
  foreach ($passes as $attr => $pass) {
    if (!variable_get("filter_pathologic_{$attr}_{$format}", TRUE)) {
      // Transformation of this attribute is switched off for this format.
      continue;
    }

    // First turn absolute paths to this site into relative ones...
    $text = preg_replace_callback(_pathologic_abs_regex($attr, $format), '_pathologic_abs_to_rel', $text);

    // ...then transform every relative-looking attribute value.
    $text = preg_replace_callback($pass[0], $pass[1], $text);
  }
  return $text;
}

/**
 * Return the regular expression to be used when making paths relative.
 *
 * The returned pattern (delimited by backticks) looks like:
 * (href)="(http://abcde\.fgh//?(index\.php)?(\?q=)?|http://edcba\.hgf//?(index\.php)?(\?q=)?)([^"]*)"
 * The attribute itself is captured so that the callback
 * _pathologic_abs_to_rel() can return a value with the attribute without
 * special trickery or creating duplicate functions. The last capture group is
 * the remainder of the path after the site prefix.
 *
 * @param $attr
 *   The attribute name to match; will probably be either 'href' or 'src'. It
 *   is inserted into the pattern verbatim.
 * @param $format
 *   The input format whose "additional local paths" setting should be used.
 *
 * @return
 *   A complete PCRE pattern string.
 */
function _pathologic_abs_regex($attr, $format) {
  // The attribute-independent tail of the pattern, cached per input format:
  // each format has its own list of additional paths, so one shared cache
  // entry would leak the first format's paths into every other format.
  static $pathstrs = array();

  if (!isset($pathstrs[$format])) {
    $paths = array();
    $paths_field = trim(variable_get("filter_pathologic_abs_paths_{$format}", ''));
    if ($paths_field !== '') {
      foreach (explode("\n", $paths_field) as $line) {
        // Get rid of spurious white space on each line, and drop blank lines:
        // an empty path would become an empty alternative in the pattern and
        // make it match values that are not absolute local URLs at all.
        $line = trim($line);
        if ($line !== '') {
          $paths[] = $line;
        }
      }
    }
    $paths[] = _pathologic_url('<front>');

    // It's possible the user entered the path for the current site in the box,
    // especially if the DB contents are shared between two different servers
    // (likely if it's a testing/production server thing). This won't screw up
    // the regex, but it will unnecessarily complicate it, so let's remove
    // duplicates from the array.
    $paths = array_unique($paths);

    // The paths are literal text, not patterns: escape regex metacharacters
    // (notably the dots in host names) as well as the backtick delimiter.
    $quoted = array();
    foreach ($paths as $path) {
      $quoted[] = preg_quote($path, '`');
    }

    // Optional "/", "index.php" and "?q=" after each site prefix.
    $suffix = '/?(index\\.php)?(\\?q=)?';
    $pathstrs[$format] = ')="(' . implode($suffix . '|', $quoted) . $suffix . ')([^"]*)"`';
  }

  return '`(' . $attr . $pathstrs[$format];
}

/**
 * preg_replace_callback() callback function.
 *
 * Rebuilds an attribute whose value was an absolute path to a page on the
 * local server as the same attribute with a relative value, so that
 * Pathologic can act on it further.
 *
 * @param $matches
 *   Match array for the pattern built by _pathologic_abs_regex(): element 1
 *   is the attribute name and the final element is what remained of the path
 *   after the local prefix.
 *
 * @return
 *   The attribute with its relative value, or with "<front>" as the value
 *   when nothing remained of the path.
 */
function _pathologic_abs_to_rel($matches) {
  // Take the trailing capture (the path remainder) off the end of the array;
  // the attribute name is read from index 1 afterwards.
  $remainder = array_pop($matches);
  $attribute = $matches[1];

  // An empty remainder looks like it was meant to be a link to the front page.
  $value = ($remainder === '') ? '<front>' : $remainder;

  return $attribute . '="' . $value . '"';
}

/**
 * preg_replace_callback() callback function.
 *
 * Properly formats an HREF attribute: element 1 of $matches is the path and
 * element 2 is the fragment (without the "#"); both are handed to
 * _pathologic_url() and the result is wrapped back into an href attribute.
 */
function _pathologic_do_href($matches) {
  $path = $matches[1];
  $fragment = $matches[2];
  return sprintf('href="%s"', _pathologic_url($path, $fragment));
}

/**
 * preg_replace_callback() callback function.
 *
 * A local SRC attribute can mean one of two things:
 * - Most likely, it references a file that physically exists on the disk. In
 *   that case the path is kept as is and only prefixed with the base URL.
 * - Less likely, but still possible, it references a Drupal path. In that
 *   case it is treated like an HREF path and run through _pathologic_url().
 *
 * The two are told apart by checking whether the file actually exists.
 */
function _pathologic_do_src($matches) {
  $src = $matches[1];

  if (!file_exists($src)) {
    // No such file: assume it is a Drupal path.
    return 'src="' . _pathologic_url($src) . '"';
  }

  // The file is really there: point straight at it.
  global $base_url;
  return 'src="' . $base_url . '/' . $src . '"';
}

/**
 * Run the parts through url() if they look like they link to a local Drupal
 * path. Otherwise, it's a link to a file or something -- try to return it
 * in one piece.
 *
 * Okay, here's where the major changes are in the D5 version. menu_get_item()
 * has different parameters, and returns an empty array on failure.
 *
 * @param $path
 *   A relative path, or the special value '<front>'.
 * @param $fragment
 *   Optional fragment identifier, without the leading "#". Both NULL and the
 *   empty string mean "no fragment".
 *
 * @return
 *   The rewritten URL as a string.
 */
function _pathologic_url($path, $fragment = NULL) {
  // _pathologic_do_href() passes an empty string when the link carries no
  // fragment. Normalize that here so that neither branch below emits a
  // dangling "#" at the end of the URL.
  if ($fragment === '') {
    $fragment = NULL;
  }

  // Does this look like an internal URL? It does if it is the front page, a
  // path the menu system knows about, or a path that has an alias.
  if ($path !== '<front>' && count(menu_get_item(NULL, $path)) === 0 && drupal_get_path_alias($path) === $path) {

    // It's not. Just put the base URL in front of it.
    global $base_url;
    $return = $base_url . '/' . $path;
    if ($fragment !== NULL) {
      $return .= '#' . $fragment;
    }
    return $return;
  }
  return url($path, NULL, $fragment, TRUE);
}

Functions

Name (sorted descending) | Description
pathologic_filter Implementation of hook_filter().
_pathologic_abs_regex Return the hard part of the regular expression to be used when making paths relative. $attr will probably be either 'href' or 'src'.
_pathologic_abs_to_rel preg_replace_callback() callback function. Aids in turning an absolute path to a page on the local server to a relative path so that Pathologic can act on it further.
_pathologic_do_href preg_replace_callback() callback function. Properly formats an HREF element. Here's where the magic happens…
_pathologic_do_src preg_replace_callback() callback function. Okay. If we have a local SRC attribute, there are two possibilities.
_pathologic_url Run the parts through url() if they look like they link to a local Drupal path. Otherwise, it's a link to a file or something -- try to return it in one piece.