You are here

pathologic.module in Pathologic 6

The main and only module file.

File

pathologic.module
View source
<?php

/**
 * @defgroup pathologic Pathologic text filter for Drupal
 *
 * This input filter attempts to make sure that link and image paths will
 * always be correct, even when domain names change, content is moved from one
 * server to another, Clean URLs is toggled, etc.
 */

/**
 * @file
 * The main and only module file.
 *
 * @ingroup pathologic
 */

/**
 * Implementation of hook_filter().
 *
 * Answers the 'list', 'description' and 'settings' operations and performs
 * the actual path rewriting for the 'process' operation. For any other
 * operation, or when asked to process an empty string, $text is handed back
 * unchanged.
 *
 * @param $op
 *   The filter operation being requested.
 * @param $delta
 *   Unused; this module only provides a single filter.
 * @param $format
 *   The input format ID. It is used as a suffix for the names of the
 *   per-format settings variables.
 * @param $text
 *   The text to be filtered.
 *
 * @return
 *   Depending on $op: an array of filter names, a description string, a
 *   settings form array, or the (possibly rewritten) text.
 */
function pathologic_filter($op, $delta = 0, $format = -1, $text = '') {
  if ($op === 'list') {
    return array(t('Pathologic'));
  }

  if ($op === 'description') {
    return t('Corrects paths in content which reference pages or media on this server.');
  }

  if ($op === 'settings') {
    // Build the fieldset piece by piece; the order of insertion is the order
    // in which the elements appear in the form array.
    $fieldset = array(
      '#type' => 'fieldset',
      '#title' => t('Pathologic'),
      '#collapsible' => TRUE,
    );
    $fieldset["filter_pathologic_href_{$format}"] = array(
      '#type' => 'checkbox',
      '#title' => t('Transform values of <em>href</em> attributes'),
      '#default_value' => intval(variable_get("filter_pathologic_href_{$format}", 1)),
      '#description' => t('<em>href</em> attributes are used in link tags.'),
    );
    $fieldset["filter_pathologic_src_{$format}"] = array(
      '#type' => 'checkbox',
      '#title' => t('Transform values of <em>src</em> attributes'),
      '#default_value' => intval(variable_get("filter_pathologic_src_{$format}", 1)),
      '#description' => t('<em>src</em> attributes are used in image tags and tags for other types of embedded media.'),
    );
    $fieldset["filter_pathologic_abs_paths_{$format}"] = array(
      '#type' => 'textarea',
      '#title' => t('Additional paths to be considered local'),
      '#default_value' => variable_get("filter_pathologic_abs_paths_{$format}", ''),
      '#description' => t('Enter URIs of other Drupal installations which should be considered local in addition to the one for this particular Drupal installation (which is %path). If in doubt, enter the path to the Drupal installation&rsquo;s front page. Enter one path per line.', array(
        '%path' => _pathologic_url('<front>'),
      )),
    );
    return array('filter_pathologic' => $fieldset);
  }

  // Anything that is not a request to process non-empty text passes through.
  if ($op !== 'process' || $text === '') {
    return $text;
  }

  // Each pass: settings variable, attribute name, pattern matching the
  // already-relative attribute, and the callback which rebuilds it.
  $passes = array(
    array("filter_pathologic_href_{$format}", 'href', '|href="([^/#][^:"#]+)#?([^:"]*)"|', '_pathologic_do_href'),
    array("filter_pathologic_src_{$format}", 'src', '|src="([^/][^:"]+)"|', '_pathologic_do_src'),
  );
  foreach ($passes as $pass) {
    list($setting, $attr, $pattern, $callback) = $pass;
    if (!variable_get($setting, TRUE)) {
      continue;
    }

    // First strip any known local base path so the value becomes relative...
    $text = preg_replace_callback(_pathologic_abs_regex($attr, $format), '_pathologic_abs_to_rel', $text);

    // ...then rebuild the attribute from the relative value.
    $text = preg_replace_callback($pattern, $callback, $text);
  }
  return $text;
}

/**
 * Return the regular expression to be used when making paths relative.
 *
 * The expression matches an attribute whose value starts with one of the
 * base paths considered local: the paths entered in the
 * "filter_pathologic_abs_paths_{$format}" setting plus this site's front page
 * URL. Each base path may be followed by an optional slash, "index.php" and
 * "?q=". The attribute name is captured as the first group and the remainder
 * of the value as the last group, so that _pathologic_abs_to_rel() can
 * rebuild the attribute without special trickery or duplicate functions.
 *
 * @param $attr
 *   The attribute name to match; will probably be either 'href' or 'src'.
 * @param $format
 *   The input format ID whose additional local paths should be used.
 *
 * @return
 *   A backtick-delimited regular expression which looks like:
 *   `(href)="(http\://abcde\.fgh//?(index\.php)?(\?q=)?|http\://edcba\.hgf//?(index\.php)?(\?q=)?)([^"]*)"`
 */
function _pathologic_abs_regex($attr, $format) {
  // The attribute-independent tail of the expression, cached per input
  // format. Formats can have different additional paths, so a single shared
  // cache would make every format reuse whichever one was built first.
  static $pathstrs = array();

  if (!isset($pathstrs[$format])) {
    $paths = array();
    $lines = explode("\n", variable_get("filter_pathologic_abs_paths_{$format}", ''));
    foreach ($lines as $line) {
      // Get rid of spurious white space (including "\r") on each line, and
      // skip blank lines: an empty base path would match every attribute.
      $line = trim($line);
      if ($line !== '') {
        $paths[] = $line;
      }
    }
    $paths[] = _pathologic_url('<front>');

    // It's possible the user entered the path for the current site in the
    // box, especially if the DB contents are shared between two different
    // servers (likely if it's a testing/production server thing). This won't
    // screw up the regex, but it will unnecessarily complicate it, so let's
    // remove duplicates from the array.
    $paths = array_unique($paths);

    $alternatives = array();
    foreach ($paths as $path) {
      // Quote the path so that characters such as "." or "?" are matched
      // literally and a backtick cannot terminate the expression early.
      $alternatives[] = preg_quote($path, '`') . '/?(index\\.php)?(\\?q=)?';
    }
    $pathstrs[$format] = ')="(' . implode('|', $alternatives) . ')([^"]*)"`';
  }

  return '`(' . $attr . $pathstrs[$format];
}

/**
 * Aids in turning an absolute path to a page on the local server to a relative
 * path so that Pathologic can act on it further. preg_replace_callback()
 * callback function.
 *
 * @param $matches
 *   The match array for the expression built by _pathologic_abs_regex(). The
 *   attribute name is element 1; the part of the value following the local
 *   base path is the last element.
 *
 * @return
 *   The rebuilt attribute, e.g. 'href="node/1"'. An empty remainder becomes
 *   '<front>'; for href attributes a remainder consisting only of a fragment
 *   becomes '<front>#fragment'.
 */
function _pathologic_abs_to_rel($matches) {

  // The attribute will be the first match (after 0), and the path will be the
  // last. The number of groups in between depends on which base path matched,
  // hence the array_pop().
  $path = array_pop($matches);
  $attr = $matches[1];

  if ($path === '') {

    // It looks like this was meant to be a link to the front page.
    return $attr . '="<front>"';
  }

  if ($attr === 'href' && substr($path, 0, 1) === '#') {

    // A link to an anchor on the front page. Left as a bare "#anchor" it
    // would be skipped by the later href pass and would point at whatever
    // page the content is displayed on, so keep the front page in the path.
    return $attr . '="<front>' . $path . '"';
  }

  return $attr . '="' . $path . '"';
}

/**
 * Rebuilds an HREF attribute from a relative path and optional fragment.
 * preg_replace_callback() callback function.
 *
 * @param $matches
 *   Match array in which element 1 is the path and element 2 is the fragment
 *   (without the leading "#").
 *
 * @return
 *   The complete href attribute with the value produced by _pathologic_url().
 */
function _pathologic_do_href($matches) {
  $path = $matches[1];
  $fragment = $matches[2];
  $url = _pathologic_url($path, $fragment);
  return "href=\"{$url}\"";
}

/**
 * Rebuilds a SRC attribute from a relative path. preg_replace_callback()
 * callback function.
 *
 * SRC attributes usually point at files on disk, but not always: Imagecache,
 * for instance, relies on file paths actually being Drupal paths, at least
 * initially. So a path which exists on disk is simply prefixed with the base
 * URL, and anything else is handed to _pathologic_url().
 *
 * @param $matches
 *   Match array in which element 1 is the relative path.
 *
 * @return
 *   The complete src attribute.
 */
function _pathologic_do_src($matches) {
  global $base_url;

  $src = $matches[1];
  if (!file_exists($src)) {
    // Not a file on disk; treat it like any other path.
    return 'src="' . _pathologic_url($src) . '"';
  }
  return 'src="' . $base_url . '/' . $src . '"';
}

/**
 * Builds an absolute URL for a relative path.
 *
 * If the path is '<front>', is known to the menu system, or is a path alias,
 * it is run through url(). Otherwise it's a link to a file or something, so
 * it is simply appended to the base URL in one piece.
 *
 * @param $path
 *   The relative path, or '<front>'.
 * @param $fragment
 *   Optional fragment, without the leading "#".
 *
 * @return
 *   The absolute URL.
 */
function _pathologic_url($path, $fragment = NULL) {
  global $base_url;

  // Does this look like an internal URL? The lookups only run when the
  // cheaper checks before them have not already answered the question.
  $is_internal = $path === '<front>' || menu_get_item($path) || drupal_lookup_path('source', $path);

  if ($is_internal) {
    $options = array(
      'fragment' => $fragment,
      'absolute' => TRUE,
    );
    return url($path, $options);
  }

  // It's not; glue the pieces together ourselves.
  $has_fragment = !($fragment === NULL || $fragment === '');
  $return = "{$base_url}/{$path}";
  return $has_fragment ? $return . '#' . $fragment : $return;
}

Related topics

Functions

Namesort descending Description
pathologic_filter Implementation of hook_filter().
_pathologic_abs_regex Return the hard part of the regular expression to be used when making paths relative. $attr will probably be either 'href' or 'src'.
_pathologic_abs_to_rel Aids in turning an absolute path to a page on the local server to a relative path so that Pathologic can act on it further. preg_replace_callback() callback function.
_pathologic_do_href Properly formats an HREF element. Here's where the magic happens… preg_replace_callback() callback function.
_pathologic_do_src Return a formatted SRC attribute.
_pathologic_url Run the parts through url() if they look like they link to a local Drupal path. Otherwise, it's a link to a file or something -- try to return it in one piece.