You are here

function _pathologic_replace in Pathologic 7.3

Same name and namespace in other branches
  1. 8 pathologic.module \_pathologic_replace()
  2. 6.3 pathologic.module \_pathologic_replace()
  3. 7 pathologic.module \_pathologic_replace()
  4. 7.2 pathologic.module \_pathologic_replace()

Process and replace paths. preg_replace_callback() callback.

1 string reference to '_pathologic_replace'
_pathologic_filter in ./pathologic.module
Pathologic filter callback.

File

./pathologic.module, line 213
Pathologic text filter for Drupal.

Code

/**
 * Process and replace paths. preg_replace_callback() callback.
 *
 * Takes one matched attribute/URL pair, decides whether the URL points at a
 * path considered local by the filter settings, and if so rebuilds it with
 * url(). Any URL that cannot be parsed, uses a scheme that is not whitelisted,
 * consists only of a fragment, or does not match a configured local path is
 * handed back untouched.
 *
 * The filter settings are read from drupal_static('_pathologic_filter') since
 * preg_replace_callback() offers no way to pass extra parameters.
 *
 * @param array $matches
 *   Matches from the regular expression:
 *   - 0: The entire matched text; returned as-is when we give up.
 *   - 1: The tag attribute name (src, href, etc.).
 *   - 2: The HTML-encoded URL or path found in the attribute.
 *
 * @return string
 *   Either $matches[0] unchanged, or a replacement of the form
 *   ' attribute="url' (leading space, no closing quote) holding the rebuilt
 *   and HTML-encoded URL.
 */
function _pathologic_replace($matches) {

  // Get the base path.
  global $base_path;

  // Get the settings for the filter. We deliberately take a copy instead of a
  // reference so that the changes made to $cached_settings below do not leak
  // into the statically cached value.
  $cached_settings = drupal_static('_pathologic_filter');

  // If it appears the path is a scheme-less URL, prepend a scheme to it.
  // parse_url() cannot properly parse scheme-less URLs. If it turns out that
  // Pathologic can't handle the URL, the scheme-less original is returned.
  // @see https://drupal.org/node/1617944
  // @see https://drupal.org/node/2030789
  if (strpos($matches[2], '//') === 0) {
    // The server variable is spelled 'HTTPS'; array keys are case-sensitive,
    // so a lowercase 'https' key would never be found and every scheme-less
    // URL would be treated as plain HTTP.
    $is_https = isset($_SERVER['HTTPS']) && strtolower($_SERVER['HTTPS']) === 'on';
    $matches[2] = ($is_https ? 'https:' : 'http:') . $matches[2];
  }

  // Now parse the URL after reverting HTML character encoding.
  // @see http://drupal.org/node/1672932
  $original_url = htmlspecialchars_decode($matches[2]);
  $parts = @parse_url($original_url);

  // Do some more early tests to see if we should just give up now: the URL
  // could not be parsed, its scheme is not whitelisted, or it is nothing but a
  // fragment.
  $scheme_whitelist = variable_get('pathologic_scheme_whitelist', array(
    'http',
    'https',
    'files',
    'internal',
  ));
  if (empty($parts)
    || (isset($parts['scheme']) && !in_array($parts['scheme'], $scheme_whitelist))
    || (isset($parts['fragment']) && count($parts) === 1)) {

    // Give up by "replacing" the original with the same.
    return $matches[0];
  }

  if (isset($parts['path'])) {

    // Undo possible URL encoding in the path.
    // @see http://drupal.org/node/1672932
    $parts['path'] = rawurldecode($parts['path']);
  }
  else {
    $parts['path'] = '';
  }

  // Check to see if we're dealing with a file.
  // @todo Should we still try to do path correction on these files too?
  if (isset($parts['scheme']) && $parts['scheme'] === 'files') {

    // Path Filter "files:" support. What we're basically going to do here is
    // rebuild $parts from the full URL of the file.
    $new_parts = @parse_url(file_create_url(file_default_scheme() . '://' . $parts['path']));

    // parse_url() returns FALSE for seriously malformed URLs; start from an
    // empty array in that case rather than indexing into a boolean.
    if (!is_array($new_parts)) {
      $new_parts = array();
    }

    // If there were query parts from the original parsing, copy them over.
    if (!empty($parts['query'])) {
      $new_parts['query'] = $parts['query'];
    }
    $new_parts['path'] = isset($new_parts['path']) ? rawurldecode($new_parts['path']) : '';
    $parts = $new_parts;

    // Don't do language handling for file paths.
    $cached_settings['is_file'] = TRUE;
  }
  else {
    $cached_settings['is_file'] = FALSE;
  }

  // Let's also bail out if this doesn't look like a local path.
  $found = FALSE;

  // Cycle through local paths and find one with a host and a path that matches;
  // or just a host if that's all we have; or just a starting path if that's
  // what we have.
  foreach ($cached_settings['current_settings']['local_paths_exploded'] as $exploded) {

    // If a path is available in both, the URL's path must start with the local
    // path, and the hosts must either be equal or both be absent.
    if (isset($exploded['path']) && isset($parts['path']) && strpos($parts['path'], $exploded['path']) === 0 && (isset($exploded['host']) && isset($parts['host']) && $exploded['host'] === $parts['host'] || !isset($exploded['host']) && !isset($parts['host']))) {

      // Remove the shared path from the path. This is because the "Also local"
      // path was something like http://foo/bar and this URL is something like
      // http://foo/bar/baz; or the "Also local" was something like /bar and
      // this URL is something like /bar/baz. And we only care about the /baz
      // part.
      $parts['path'] = drupal_substr($parts['path'], drupal_strlen($exploded['path']));
      $found = TRUE;
      break;
    }
    elseif (isset($parts['host']) && !isset($exploded['path']) && isset($exploded['host']) && $exploded['host'] === $parts['host']) {

      // Host-only local path matching the URL's host; no further editing.
      $found = TRUE;
      break;
    }
    elseif (!isset($parts['host']) && (!isset($exploded['path']) || $exploded['path'] === $base_path)) {

      // Host-less URL accepted, but we keep looping (no break) so that a later
      // entry with a matching path prefix can still strip that prefix.
      $found = TRUE;
    }
  }

  // If the path is not within the Drupal root, return the original unchanged.
  if (!$found) {
    return $matches[0];
  }

  // Okay, format the URL.
  // If there's still a slash lingering at the start of the path, chop it off.
  $parts['path'] = ltrim($parts['path'], '/');

  // Examine the query part of the URL. Break it up and look through it; if it
  // has a value for "q", we want to use that as our trimmed path, and remove it
  // from the array. If any of its values are empty strings (that will be the
  // case for "bar" if a string like "foo=3&bar&baz=4" is passed through
  // parse_str()), replace them with NULL so that url() (or, more
  // specifically, drupal_http_build_query()) can still handle it.
  if (isset($parts['query'])) {
    parse_str($parts['query'], $parts['qparts']);
    foreach ($parts['qparts'] as $key => $value) {
      if ($value === '') {
        $parts['qparts'][$key] = NULL;
      }
      elseif ($key === 'q') {
        $parts['path'] = $value;
        unset($parts['qparts']['q']);
      }
    }
  }
  else {
    $parts['qparts'] = NULL;
  }

  // If we don't have a path yet, bail out.
  if (!isset($parts['path'])) {
    return $matches[0];
  }

  // If we didn't previously identify this as a file, check to see if the file
  // exists now that we have the correct path relative to DRUPAL_ROOT.
  if (!$cached_settings['is_file']) {
    $cached_settings['is_file'] = !empty($parts['path']) && (is_file(DRUPAL_ROOT . '/' . $parts['path']) || _pathologic_is_file_directory($parts['path']));
  }

  // Okay, deal with language stuff.
  $language_list = language_list();
  if ($cached_settings['is_file']) {

    // If we're linking to a file, use a fake LANGUAGE_NONE language object.
    // Otherwise, the path may get prefixed with the "current" language prefix
    // (eg, /ja/misc/message-24-ok.png)
    $parts['language_obj'] = (object) array(
      'language' => LANGUAGE_NONE,
      'prefix' => '',
    );
  }
  else {

    // Let's see if we can split off a language prefix from the path.
    if (module_exists('locale')) {

      // Sometimes this file will be require_once-d by the locale module before
      // this point, and sometimes not. We require_once it ourselves to be sure.
      require_once DRUPAL_ROOT . '/includes/language.inc';
      list($language_obj, $path) = language_url_split_prefix($parts['path'], $language_list);
      if ($language_obj) {
        $parts['path'] = $path;
        $parts['language_obj'] = $language_obj;
      }
    }

    // No prefix found in the path: fall back to the language code stored in
    // the filter settings, if it names a known language.
    if (empty($parts['language_obj']) && !empty($cached_settings['langcode']) && !empty($language_list[$cached_settings['langcode']])) {
      $parts['language_obj'] = $language_list[$cached_settings['langcode']];
    }
  }

  // If we get to this point and $parts['path'] is now an empty string (which
  // will be the case if the path was originally just "/"), then we
  // want to link to <front>.
  if ($parts['path'] === '') {
    $parts['path'] = '<front>';
  }

  // Build the parameters we will send to url()
  $url_params = array(
    'path' => $parts['path'],
    'options' => array(
      'query' => $parts['qparts'],
      'fragment' => isset($parts['fragment']) ? $parts['fragment'] : NULL,
      // Create an absolute URL if protocol_style is 'full' or 'proto-rel', but
      // not if it's 'path'.
      'absolute' => $cached_settings['current_settings']['protocol_style'] !== 'path',
      // If we seem to have found a language for the path, pass it along to
      // url(). Otherwise, ignore the 'language' parameter.
      'language' => isset($parts['language_obj']) ? $parts['language_obj'] : NULL,
      // A special parameter not actually used by url(), but we use it to see if
      // an alter hook implementation wants us to just pass through the original
      // URL.
      'use_original' => FALSE,
    ),
  );

  // Add the original URL to the parts array
  $parts['original'] = $original_url;

  // Now alter!
  // @see http://drupal.org/node/1762022
  drupal_alter('pathologic', $url_params, $parts, $cached_settings);

  // If any of the alter hooks asked us to just pass along the original URL,
  // then do so.
  if ($url_params['options']['use_original']) {
    return $matches[0];
  }

  // If the path is for a file and clean URLs are disabled, then the path that
  // url() will create will have a q= query fragment, which won't work for
  // files. To avoid that, we use this trick to temporarily turn clean URLs on.
  // This is horrible, but it seems to be the sanest way to do this.
  // @see http://drupal.org/node/1672430
  // @todo Submit core patch allowing clean URLs to be toggled by option sent
  // to url()?
  if (!empty($cached_settings['is_file'])) {
    $cached_settings['orig_clean_url'] = !empty($GLOBALS['conf']['clean_url']);
    if (!$cached_settings['orig_clean_url']) {
      $GLOBALS['conf']['clean_url'] = TRUE;
    }
  }

  // Now for the url() call.
  $url = url($url_params['path'], $url_params['options']);

  // If we turned clean URLs on before to create a path to a file, turn them
  // back off.
  if ($cached_settings['is_file'] && !$cached_settings['orig_clean_url']) {
    $GLOBALS['conf']['clean_url'] = FALSE;
  }

  // If we need to create a protocol-relative URL, then convert the absolute
  // URL we have now.
  if ($cached_settings['current_settings']['protocol_style'] === 'proto-rel') {

    // url() may have returned a URL which isn't on "this" server due to a
    // hook_url_outbound_alter() implementation. We don't want to convert the
    // URL in that case, so only alter it when its host matches the base URL
    // host stored in the settings.
    $url_parts = @parse_url($url);
    if (!empty($url_parts['host']) && $url_parts['host'] === $cached_settings['current_settings']['base_url_host']) {
      $url = _pathologic_url_to_protocol_relative($url);
    }
  }

  // Apply HTML character encoding, as is required for HTML attributes.
  // @see http://drupal.org/node/1672932
  $url = check_plain($url);

  // $matches[1] will be the tag attribute; src, href, etc. The closing quote
  // is intentionally not emitted here.
  return " {$matches[1]}=\"{$url}";
}