You are here

function _pathologic_replace in Pathologic 8

Same name and namespace in other branches
  1. 6.3 pathologic.module \_pathologic_replace()
  2. 7.3 pathologic.module \_pathologic_replace()
  3. 7 pathologic.module \_pathologic_replace()
  4. 7.2 pathologic.module \_pathologic_replace()

Process and replace paths. preg_replace_callback() callback.

1 string reference to '_pathologic_replace'
_pathologic_filter in ./pathologic.module
Pathologic filter callback.

File

./pathologic.module, line 119
Pathologic text filter for Drupal.

Code

/**
 * Process and replace paths. preg_replace_callback() callback.
 *
 * Takes one attribute match, decides whether the URL inside it points at a
 * path that the filter settings consider local, and if so rebuilds that URL
 * with the Url class. Whenever the URL cannot or should not be handled, the
 * original matched text is returned unchanged.
 *
 * The filter settings are not passed in; they are read from
 * drupal_static('_pathologic_filter'). This function uses the
 * 'current_settings' entry of that value, specifically its
 * 'local_paths_exploded', 'protocol_style' and 'base_url_host' keys.
 *
 * @param array $matches
 *   Match data supplied by preg_replace_callback():
 *   - 0: The complete matched text. Returned as-is when giving up.
 *   - 1: The tag attribute name; src, href, etc.
 *   - 2: The URL found in the attribute, still HTML-encoded.
 *
 * @return string
 *   Either $matches[0] untouched, or a space, the attribute name, '="' and the
 *   HTML-escaped rebuilt URL. No closing quote is appended; presumably the
 *   calling pattern does not consume it (confirm against _pathologic_filter).
 */
function _pathologic_replace($matches) {

  // Get the base path.
  global $base_path;

  // Get the settings for the filter. Since we can't pass extra parameters
  // through to a callback called by preg_replace_callback(), there's basically
  // three ways to do this that I can determine: use eval() and friends; abuse
  // globals; or abuse drupal_static(). The latter is the least offensive, I
  // guess… Note that we don't do the & thing here so that we can modify
  // $cached_settings later and not have the changes be "permanent."
  $cached_settings = drupal_static('_pathologic_filter');

  // If it appears the path is a scheme-less URL, prepend a scheme to it.
  // parse_url() cannot properly parse scheme-less URLs. Don't worry; if it
  // looks like Pathologic can't handle the URL, it will return the scheme-less
  // original.
  // @see https://drupal.org/node/1617944
  // @see https://drupal.org/node/2030789
  if (strpos($matches[2], '//') === 0) {
    if (\Drupal::request()->isSecure()) {
      $matches[2] = 'https:' . $matches[2];
    }
    else {
      $matches[2] = 'http:' . $matches[2];
    }
  }

  // Now parse the URL after reverting HTML character encoding.
  // @see http://drupal.org/node/1672932
  $original_url = htmlspecialchars_decode($matches[2]);

  // …and parse the URL
  $parts = parse_url($original_url);

  // Do some more early tests to see if we should just give up now. Giving up
  // means "replacing" the original with the same.
  // parse_url() yields FALSE for seriously malformed URLs.
  if (empty($parts)) {
    return $matches[0];
  }

  // Only URLs whose scheme is whitelisted are handled. A missing or malformed
  // whitelist setting is treated as "nothing is whitelisted" instead of being
  // handed to in_array(), which would fail on a non-array haystack.
  if (isset($parts['scheme'])) {
    $scheme_whitelist = \Drupal::config('pathologic.settings')->get('scheme_whitelist');
    if (!is_array($scheme_whitelist) || !in_array($parts['scheme'], $scheme_whitelist, TRUE)) {
      return $matches[0];
    }
  }

  // A URL consisting of nothing but a fragment is left alone.
  if (isset($parts['fragment']) && count($parts) === 1) {
    return $matches[0];
  }

  if (isset($parts['path'])) {

    // Undo possible URL encoding in the path.
    // @see http://drupal.org/node/1672932
    $parts['path'] = rawurldecode($parts['path']);
  }
  else {
    $parts['path'] = '';
  }

  // Check to see if we're dealing with a file.
  // @todo Should we still try to do path correction on these files too?
  if (isset($parts['scheme']) && $parts['scheme'] === 'files') {

    // Path Filter "files:" support. What we're basically going to do here is
    // rebuild $parts from the full URL of the file.
    $default_scheme = \Drupal::config('system.file')->get('default_scheme');
    $new_parts = parse_url(file_create_url($default_scheme . '://' . $parts['path']));

    // parse_url() may fail; normalize to an array so the code below does not
    // index into FALSE.
    if (!is_array($new_parts)) {
      $new_parts = [];
    }

    // If there were query parts from the original parsing, copy them over.
    if (!empty($parts['query'])) {
      $new_parts['query'] = $parts['query'];
    }
    $new_parts['path'] = isset($new_parts['path']) ? rawurldecode($new_parts['path']) : '';
    $parts = $new_parts;

    // Don't do language handling for file paths.
    $cached_settings['is_file'] = TRUE;
  }
  else {
    $cached_settings['is_file'] = FALSE;
  }

  // Let's also bail out if this doesn't look like a local path.
  $found = FALSE;

  // Cycle through local paths and find one with a host and a path that matches;
  // or just a host if that's all we have; or just a starting path if that's
  // what we have.
  foreach ($cached_settings['current_settings']['local_paths_exploded'] as $exploded) {

    // If a path is available in both, the URL's path starts with the local
    // path, and either both have the same host or neither has a host…
    if (
      isset($exploded['path'])
      && isset($parts['path'])
      && strpos($parts['path'], $exploded['path']) === 0
      && (
        isset($exploded['host']) && isset($parts['host']) && $exploded['host'] === $parts['host']
        || !isset($exploded['host']) && !isset($parts['host'])
      )
    ) {

      // Remove the shared path from the path. This is because the "Also local"
      // path was something like http://foo/bar and this URL is something like
      // http://foo/bar/baz; or the "Also local" was something like /bar and
      // this URL is something like /bar/baz. And we only care about the /baz
      // part.
      $parts['path'] = mb_substr($parts['path'], mb_strlen($exploded['path']));
      $found = TRUE;

      // Break out of the foreach loop
      break;
    }
    elseif (isset($parts['host']) && !isset($exploded['path']) && isset($exploded['host']) && $exploded['host'] === $parts['host']) {

      // Host-only local path with the same host. No further editing; just
      // continue
      $found = TRUE;

      // Break out of foreach loop
      break;
    }
    elseif (!isset($parts['host']) && (!isset($exploded['path']) || $exploded['path'] === $base_path)) {

      // Host-less URL against a local path that has no path or whose path is
      // the base path. No break here, so a later entry can still match in one
      // of the branches above and strip a shared path prefix.
      $found = TRUE;
    }
  }

  // If the path is not within the drupal root return original url, unchanged
  if (!$found) {
    return $matches[0];
  }

  // Okay, format the URL.
  // If there's still a slash lingering at the start of the path, chop it off.
  $parts['path'] = ltrim($parts['path'], '/');

  // Examine the query part of the URL. Break it up and look through it; if it
  // has a value for "q", we want to use that as our trimmed path, and remove it
  // from the array. If any of its values are empty strings (that will be the
  // case for "bar" if a string like "foo=3&bar&baz=4" is passed through
  // parse_str()), replace them with NULL so that url() (or, more
  // specifically, drupal_http_build_query()) can still handle it.
  if (isset($parts['query'])) {
    parse_str($parts['query'], $parts['qparts']);
    foreach ($parts['qparts'] as $key => $value) {
      if ($value === '') {
        $parts['qparts'][$key] = NULL;
      }
      // parse_str() produces an array for input such as "q[]=foo". Only a
      // string can serve as the path; an array-valued "q" is left in the query
      // untouched so the string functions below never receive an array.
      elseif ($key === 'q' && is_string($value)) {
        $parts['path'] = $value;
        unset($parts['qparts']['q']);
      }
    }
  }
  else {
    $parts['qparts'] = NULL;
  }

  // If we don't have a path yet, bail out.
  if (!isset($parts['path'])) {
    return $matches[0];
  }

  // If this looks like a D8-style unclean URL, crop off the "index.php/" from
  // the beginning. 10 is the length of "index.php/".
  if (strpos($parts['path'], 'index.php/') === 0) {
    $parts['path'] = substr($parts['path'], 10);
  }

  // If we didn't previously identify this as a file, check to see if the file
  // exists now that we have the correct path relative to DRUPAL_ROOT
  if (!$cached_settings['is_file']) {
    $cached_settings['is_file'] = !empty($parts['path']) && is_file(DRUPAL_ROOT . '/' . $parts['path']);
  }

  // Okay, deal with language stuff.
  // Let's see if we can split off a language prefix from the path.
  if (\Drupal::moduleHandler()->moduleExists('language')) {

    // This logic is based on
    // \Drupal\language\Plugin\LanguageNegotiation\LanguageNegotiationUrl::getLangcode().
    $languages = \Drupal::languageManager()->getLanguages();
    $config = \Drupal::config('language.negotiation')->get('url');
    $request_path = urldecode(trim($parts['path'], '/'));
    $path_args = explode('/', $request_path);
    $prefix = array_shift($path_args);

    // Search for prefix within added languages. On a hit, the prefix segment
    // is dropped from the path and the language object is remembered.
    foreach ($languages as $language) {
      $langcode = $language->getId();
      if (isset($config['prefixes'][$langcode]) && $config['prefixes'][$langcode] == $prefix) {
        $parts['path'] = implode('/', $path_args);
        $parts['language_obj'] = $language;
        break;
      }
    }
  }

  // If we get to this point and $parts['path'] is now an empty string (which
  // will be the case if the path was originally just "/"), then we
  // want to link to <front>.
  if ($parts['path'] === '') {
    $parts['path'] = '<front>';
  }

  // Build the parameters we will send to url()
  $url_params = [
    'path' => $parts['path'],
    'options' => [
      'query' => $parts['qparts'],
      'fragment' => isset($parts['fragment']) ? $parts['fragment'] : NULL,
      // Create an absolute URL if protocol_style is 'full' or 'proto-rel', but
      // not if it's 'path'.
      'absolute' => $cached_settings['current_settings']['protocol_style'] !== 'path',
      // If we seem to have found a language for the path, pass it along to
      // url(). Otherwise, ignore the 'language' parameter.
      'language' => isset($parts['language_obj']) ? $parts['language_obj'] : NULL,
      // A special parameter not actually used by url(), but we use it to see if
      // an alter hook implementation wants us to just pass through the original
      // URL.
      'use_original' => FALSE,
    ],
  ];

  // Add the original URL to the parts array
  $parts['original'] = $original_url;

  // Now alter!
  // @see http://drupal.org/node/1762022
  \Drupal::moduleHandler()->alter('pathologic', $url_params, $parts, $cached_settings);

  // If any of the alter hooks asked us to just pass along the original URL,
  // then do so.
  if ($url_params['options']['use_original']) {
    return $matches[0];
  }

  // Now to build the URL. Drumroll, please…
  if ($parts['path'] == '<front>') {
    $url = Url::fromRoute('<front>', [], $url_params['options'])->toString();
  }
  else {
    try {
      $url = Url::fromUri('base://' . $url_params['path'], $url_params['options'])->toString();
    }
    catch (\Exception $e) {

      // In case of an error, e.g. completely invalid URL, return it unchanged.
      return $matches[0];
    }
  }

  // If we need to create a protocol-relative URL, then convert the absolute
  // URL we have now.
  if ($cached_settings['current_settings']['protocol_style'] === 'proto-rel') {

    // Now, what might have happened here is that url() returned a URL which
    // isn't on "this" server due to a hook_url_outbound_alter() implementation.
    // We don't want to convert the URL in that case. So the host part of $url
    // is compared with the configured base URL host, and the URL is only
    // altered when they are identical.
    $url_parts = parse_url($url);
    if (!empty($url_parts['host']) && $url_parts['host'] === $cached_settings['current_settings']['base_url_host']) {
      $url = _pathologic_url_to_protocol_relative($url);
    }
  }

  // Apply HTML character encoding, as is required for HTML attributes.
  // @see http://drupal.org/node/1672932
  $url = Html::escape($url);

  // $matches[1] will be the tag attribute; src, href, etc.
  return " {$matches[1]}=\"{$url}";
}