You are here

function _footnotes_filter_url in Footnotes 6.2

Same name and namespace in other branches
  1. 5.2 footnotes.module _footnotes_filter_url()

URL filter. Automatically converts text web addresses (URLs, e-mail addresses, ftp links, etc.) into hyperlinks.

1 call to _footnotes_filter_url()
footnotes_filter in ./footnotes.module
Implementation of hook_filter().

File

./footnotes.module, line 641
The Footnotes module is a filter that can be used to insert automatically numbered footnotes into Drupal texts.

Code

/**
 * URL filter: converts plain-text web addresses into hyperlinks.
 *
 * Three kinds of address are recognised, each handled in its own pass:
 * absolute URLs (http://www.example.com), e-mail addresses
 * (name@example.com) and protocol-less "www." addresses (www.example.com).
 * Text inside HTML tags, and text enclosed by any of the tags listed in
 * $ignoretags, is left untouched.
 *
 * @param $text
 *   The text to process.
 * @param $format
 *   Input format identifier; it is appended to
 *   'footnotes_filter_url_length_' to look up the configured length
 *   (default 72) that is handed to _footnotes_filter_url_trim().
 *
 * @return
 *   The text with the matches of each pass replaced by whatever the
 *   corresponding callback function returned.
 */
function _footnotes_filter_url($text, $format) {

  // List of tags - the content of which must be skipped.
  $ignoretags = 'a|script|style|code|textarea';

  // Each type of link must be processed separately, as there is no one
  // regular expression that could possibly match all of the cases in one pass.
  //
  // $tasks maps the name of a callback function to the regexp whose matches
  // that callback turns into a replacement string. Insertion order is the
  // processing order.
  $tasks = array();

  // Match absolute URLs.
  $protocols = 'http://|https://|ftp://|mailto:|smb://|afp://|file://|gopher://|news://|ssl://|sslv2://|sslv3://|tls://|tcp://|udp://';
  $urlpattern = "(?:{$protocols})(?:[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+*~#&=/;-])";
  $tasks['_footnotes_filter_url_parse_full_links'] = "`({$urlpattern})([\\.\\,\\?\\!]*?)`i";

  // Match e-mail addresses.
  // Note: The ICANN seems to be on track towards accepting more diverse top
  // level domains, so this pattern has been "future-proofed" to allow for
  // TLD's of length 2-64.
  $urlpattern = '[A-Za-z0-9._-]+@[A-Za-z0-9._+-]+\\.[A-Za-z]{2,64}';
  $tasks['_footnotes_filter_url_parse_email_links'] = "`({$urlpattern})`i";

  // Match www domains/addresses.
  $urlpattern = 'www\\.[a-zA-Z0-9@:%_+*~#?&=.,/;-]*[a-zA-Z0-9@:%_+~#\\&=/;-]';
  $tasks['_footnotes_filter_url_parse_partial_links'] = "`({$urlpattern})([\\.\\,\\?\\!]*?)`i";

  // Pass length to regexp callback.
  // NOTE(review): presumably the trim helper remembers this value for the
  // callbacks to use later - confirm against _footnotes_filter_url_trim().
  _footnotes_filter_url_trim(NULL, variable_get('footnotes_filter_url_length_' . $format, 72));

  // The text must be split, processed and joined again for every task, since
  // each replacement creates new HTML tags and those tags must be protected
  // before the next task runs.
  foreach ($tasks as $task => $re) {

    // Split at all tags, keeping the tags themselves in the result. This
    // ensures that nothing that is a tag name or attribute is processed.
    $chunks = preg_split('/(<.+?>)/is', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($chunks === FALSE) {
      // preg_split() failed (PCRE error): skip this task and keep the text
      // as it is rather than losing it.
      continue;
    }

    // With a single capturing group the result strictly alternates between
    // text (even indexes) and tag delimiters (odd indexes), starting and
    // ending with text.
    //
    // While a tag from $ignoretags is open its name is kept in $opentag and
    // no replacements are made until the matching closing tag is seen.
    $opentag = '';
    foreach ($chunks as $i => $chunk) {
      if ($i % 2 === 0) {
        // Text chunk: only do replacements when no ignoretag is open.
        if ($opentag === '') {
          $replaced = preg_replace_callback($re, $task, $chunk);
          // preg_replace_callback() returns NULL on a PCRE error; in that
          // case leave the chunk unchanged instead of blanking it.
          if ($replaced !== NULL) {
            $chunks[$i] = $replaced;
          }
        }
      }
      elseif ($opentag === '') {
        // Tag chunk with no ignoretag open: remember it if it opens one.
        if (preg_match("`<({$ignoretags})(?:\\s|>)`i", $chunk, $matches)) {
          $opentag = $matches[1];
        }
      }
      elseif (preg_match("`<\\/{$opentag}\\s*>`i", $chunk)) {
        // Matching closing tag found. Whitespace before '>' is accepted, as
        // HTML permits it in end tags (e.g. "</code >").
        $opentag = '';
      }
    }
    $text = implode($chunks);
  }
  return $text;
}