You are here

function link_url_type in Link 7

Type check a URL.

Accepts all URLs that follow the RFC 1738 standard for URL formation and all e-mail addresses that follow the RFC 2368 standard for mailto address formation.

Parameters

string $text: URL to be checked.

Return value

mixed Returns boolean FALSE if the URL is not valid. On success, returns one of the LINK_(linktype) constants.

8 calls to link_url_type()
LinkFieldValidateTest::testLinkInternalUrl in tests/LinkFieldValidateTest.test
Validate that an internal url would be accepted.
LinkValidationApiTest::testValidateExternalLinks in tests/LinkValidationApiTest.test
Validate External Links.
LinkValidationApiTest::testValidateInternalLinks in tests/LinkValidationApiTest.test
Validate Internal Links.
LinkValidationApiTest::testValidateTelLinks in tests/LinkValidationApiTest.test
Confirm that valid tel: links work as expected.
LinkValidationApiTest::testValidateTelLinksBad in tests/LinkValidationApiTest.test
Confirm that invalid tel: links work as expected.

... See full list

File

./link.module, line 1690
Defines simple link field types.

Code

/**
 * Type check a URL.
 *
 * Classifies $text as one of the LINK_(linktype) constants by testing it
 * against a series of regular expressions, in this order: front page marker,
 * mailto address, fragment, query string, tel: number, news: reference,
 * file:// path, internal path, external URL, and finally an internal file
 * path. The first test that matches determines the return value.
 *
 * @param string $text
 *   URL to be checked.
 *
 * @return mixed
 *   Returns boolean FALSE if the URL is not valid. On success, returns one of
 *   the LINK_(linktype) constants.
 */
function link_url_type($text) {

  // Extra (non-ASCII) letters accepted inside domain names. The list is passed
  // through html_entity_decode() so that entries written as HTML entities are
  // turned into the characters themselves; literal characters pass through
  // unchanged. The multiplication sign, the division sign and the sharp s are
  // deliberately left out of this list.
  // @todo Complete letters.
  // @codingStandardsIgnoreStart
  $link_ichars_domain = (string) html_entity_decode(implode("", array(
    "¿",
    "À", "Á", "Â", "Ã", "Ä", "Å", "Æ", "Ç",
    "È", "É", "Ê", "Ë", "Ì", "Í", "Î", "Ï",
    "Ð", "Ñ", "Ò", "Ó", "Ô", "Õ", "Ö",
    "Ø", "Ù", "Ú", "Û", "Ü", "Ý", "Þ",
    "à", "á", "â", "ã", "ä", "å", "æ", "ç",
    "è", "é", "ê", "ë", "ì", "í", "î", "ï",
    "ð", "ñ", "ò", "ó", "ô", "õ", "ö",
    "ø", "ù", "ú", "û", "ü", "ý", "þ", "ÿ",
    "Œ", "œ", "Ÿ",
  )), ENT_QUOTES, 'UTF-8');
  // @codingStandardsIgnoreEnd

  // Everywhere except in domain names the sharp s is accepted as well.
  $link_ichars = $link_ichars_domain . (string) html_entity_decode(implode("", array(
    "ß",
  )), ENT_QUOTES, 'UTF-8');

  $allowed_protocols = variable_get('filter_allowed_protocols', array(
    'http',
    'https',
    'ftp',
    'file',
    'news',
    'nntp',
    'telnet',
    'mailto',
    'irc',
    'ssh',
    'sftp',
    'webcal',
    'tel',
  ));
  $link_domains = _link_domains();

  // Protocol names come from configuration, so escape them before they are
  // embedded in a regular expression (e.g. "svn+ssh" contains a quantifier).
  $quoted_protocols = array();
  foreach ($allowed_protocols as $allowed_protocol) {
    $quoted_protocols[] = preg_quote($allowed_protocol, '/');
  }

  // Starting a parenthesis group with (?: means that it is grouped, but is not
  // captured.
  $protocol = '((?:' . implode("|", $quoted_protocols) . '):\\/\\/)';
  $authentication = "(?:(?:(?:[\\w\\.\\-\\+!\$&'\\(\\)*\\+,;=" . $link_ichars . "]|%[0-9a-f]{2})+(?::(?:[\\w" . $link_ichars . "\\.\\-\\+%!\$&'\\(\\)*\\+,;=]|%[0-9a-f]{2})*)?)?@)";
  $domain = '(?:(?:[a-zA-Z0-9' . $link_ichars_domain . ']([a-zA-Z0-9' . $link_ichars_domain . '\\-_\\[\\]])*)(\\.(([a-zA-Z0-9' . $link_ichars_domain . '\\-_\\[\\]])+\\.)*(' . $link_domains . '|[a-z]{2}))?)';
  $ipv4 = '(?:[0-9]{1,3}(\\.[0-9]{1,3}){3})';
  $ipv6 = '(?:[0-9a-fA-F]{1,4}(\\:[0-9a-fA-F]{1,4}){7})';
  $port = '(?::([0-9]{1,5}))';

  // Pattern specific to external links. The host alternatives must be joined
  // with a bare "|": any whitespace next to it would become part of the
  // neighbouring alternative and be required literally in the URL.
  $external_pattern = '/^' . $protocol . '?' . $authentication . '?(' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $port . '?';

  // Pattern specific to internal links.
  $internal_pattern = "/^(?:[a-z0-9" . $link_ichars . "_\\-+\\[\\] ]+)";
  $internal_pattern_file = "/^(?:[a-z0-9" . $link_ichars . "_\\-+\\[\\]\\. \\/\\(\\)][a-z0-9" . $link_ichars . "_\\-+\\[\\]\\. \\(\\)][a-z0-9" . $link_ichars . "_\\-+\\[\\]\\. \\/\\(\\)]+)\$/i";
  $directories = "(?:\\/[a-z0-9" . $link_ichars . "_\\-\\.~+%=&,\$'#!():;*@\\[\\]]*)*";

  // Yes, four backslashes == a single backslash.
  $query = "(?:\\/?\\?([?a-zA-Z0-9" . $link_ichars . "+_|\\-\\.~\\/\\\\%=&,\$'!():;*@\\[\\]{} ]*))";
  $anchor = "(?:#[a-zA-Z0-9" . $link_ichars . "_\\-\\.~+%=&,\$'():;*@\\[\\]\\/\\?!]*)";

  // The rest of the path for a standard URL. This also closes the regular
  // expression, so it is appended to the (still open) internal and external
  // patterns at match time.
  // @codingStandardsIgnoreLine
  $end = $directories . '?' . $query . '?' . $anchor . '?' . '$/i';
  $message_id = '[^@].*@' . $domain;
  $newsgroup_name = '(?:[0-9a-z+-]*\\.)*[0-9a-z+-]*';
  $news_pattern = '/^news:(' . $newsgroup_name . '|' . $message_id . ')$/i';
  $user = '[a-zA-Z0-9' . $link_ichars . '_\\-\\.\\+\\^!#\\$%&*+\\/\\=\\?\\`\\|\\{\\}~\'\\[\\]]+';
  $email_pattern = '/^mailto:' . $user . '@' . '(?:' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $query . '?$/';
  $tel_pattern = '/^tel:(?:\\+[1-9]\\d{1,14}|\\d{2,15})$/';
  $file_pattern = "/^(?:file:\\/\\/)" . "(?:\\/?[a-z0-9" . $link_ichars . "_\\-\\.\\\\~+%=&,\$'#!():;*@\\[\\]]*)*" . '$/i';

  if (strpos($text, '<front>') === 0) {
    return LINK_FRONT;
  }
  if (in_array('mailto', $allowed_protocols, TRUE) && preg_match($email_pattern, $text)) {
    return LINK_EMAIL;
  }
  if (strpos($text, '#') === 0) {
    return LINK_FRAGMENT;
  }
  if (strpos($text, '?') === 0) {
    return LINK_QUERY;
  }
  if (in_array('tel', $allowed_protocols, TRUE) && strpos($text, 'tel:') === 0) {
    if (preg_match($tel_pattern, $text)) {

      // Based on our tel pattern this is a 'valid' phone number so return tel
      // type.
      return LINK_TEL;
    }
    else {

      // Based on our tel pattern this is using the tel protocol, but is not a
      // 'valid' phone number. If we don't return false here $text will match
      // LINK_EXTERNAL which is incorrect.
      return FALSE;
    }
  }
  if (in_array('news', $allowed_protocols, TRUE) && preg_match($news_pattern, $text)) {
    return LINK_NEWS;
  }
  if (in_array('file', $allowed_protocols, TRUE) && preg_match($file_pattern, $text)) {
    return LINK_FILE;
  }
  if (preg_match($internal_pattern . $end, $text)) {
    return LINK_INTERNAL;
  }
  if (drupal_valid_path($text) && !url_is_external($text)) {
    return LINK_INTERNAL;
  }
  if (preg_match($external_pattern . $end, $text)) {
    return LINK_EXTERNAL;
  }
  if (preg_match($internal_pattern_file, $text)) {
    return LINK_INTERNAL;
  }
  return FALSE;
}