You are here

function link_validate_url in Link 6.2

Same name and namespace in other branches
  1. 5 link.module \link_validate_url()
  2. 6 link.module \link_validate_url()
  3. 7 link.module \link_validate_url()

A lenient verification for URLs. Accepts all URLs following RFC 1738 standard for URL formation and all email addresses following the RFC 2368 standard for mailto address formation.

Parameters

string $text: The link text (URL, e-mail address, or internal path) to validate.

Return value

mixed Returns boolean FALSE if the URL is not valid. On success, returns one of the link type constants: LINK_FRONT, LINK_EMAIL, LINK_NEWS, LINK_INTERNAL, or LINK_EXTERNAL.

15 calls to link_validate_url()
LinkValidateUrlLight::testInvalidExternalLinks in tests/link.validate.test
LinkValidateUrlLight::testValidateBadNewsgroupLink in tests/link.validate.test
LinkValidateUrlLight::testValidateEmailLink in tests/link.validate.test
LinkValidateUrlLight::testValidateEmailLinkBad in tests/link.validate.test
LinkValidateUrlLight::testValidateExternalLinks in tests/link.validate.test

... See full list

File

./link.inc, line 311
Helper functions for Link field, widget and form elements.

Code

/**
 * A lenient verification for URLs.
 *
 * Classifies $text by testing it against a series of regular expressions, in
 * this order: the '<front>' token, mailto links, news links, internal paths,
 * external URLs and finally bare internal file names. The first match wins.
 *
 * @param string $text
 *   The link text to classify.
 *
 * @return mixed
 *   LINK_FRONT, LINK_EMAIL, LINK_NEWS, LINK_INTERNAL or LINK_EXTERNAL for the
 *   first pattern that matches, or boolean FALSE when nothing matches.
 */
function link_validate_url($text) {
  // Non-ASCII letters accepted inside host names.
  $LINK_ICHARS_DOMAIN = (string) _link_html_entity_decode(implode("", array(
    "æ",
    // æ
    "Æ",
    // Æ
    "ø",
    // ø
    "Ø",
    // Ø
    "å",
    // å
    "Å",
    // Å
    "ä",
    // ä
    "Ä",
    // Ä
    "ö",
    // ö
    "Ö",
    // Ö
    "ü",
    // ü
    "Ü",
    // Ü
    "Ñ",
    // Ñ
    "ñ",
  )), ENT_QUOTES, 'UTF-8');

  // Non-ASCII letters accepted everywhere else (paths, queries, user names):
  // the domain set plus the sharp s.
  $LINK_ICHARS = $LINK_ICHARS_DOMAIN . (string) _link_html_entity_decode(implode("", array(
    "ß",
  )), ENT_QUOTES, 'UTF-8');

  $allowed_protocols = variable_get('filter_allowed_protocols', array(
    'http',
    'https',
    'ftp',
    'news',
    'nntp',
    'telnet',
    'mailto',
    'irc',
    'ssh',
    'sftp',
    'webcal',
  ));
  $link_domains = _link_domains();

  // The protocol list is site configuration, so escape every entry before it
  // becomes part of a regular expression. Without this a protocol such as
  // "svn+ssh" would have its "+" interpreted as a quantifier.
  $quoted_protocols = array();
  foreach ($allowed_protocols as $allowed_protocol) {
    $quoted_protocols[] = preg_quote($allowed_protocol, '/');
  }

  // Building blocks shared by the patterns below.
  $protocol = '((' . implode("|", $quoted_protocols) . '):\\/\\/)';
  $authentication = '(([a-z0-9%' . $LINK_ICHARS . ']+(:[a-z0-9%' . $LINK_ICHARS . '!]*)?)?@)';
  $domain = '(([a-z0-9' . $LINK_ICHARS_DOMAIN . ']([a-z0-9' . $LINK_ICHARS_DOMAIN . '\\-_\\[\\]])*)(\\.(([a-z0-9' . $LINK_ICHARS_DOMAIN . '\\-_\\[\\]])+\\.)*(' . $link_domains . '|[a-z]{2}))?)';
  $ipv4 = '([0-9]{1,3}(\\.[0-9]{1,3}){3})';
  // Only the full eight-group form is recognised (no "::" compression).
  $ipv6 = '([0-9a-fA-F]{1,4}(\\:[0-9a-fA-F]{1,4}){7})';
  $port = '(:([0-9]{1,5}))';

  // Pattern specific to external links. There must be no whitespace between
  // the alternatives: the pattern is not compiled with the "x" modifier, so a
  // space would be matched literally and IPv6 hosts would never validate.
  $external_pattern = '/^' . $protocol . '?' . $authentication . '?(' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $port . '?';

  // Pattern specific to internal links.
  $internal_pattern = "/^([a-z0-9" . $LINK_ICHARS . "_\\-+\\[\\]]+)";
  $internal_pattern_file = "/^([a-z0-9" . $LINK_ICHARS . "_\\-+\\[\\]\\.]+)\$/i";
  $directories = "(\\/[a-z0-9" . $LINK_ICHARS . "_\\-\\.~+%=&,\$'!():;*@\\[\\]]*)*";

  // Yes, four backslashes == a single backslash.
  $query = "(\\/?\\?([?a-z0-9" . $LINK_ICHARS . "+_|\\-\\.\\/\\\\%=&,\$'():;*@\\[\\]{} ]*))";
  $anchor = "(#[a-z0-9" . $LINK_ICHARS . "_\\-\\.~+%=&,\$'():;*@\\[\\]\\/\\?]*)";

  // The rest of the path for a standard URL. This also closes the internal
  // and external patterns, which are deliberately left open above.
  $end = $directories . '?' . $query . '?' . $anchor . '?' . '$/i';

  // Patterns for news: links (either a newsgroup name or a message id).
  $message_id = '[^@].*@' . $domain;
  $newsgroup_name = '([0-9a-z+-]*\\.)*[0-9a-z+-]*';
  $news_pattern = '/^news:(' . $newsgroup_name . '|' . $message_id . ')$/i';

  // Pattern for mailto: links.
  // NOTE(review): this pattern has no "i" modifier and $domain only lists
  // a-z, so upper-case host names in mailto links are rejected. Left as is
  // because callers may rely on it.
  $user = '[a-zA-Z0-9' . $LINK_ICHARS . '_\\-\\.\\+\\^!#\\$%&*+\\/\\=\\?\\`\\|\\{\\}~\'\\[\\]]+';
  $email_pattern = '/^mailto:' . $user . '@' . '(' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $query . '?$/';

  // Classification: the order of these checks is significant, the first
  // matching pattern decides the result.
  if (strpos($text, '<front>') === 0) {
    return LINK_FRONT;
  }
  if (in_array('mailto', $allowed_protocols) && preg_match($email_pattern, $text)) {
    return LINK_EMAIL;
  }
  if (in_array('news', $allowed_protocols) && preg_match($news_pattern, $text)) {
    return LINK_NEWS;
  }
  if (preg_match($internal_pattern . $end, $text)) {
    return LINK_INTERNAL;
  }
  if (preg_match($external_pattern . $end, $text)) {
    return LINK_EXTERNAL;
  }
  if (preg_match($internal_pattern_file, $text)) {
    return LINK_INTERNAL;
  }
  return FALSE;
}