You are here

function link_validate_url in Link 5

Same name and namespace in other branches
  1. 6.2 link.inc \link_validate_url()
  2. 6 link.module \link_validate_url()
  3. 7 link.module \link_validate_url()

A lenient verification for URLs. Accepts all URLs following RFC 1738 standard for URL formation and all email addresses following the RFC 2368 standard for mailto address formation.

Parameters

string $text: The URL, internal path, mailto: address, or news: address to validate.

Return value

mixed Returns boolean FALSE if the URL is not valid. On success, returns one of the link type constants identifying what kind of address was matched: LINK_FRONT, LINK_EMAIL, LINK_NEWS, LINK_INTERNAL, or LINK_EXTERNAL.

4 calls to link_validate_url()
link_cleanup_url in ./link.module
Forms a valid URL if possible from an entered address. Trims whitespace and automatically adds an http:// to addresses without a protocol specified.
link_field_formatter in ./link.module
Implementation of hook_field_formatter().
_link_widget_process in ./link.module
_link_widget_validate in ./link.module

File

./link.module, line 860
Defines simple link field types.

Code

/**
 * Leniently classifies a string as a front-page, mailto:, news:, internal or
 * external link.
 *
 * The checks run in a fixed order and the first match wins: "<front>" prefix,
 * mailto: address, news: address, internal path, external URL.
 *
 * @param string $text
 *   The address to validate.
 *
 * @return mixed
 *   LINK_FRONT, LINK_EMAIL, LINK_NEWS, LINK_INTERNAL or LINK_EXTERNAL for the
 *   first pattern that matches, or boolean FALSE when nothing matches.
 */
function link_validate_url($text) {
  $allowed_protocols = variable_get('filter_allowed_protocols', array(
    'http',
    'https',
    'ftp',
    'news',
    'nntp',
    'telnet',
    'mailto',
    'irc',
    'ssh',
    'sftp',
    'webcal',
  ));

  // The protocol list is site-configurable, so escape each entry before it is
  // embedded in the pattern. Otherwise a scheme such as "svn+ssh" would have
  // its "+" read as a quantifier, and a "/" would terminate the pattern early.
  $quoted_protocols = array();
  foreach ($allowed_protocols as $allowed_protocol) {
    $quoted_protocols[] = preg_quote($allowed_protocol, '/');
  }

  // Building blocks for the authority part of a URL. LINK_ICHARS and
  // LINK_DOMAINS are constants defined elsewhere in the module; presumably
  // extra permitted characters and the accepted top-level domains
  // (NOTE(review): confirm against their definitions).
  $protocol = '((' . implode("|", $quoted_protocols) . '):\\/\\/)';
  $authentication = '(([a-z0-9%' . LINK_ICHARS . ']+(:[a-z0-9%' . LINK_ICHARS . '!]*)?)?@)';
  $domain = '(([a-z0-9' . LINK_ICHARS . ']([a-z0-9' . LINK_ICHARS . '\\-_\\[\\]])*)(\\.(([a-z0-9' . LINK_ICHARS . '\\-_\\[\\]])+\\.)*(' . LINK_DOMAINS . '|[a-z]{2}))?)';
  $ipv4 = '([0-9]{1,3}(\\.[0-9]{1,3}){3})';
  // Only the full, uncompressed eight-group IPv6 form is accepted.
  $ipv6 = '([0-9a-fA-F]{1,4}(\\:[0-9a-fA-F]{1,4}){7})';
  $port = '(:([0-9]{1,5}))';

  // Pattern specific to external links. Protocol, credentials and port are
  // all optional; the host may be a domain, an IPv4/IPv6 address or
  // "localhost". There must be no whitespace around the "|" separators: a
  // space there becomes part of the alternative and would have to appear
  // literally in the input.
  $external_pattern = '/^' . $protocol . '?' . $authentication . '?(' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $port . '?';

  // Pattern specific to internal links.
  $internal_pattern = "/^([a-z0-9" . LINK_ICHARS . "_\\-+\\[\\]]+)";
  $directories = "(\\/[a-z0-9" . LINK_ICHARS . "_\\-\\.~+%=&,\$'!():;*@\\[\\]]*)*";

  // Yes, four backslashes == a single backslash.
  $query = "(\\/?\\?([?a-z0-9" . LINK_ICHARS . "+_|\\-\\.\\/\\\\%=&,\$'():;*@\\[\\]{} ]*))";
  $anchor = "(#[a-z0-9" . LINK_ICHARS . "_\\-\\.~+%=&,\$'():;*@\\[\\]\\/\\?]*)";

  // The rest of the path for a standard URL. This also closes the pattern and
  // makes the internal and external patterns case-insensitive.
  $end = $directories . '?' . $query . '?' . $anchor . '?' . '$/i';

  // news: links address either a newsgroup name or a message id.
  $message_id = '[^@].*@' . $domain;
  $newsgroup_name = '([0-9a-z+-]*\\.)*[0-9a-z+-]*';
  $news_pattern = '/^news:(' . $newsgroup_name . '|' . $message_id . ')$/i';

  // mailto: links: local part, "@", host, and an optional query string.
  // NOTE(review): unlike the other patterns this one has no /i flag, so
  // upper-case letters in the host part are rejected; confirm that is
  // intended before changing it.
  $user = '[a-zA-Z0-9' . LINK_ICHARS . '_\\-\\.\\+\\^!#\\$%&*+\\/\\=\\?\\`\\|\\{\\}~\'\\[\\]]+';
  $email_pattern = '/^mailto:' . $user . '@' . '(' . $domain . '|' . $ipv4 . '|' . $ipv6 . '|localhost)' . $query . '?$/';

  // Anything that starts with the front-page token is accepted as-is.
  if (strpos($text, '<front>') === 0) {
    return LINK_FRONT;
  }
  // mailto: and news: are only recognised when the site allows the protocol.
  if (in_array('mailto', $allowed_protocols) && preg_match($email_pattern, $text)) {
    return LINK_EMAIL;
  }
  if (in_array('news', $allowed_protocols) && preg_match($news_pattern, $text)) {
    return LINK_NEWS;
  }
  // Internal paths are tried before external URLs, so a string that satisfies
  // both patterns is reported as internal.
  if (preg_match($internal_pattern . $end, $text)) {
    return LINK_INTERNAL;
  }
  if (preg_match($external_pattern . $end, $text)) {
    return LINK_EXTERNAL;
  }
  return FALSE;
}