You are here

function shurly_validate_long in ShURLy 8

Same name and namespace in other branches
  1. 6 shurly.module \shurly_validate_long()
  2. 7 shurly.module \shurly_validate_long()

Validate a long URL.

Checks for:

  • a valid URL
  • it's not a link to an existing short URL.

Parameters

$long_url - the long URL entered by the user. It is passed by reference, and a duplicated leading "http://" is stripped from it in place.

Return value

BOOLEAN - TRUE if valid, FALSE if invalid

3 calls to shurly_validate_long()
ShurlyCreateForm::validateForm in src/Form/ShurlyCreateForm.php
Form validation handler.
ShurlyEditForm::validateForm in src/Form/ShurlyEditForm.php
Form validation handler.
shurly_shorten in ./shurly.module
API function to shorten a URL.

File

./shurly.module, line 360
Description http://www.youtube.com/watch?v=Qo7qoonzTCE.

Code

/**
 * Validates a long URL before it is shortened.
 *
 * Checks that the value is a well-formed http(s) URL with a host, applies the
 * optional restrictions configured in 'shurly.settings' (IP hosts, localhost,
 * unresolvable hosts, private IPs, custom pattern) and finally rejects URLs
 * that point at an already existing short URL on this site.
 *
 * @param string $long_url
 *   The long URL entered by the user. Passed by reference: a duplicated
 *   leading "http://" (e.g. "http://https://example.com") is stripped in
 *   place.
 *
 * @return bool
 *   TRUE if the URL is valid, FALSE otherwise.
 */
function shurly_validate_long(&$long_url) {
  $return = TRUE;
  $match = FALSE;
  $config = \Drupal::config('shurly.settings');

  // If the person didn't remove the original http:// from the field, pull it out.
  $long_url = preg_replace('!^http\\://(http\\://|https\\://)!i', '\\1', $long_url);
  $long_parse = parse_url($long_url);
  $base_parse = parse_url($GLOBALS['base_url']);
  $check_ip = $config->get('shurly_forbid_ips');
  $check_localhost = $config->get('shurly_forbid_localhost');
  $check_resolvability = $config->get('shurly_forbid_unresolvable_hosts');
  $check_private_ip_ranges = $config->get('shurly_forbid_private_ips');

  // parse_url() omits the 'scheme' key for scheme-relative URLs such as
  // "//example.com/path", so read it defensively.
  $scheme = isset($long_parse['scheme']) ? $long_parse['scheme'] : '';

  if ($long_parse === FALSE || !isset($long_parse['host'])) {

    // Malformed URL
    // or no host in the URL.
    $return = FALSE;
  }
  elseif ($scheme != 'http' && $scheme != 'https') {

    // Only http and https links may be shortened.
    $return = FALSE;
  }
  elseif ($check_ip && preg_match('/^\\d/', $long_parse['host'])) {

    // Host is given as IP address instead of a common hostname.
    $return = FALSE;

    // @todo Rework condition with respect to RFC 1123, which allows hostnames
    //   starting with a digit.
  }
  elseif ($check_localhost && shurly_host_is_local($long_parse['host'], TRUE)) {

    // Host seems to be the local host.
    $return = FALSE;
  }
  elseif ($check_resolvability && !shurly_host_is_resolveable($long_parse['host'], TRUE)) {

    // Host cannot be resolved (at least not by this server!).
    $return = FALSE;
  }
  elseif ($check_private_ip_ranges && shurly_host_is_private($long_parse['host'], TRUE)) {

    // Host refers to a private IP address.
    $return = FALSE;
  }
  else {
    if ($config->get('shurly_forbid_custom')) {
      $custom_pattern = $config->get('shurly_custom_restriction');
      if (!empty($custom_pattern) && preg_match($custom_pattern, $long_url)) {

        // URL matches the site-specific forbidden pattern.
        $return = FALSE;
      }
    }

    // Host names are case-insensitive, so compare them in lower case.
    $long_domain_parts = explode('.', strtolower($long_parse['host']));
    $base_host = isset($base_parse['host']) ? strtolower($base_parse['host']) : '';
    $base_domain_parts = explode('.', $base_host);
    $count_long_domain = count($long_domain_parts);

    // Each host must be indexed with its OWN label count; otherwise hosts with
    // a different number of labels (www.lbt.me vs. lbt.me) can never match.
    $count_base_domain = count($base_domain_parts);
    $last_long_part = $long_domain_parts[$count_long_domain - 1];
    $last_base_part = $base_domain_parts[$count_base_domain - 1];

    // If last domain part of entered URL matches last part of this domain.
    if ($base_host !== '' && $last_long_part === $last_base_part) {

      // And (if there's a 2nd to last)
      if ($count_long_domain >= 2) {
        $last_long_penult = $long_domain_parts[$count_long_domain - 2];
        $last_base_penult = $count_base_domain >= 2 ? $base_domain_parts[$count_base_domain - 2] : NULL;

        // Check that 2nd to last matches.
        if ($last_long_penult === $last_base_penult) {

          // Last 2 parts link to this domain.
          $match = TRUE;
        }
      }
      else {

        // there's only one part, and it links here.
        $match = TRUE;
      }

      // We only get down here if the long URL links to this domain
      // by the way, we're ignoring any subdomain...
      // so http://lbt.me/something and http://www.lbt.me/something are assumed to be the same.
      if ($match) {
        $queries = [];
        if (isset($long_parse['query'])) {

          // let's see if there's a $_GET['q'] in the long URL.
          $query = html_entity_decode($long_parse['query']);
          foreach (explode('&', $query) as $val) {

            // Split on the first '=' only so values containing '=' survive,
            // and tolerate parameters that have no value at all.
            $x = explode('=', $val, 2);
            $queries[$x[0]] = isset($x[1]) ? $x[1] : '';
          }
        }
        if (isset($queries['q']) && $queries['q'] !== '') {

          // If there's a 'q' query, Drupal uses this instead of anything in the path.
          $path = $queries['q'];
        }
        else {

          // parse_url() omits 'path' for URLs like "http://example.com".
          $path = isset($long_parse['path']) ? $long_parse['path'] : '';
        }

        // See if this is a link to an existing shortURL
        // remove the leading "/" from path, if it exists.
        $path = ltrim($path, '/');
        if ($path) {

          // Get the base path of this Drupal install.
          $base = ltrim(base_path(), '/');

          // Remove the base from the path. The pattern is anchored so only a
          // leading base path is stripped, never a later occurrence.
          if ($base) {
            $path = preg_replace('!^' . preg_quote($base, '!') . '!i', '', $path);
          }
          if (shurly_url_exists($path)) {
            $return = FALSE;
          }
        }
      }
    }
  }
  return $return;
}