You are here

function lc_GetUrls in Link checker 5

1 call to lc_GetUrls()
lc_GetUniqueUrls in ./linkchecker.module

File

./linkchecker.module, line 421
This module periodically checks HTML links referenced by Drupal nodes. Developed and maintained by Marek Tichy, marek@ecn.cz

Code

/**
 * Extracts link targets from the <a> and <area> tags of an HTML fragment.
 *
 * Absolute "http://" URLs are always returned. Root-relative URLs (those
 * starting with "/") are returned only when the 'linkchecker_fqdn_only'
 * variable is 0, in which case they are prefixed with "http://" and the
 * current server name. Every other href value is skipped.
 *
 * @param $html
 *   The HTML text to scan.
 *
 * @return
 *   An array of URL strings in document order. Duplicates are not removed.
 */
function lc_GetUrls($html) {
  // Capture groups: 1 = tag name, 2 = attributes preceding href, 3 = the
  // "href" probe inside the negative lookahead, 4 = the whole href attribute,
  // 5 = the href value. The value must not start with a quote, '#', '[', '%'
  // or '>', and must be at least two characters long.
  //
  // The pattern previously carried the "e" modifier, which is only meaningful
  // for preg_replace() and makes the pattern fail to compile on PHP 7+, so
  // no link was ever found there. It is written here without layout
  // whitespace, so the "x" modifier is no longer needed either.
  $pattern = "@<(a|area)\\s(.(?!(href)))*?\\s*"
    . "(href\\s*=\\s*['\"]?"
    . "([^\\'#\\[%\">][^\\'\">]*[^\\'\"> ])"
    . "\\s*['\"]?)@i";

  $matches = array();
  // preg_match_all() returns FALSE on error and 0 when nothing matched; in
  // both cases there is nothing to collect.
  if (!preg_match_all($pattern, $html, $matches) || empty($matches[5])) {
    return array();
  }

  $links = array();
  foreach ($matches[5] as $url) {
    // NOTE(review): only http:// is accepted, so https:// links are never
    // collected -- confirm whether that is intended.
    if (preg_match("|^http://(.*)|i", $url)) {
      $links[] = $url;
    }
    elseif (preg_match("|^/(.*)|i", $url)) {
      // Root-relative link: only collected when the site is configured to
      // check more than fully qualified URLs.
      if (variable_get('linkchecker_fqdn_only', 1) == 0) {
        $links[] = "http://" . $_SERVER["SERVER_NAME"] . "" . $url;
      }
    }
  }
  return $links;
}