You are here

function _linkchecker_extract_node_links in Link checker 6.2

Same name and namespace in other branches
  1. 7 linkchecker.module \_linkchecker_extract_node_links()

Extracts links from a node.

Parameters

$node: The fully populated node object.

$return_field_names: If set to TRUE, the returned array will contain the link URLs as keys, and each element will be an array containing all field names in which the URL is found (the special field name "node" is used to represent all scanned node content that is not a CCK field). Otherwise, a simple array of URLs will be returned.

Return value

An array whose keys are fully qualified and unique URLs found in the node (as returned by _linkchecker_extract_links()), or a more complex structured array (see above) if $return_field_names is TRUE.

2 calls to _linkchecker_extract_node_links()
_linkchecker_add_node_links in ./linkchecker.module
Add node links to database.
_linkchecker_link_node_ids in ./linkchecker.module
Returns IDs of nodes that contain a link which the current user may be allowed to view.

File

./linkchecker.module, line 905
This module periodically checks links in given node types, blocks, CCK fields, etc.

Code

/**
 * Extracts links from a node.
 *
 * Scans the node title, body and teaser, the URL of 'weblink'/'weblinks'
 * nodes, and any CCK fields of type 'link' or 'text'.
 *
 * @param $node
 *   The fully populated node object.
 * @param $return_field_names
 *   If set to TRUE, the returned array will contain the link URLs as keys,
 *   and each element will be an array containing all field names in which the
 *   URL is found (the special field name "node" is used to represent all
 *   scanned node content that is not a CCK field). Otherwise, the array
 *   returned by _linkchecker_extract_links() is passed through unchanged.
 *
 * @return
 *   An array whose keys are the URLs found in the node (as returned by
 *   _linkchecker_extract_links()), or the structured array described above
 *   if $return_field_names is TRUE.
 */
function _linkchecker_extract_node_links($node, $return_field_names = FALSE) {

  // Get current node language options for url() functions. The language
  // option is only added when the node's language is present in the language
  // list, so an unknown language code never reads an undefined index.
  $url_options = array('absolute' => TRUE);
  if (!empty($node->language)) {
    $languages = language_list();
    if (isset($languages[$node->language])) {
      $url_options = array(
        'language' => $languages[$node->language],
        'absolute' => TRUE,
      );
    }
  }

  // Create array of node fields to scan. Every text is stored twice: once in
  // a flat list used for link extraction and once grouped by field name.
  $text_items = array();
  $text_items_by_field = array();
  $text_items[] = $text_items_by_field['node'][] = _filter_url($node->title, $node->format);
  $text_items[] = $text_items_by_field['node'][] = _linkchecker_check_markup($node->body, $node->format, FALSE);
  $text_items[] = $text_items_by_field['node'][] = _linkchecker_check_markup($node->teaser, $node->format, FALSE);

  // Search for links in 'weblink' nodes from 'links' module package.
  if (module_exists('links_weblink') && $node->type == 'weblink' && !empty($node->links_weblink_url)) {
    $text_items[] = $text_items_by_field['node'][] = _filter_url(url($node->links_weblink_url, $url_options), $node->format);
  }

  // Search for links in 'weblinks' nodes from 'weblinks' module.
  if (module_exists('weblinks') && $node->type == 'weblinks' && !empty($node->url)) {
    $text_items[] = $text_items_by_field['node'][] = _filter_url(url($node->url, $url_options), $node->format);
  }

  // Search for CCK-fields of types 'link' and 'text'.
  if (module_exists('content')) {
    $fields = content_fields(NULL, $node->type);
    foreach ($fields as $field) {
      $field_name = $field['field_name'];
      if (empty($node->{$field_name})) {
        continue;
      }

      // The field items live in the node property named after the field, so
      // the property name must be the field name string, not the $field
      // definition array itself.
      if (module_exists('link') && $field['type'] == 'link') {
        foreach ($node->{$field_name} as $item) {
          if (!empty($item['url'])) {

            // Make non-absolute urls absolute or they are not found by _filter_url().
            $text_items[] = $text_items_by_field[$field_name][] = _filter_url(url($item['url'], $url_options), $node->format);
          }
        }
      }
      elseif (module_exists('text') && $field['type'] == 'text') {
        foreach ($node->{$field_name} as $item) {
          $text_items[] = $text_items_by_field[$field_name][] = _filter_url($item['value'], $node->format);
        }
      }
    }
  }

  // Get the absolute node path for extraction of relative links.
  $path = url('node/' . $node->nid, $url_options);

  // Extract all links in a node.
  $links = _linkchecker_extract_links(implode(' ', $text_items), $path);

  // Return either the array of links, or an array of field names containing
  // each link, depending on what was requested.
  if (!$return_field_names) {
    return $links;
  }

  $field_names = array();
  foreach ($text_items_by_field as $field_name => $items) {
    foreach ($items as $item) {
      foreach ($links as $uri => $link) {

        // We only need to do a quick check here to see if the URL appears
        // anywhere in the text; if so, that means users with access to this
        // field will be able to see the URL (and any private data such as
        // passwords contained in it). This is sufficient for the purposes of
        // _linkchecker_link_node_ids(), where this information is used.
        foreach ($link as $original_link) {
          if (strpos($item, $original_link) !== FALSE) {
            $field_names[$uri][$field_name] = $field_name;
          }
        }
      }
    }
  }
  return $field_names;
}