You are here

function cf_http_reduce_html_headers in Common Functionality 7.2

Same name and namespace in other branches
  1. 7 modules/cf_http/cf_http.module \cf_http_reduce_html_headers()

Reduces the level of every HTML header (h1 through h6) found in the given text or cf_dom object by $depth.

@see: cf_http_get_webpage()

Parameters

$text_or_dom: The HTML text whose headers are to be reduced. If cf_dom is enabled, then this can instead be the cf_dom object.

$depth: The number of levels by which each header is lowered (for example, with a depth of 1 an h1 becomes an h2). Any number from 1 to 6.

$preserve: A boolean representing whether or not to preserve the header structure when the depth of a given header is reduced to a number greater than 6. If preserve is false, all header formatting will be lost.

Return value

array An array containing the reduction status and the resulting text. The array keys:

  • reduced: A boolean with TRUE representing that the text was successfully reduced and FALSE otherwise.

  • text: The http text with all html headers reduced by $depth. This is an empty string if $text_or_dom is a cf_dom object.

Related topics

File

modules/cf_http/cf_http.module, line 666
Common Functionality - HTTP module.

Code

/**
 * Lowers the level of every HTML header (h1-h6) found in the given markup.
 *
 * Each <hN> becomes <h(N + $depth)>; a header pushed past h6 becomes a <div>.
 * Converted tags receive the marker class 'cf_http-was_hN' (N being the
 * original level), except for tags that become a <div> while $preserve is
 * FALSE.
 *
 * @param $text_or_dom
 *   The HTML markup as a string. If the cf_dom class exists, this may instead
 *   be a cf_dom object; its elements are then changed through
 *   cf_dom::change_element() and headers inside an <hgroup> are left alone.
 * @param $depth
 *   The number of levels to lower each header by. Values below 0 or above 6
 *   are rejected. Non-integer values are truncated to an integer.
 * @param $preserve
 *   When TRUE, headers that are turned into a <div> still receive the
 *   'cf_http-was_hN' marker class. When FALSE, such headers are converted
 *   without the marker class.
 *
 * @return
 *   An array with the keys:
 *   - reduced: TRUE when the headers were processed, FALSE otherwise.
 *   - text: The processed markup. This is an empty string on failure and
 *     whenever $text_or_dom is a cf_dom object.
 */
function cf_http_reduce_html_headers($text_or_dom, $depth = 1, $preserve = TRUE) {
  $results = array(
    'reduced' => FALSE,
    'text' => '',
  );

  if ($depth < 0 || $depth > 6) {
    return $results;
  }

  // Tag names are built from this value, so a float such as 1.5 must not
  // leak through and produce a tag like 'h2.5'.
  $depth = (int) $depth;

  // Deepest level first: an h5 turned into an h6 must not be picked up again
  // by the pass that handles the original h6 elements.
  $levels = array(6, 5, 4, 3, 2, 1);

  if (class_exists('cf_dom') && $text_or_dom instanceof cf_dom) {
    if (!$text_or_dom->is_loaded()) {
      return $results;
    }

    $body = $text_or_dom->get_body();
    if (!is_object($body)) {
      return $results;
    }

    // HTML5 with hgroup allows for multiple simultaneous headers, do not
    // change a header if a parent is an hgroup.
    $inside_hgroup = array_fill_keys($levels, array());
    foreach ($body->getElementsByTagName('hgroup') as $hgroup) {
      foreach ($levels as $number) {
        foreach ($hgroup->getElementsByTagName('h' . $number) as $header) {
          $inside_hgroup[$number][] = $header;
        }
      }
    }

    foreach ($levels as $number) {
      $reduced = $number + $depth;
      $tag_name = 'h' . $number;
      $class_name = 'cf_http-was_' . $tag_name;

      // h6 is still a valid header, only levels beyond it fall back to div.
      $tag_next = ($reduced > 6) ? 'div' : 'h' . $reduced;
      $add_class = ($reduced <= 6 || $preserve);

      // Copy the matches before changing anything: replacing elements while
      // walking the list returned by getElementsByTagName() can cause
      // elements to be skipped if that list is live.
      $elements = array();
      foreach ($body->getElementsByTagName($tag_name) as $element) {
        $elements[] = $element;
      }

      foreach ($elements as $element) {
        // Strict comparison matches on object identity.
        if (in_array($element, $inside_hgroup[$number], TRUE)) {
          continue;
        }

        if ($add_class) {
          $classes = array();
          if ($element->hasAttribute('class')) {
            $classes = preg_split('/\s+/', $element->getAttribute('class'), -1, PREG_SPLIT_NO_EMPTY);
          }

          if (!in_array($class_name, $classes, TRUE)) {
            $classes[] = $class_name;
            $element->setAttribute('class', implode(' ', $classes));
          }
        }

        $text_or_dom->change_element($element, $tag_next);
      }
    }

    $results['reduced'] = TRUE;
    return $results;
  }

  if (!is_string($text_or_dom)) {
    if (class_exists('cf_error')) {
      cf_error::invalid_string('text_or_dom');
    }
    return $results;
  }

  $text = $text_or_dom;
  foreach ($levels as $number) {
    $reduced = $number + $depth;
    $tag = ($reduced > 6) ? 'div' : 'h' . $reduced;

    // No marker class when the header collapses into a div and the caller
    // did not ask for the structure to be preserved.
    $class_name = ($tag != 'div' || $preserve) ? 'cf_http-was_h' . $number : NULL;

    $text = _cf_http_reduce_html_headers_retag($text, $number, $tag, $class_name);
    if (!is_string($text)) {
      // A PCRE failure occurred; report the text as not reduced.
      return $results;
    }
  }

  $results['text'] = $text;
  $results['reduced'] = TRUE;
  return $results;
}

/**
 * Renames every <hN> open/close tag in a string, keeping each tag's attributes.
 *
 * Every opening tag is rewritten individually so that each one keeps its own
 * attributes, and the attribute text is never used as a regular expression
 * replacement string.
 *
 * @param $text
 *   The markup to process.
 * @param $number
 *   The header level (1-6) to look for.
 * @param $tag
 *   The tag name to use in place of 'hN'.
 * @param $class_name
 *   The marker class to add to every rewritten opening tag, or NULL to add
 *   no class.
 *
 * @return
 *   The rewritten markup, or a non-string value (FALSE or NULL) when a
 *   regular expression call failed.
 */
function _cf_http_reduce_html_headers_retag($text, $number, $tag, $class_name) {
  $open_tags = array();

  // The attribute group is either empty or starts with whitespace, so longer
  // tag names that merely begin with 'hN' are not matched. The empty
  // alternative makes the group always present in the match.
  $pattern = '/<h' . $number . '(\s[^>]*|)>/i';
  if (preg_match_all($pattern, $text, $open_tags, PREG_SET_ORDER | PREG_OFFSET_CAPTURE) === FALSE) {
    return FALSE;
  }

  $rebuilt = '';
  $cursor = 0;
  foreach ($open_tags as $open_tag) {
    $start = $open_tag[0][1];
    $length = strlen($open_tag[0][0]);
    $attributes = $open_tag[1][0];

    if ($class_name !== NULL) {
      $attributes = _cf_http_reduce_html_headers_add_class($attributes, $class_name);
    }

    $rebuilt .= substr($text, $cursor, $start - $cursor) . '<' . $tag . $attributes . '>';
    $cursor = $start + $length;
  }
  $rebuilt .= (string) substr($text, $cursor);

  return preg_replace('/<\/h' . $number . '\s*>/i', '</' . $tag . '>', $rebuilt);
}

/**
 * Adds a class name to the attribute string of an opening tag.
 *
 * @param $attributes
 *   The raw attribute text of a tag: everything between the tag name and the
 *   closing '>', including the leading whitespace. May be an empty string.
 * @param $class_name
 *   The class name to add.
 *
 * @return
 *   The attribute text with $class_name appended to an existing quoted class
 *   attribute, or with a new class attribute added when there is none. The
 *   text is returned unchanged when the class is already present.
 */
function _cf_http_reduce_html_headers_add_class($attributes, $class_name) {
  $found = array();

  // Requiring whitespace before 'class' keeps attributes such as
  // 'data-class' from matching. Group 1 is the quote, group 2 the value.
  if (preg_match('/\sclass\s*=\s*(["\'])(.*?)\1/is', $attributes, $found, PREG_OFFSET_CAPTURE)) {
    $value = $found[2][0];
    $offset = $found[2][1];

    $classes = preg_split('/\s+/', $value, -1, PREG_SPLIT_NO_EMPTY);
    if (in_array($class_name, $classes, TRUE)) {
      return $attributes;
    }

    $new_value = empty($classes) ? $class_name : $value . ' ' . $class_name;
    return substr_replace($attributes, $new_value, $offset, strlen($value));
  }

  return rtrim($attributes) . ' class="' . $class_name . '"';
}