You are here

function _coder_review_read_and_parse_file in Coder 7.2

Same name and namespace in other branches
  1. 7 coder_review/coder_review.module \_coder_review_read_and_parse_file()

Parses and reads source files into a format for easier review validation.

For each source file, the following file lines of code (with trailing newlines) will be added to the Coder arguments array:

  • #all_array_lines:
  • #all_lines:
  • #allphp_array_lines:
  • #comment_array_lines:
  • #doublequote_array_lines:
  • #html_array_lines:
  • #php_array_lines:
  • #quote_array_lines:

The _array_ variants are multidimensional arrays: the first index is the line number, and the second index is each occurrence within the line. #all_lines is a simple array, with each line from the source file stored as a value keyed by its line number.

Parameters

array $coder_args: A Coder arguments array, passed by reference.

Return value

int Integer 1 if success.

1 call to _coder_review_read_and_parse_file()
do_coder_reviews in coder_review/coder_review.common.inc
Performs coder reviews for multiple code review definition files.

File

coder_review/coder_review.common.inc, line 377
Common functions used by both the drush and form interfaces.

Code

/**
 * Parses and reads source files into a format for easier review validation.
 *
 * The content to parse is taken, in order of precedence, from
 * $coder_args['#patch'], $coder_args['#test'], or the file named by
 * $coder_args['#filename'].
 *
 * The following keys are written to the Coder arguments array:
 *   - #all_array_lines, #php_array_lines, #allphp_array_lines,
 *     #html_array_lines, #comment_array_lines: arrays keyed by line number,
 *     each holding a one-element array with that line's text.
 *   - #quote_array_lines, #doublequote_array_lines: arrays keyed by line
 *     number, each holding the non-empty quoted strings found on that line.
 *   - #all_lines: array of complete non-blank lines keyed by line number.
 *   - #raw_contents: the parsed content, with one extra "\n" appended.
 *   - #num_lines: the line number of the last non-blank line, or 0.
 *   - #stack: keyed by line number, array(class name, function name) in
 *     effect for each line of #php_array_lines.
 *   - #ignores: keyed by line number, the rule names disabled by "@ignore"
 *     comment directives; empty unless #settings_ignore is truthy.
 *
 * @param array $coder_args
 *   A Coder arguments array, passed by reference. Reads #filename,
 *   #php_extensions, #include_extensions and, optionally, #patch, #test and
 *   #settings_ignore.
 *
 * @return int
 *   Integer 1 if success. Nothing is returned explicitly when there is
 *   neither patch nor test content and the file does not exist.
 */
function _coder_review_read_and_parse_file(array &$coder_args) {

  // Determine the file extension type.
  // Set all allowed PHP extensions to 'php'.
  $pathinfo = pathinfo($coder_args['#filename']);

  // pathinfo() omits the 'extension' key for files without an extension.
  $extension = isset($pathinfo['extension']) ? $pathinfo['extension'] : '';
  $allowed_extensions = array_merge($coder_args['#php_extensions'], $coder_args['#include_extensions'], array(
    'module',
    'theme',
  ));

  // If the file extension is any of the allowed extensions (other than 'js')
  // then set $ext to 'php', otherwise use the actual extension.
  $ext = in_array($extension, array_diff($allowed_extensions, array(
    'js',
  ))) ? 'php' : $extension;

  /* The use of variables with 'php' in them ($in_php, $in_all_php, $php_lines,
   * etc.) is misleading. All references to such should be renamed 'code'
   * because we also are using this engine to read 'js' files.
   */

  // Get the path to the module file.
  $filepath = realpath($coder_args['#filename']);
  if (!empty($coder_args['#patch']) || !empty($coder_args['#test']) || file_exists($filepath)) {

    // Javascript files are code from the first character; PHP files start in
    // HTML mode until an opening PHP tag is seen.
    $in_php = $ext == 'js' ? 1 : 0;
    $in_allphp = $in_php;
    $in_comment = 0;
    if (!empty($coder_args['#patch'])) {
      $content = $coder_args['#patch'];
      if (preg_match('/^\\s*\\*/', $content)) {
        $in_comment = '*';
      }
      else {
        $content = preg_replace('/^(function\\s.*?(\\r\\n|\\n)+)(\\s*\\*)/', '${1}/*', $content);
        $in_php = 1;
        $in_allphp = 1;
      }
    }
    elseif (!empty($coder_args['#test'])) {
      $content = $coder_args['#test'];
      $in_php = 1;
      $in_allphp = 1;
    }
    else {
      $content = file_get_contents($filepath);
    }

    // Guarantee the content ends with a newline so the last line is flushed
    // and look-aheads ($pos + 1) on non-newline characters stay in range.
    $content .= "\n";
    $content_length = strlen($content);

    // NOTE(review): this reset also discards the '*' state set above for
    // patches that begin inside a comment - confirm whether that is intended.
    $in_comment = 0;
    $in_quote_html = 0;
    $in_backslash = 0;
    $in_quote = 0;
    $in_heredoc = 0;
    $in_heredoc_length = 0;
    $in_heredoc_html = '';
    $beginning_of_line = 0;
    $this_all_lines = '';
    $this_php_lines = '';
    $this_allphp_lines = '';
    $this_html_lines = '';
    $this_quote_lines = array(
      '',
    );
    $this_quote_index = -1;
    $this_quote_sep = FALSE;
    $this_doublequote_lines = array(
      '',
    );
    $this_doublequote_index = -1;
    $this_comment_lines = '';

    // Parse the file:
    // - Strip comments,
    // - Strip quote content,
    // - Strip stuff not in php,
    // - Break into lines.
    $lineno = 1;
    for ($pos = 0; $pos < $content_length; ++$pos) {

      // Get the current character.
      $char = $content[$pos];

      // Look ahead to the next character, to cater for \r\n  line ends.
      $next_char = isset($content[$pos + 1]) ? $content[$pos + 1] : '';
      if ($char == "\n" || $char . $next_char == "\r\n") {

        // End C++ style comments on newline.
        if ($in_comment === '/' || $in_comment === '#') {
          $in_comment = 0;
        }

        // Assume that html inside quotes doesn't span newlines.
        $in_quote_html = 0;

        // Remove coder's simpletests assertions as they validly contain bad
        // code, for testing the review rules. Such lines are not recorded,
        // but they still go through the common end-of-line handling below so
        // that CRLF line ends are consumed and all per-line state is reset.
        $skip_line = preg_match('/assertCoderReview(Fail|Pass)/', $this_all_lines);

        // Remove blank lines now, so we avoid processing them over-and-over.
        if (!$skip_line && $this_all_lines != '') {
          if (trim($this_all_lines, "\r\n") != '') {
            $all_lines[$lineno] = array(
              $this_all_lines,
            );
            $full_lines[$lineno] = $this_all_lines;
          }
          if (trim($this_php_lines, "\r\n") != '') {
            $php_lines[$lineno] = array(
              $this_php_lines,
            );
          }
          if (trim($this_allphp_lines, "\r\n") != '') {
            $allphp_lines[$lineno] = array(
              $this_allphp_lines,
            );
          }
          if (trim($this_html_lines, "\r\n") != '') {
            $html_lines[$lineno] = array(
              $this_html_lines,
            );
          }
          $quotes = array();
          foreach ($this_quote_lines as $quote_line) {
            if (trim($quote_line, "\r\n") != '') {
              $quotes[] = $quote_line;
            }
          }
          if ($quotes) {
            $quote_lines[$lineno] = $quotes;
          }
          $quotes = array();
          foreach ($this_doublequote_lines as $quote_line) {
            if (trim($quote_line, "\r\n") != '') {
              $quotes[] = $quote_line;
            }
          }
          if ($quotes) {
            $doublequote_lines[$lineno] = $quotes;
          }
          if (trim($this_comment_lines, "\r\n") != '') {
            $comment_lines[$lineno] = array(
              $this_comment_lines,
            );
          }
        }

        // Increment $pos by an extra one if the newline was indicated by the
        // two-character CRLF 'carriage return line feed'.
        if ($char . $next_char == "\r\n") {
          ++$pos;
        }

        // Save this line and start a new line.
        ++$lineno;
        $this_all_lines = '';
        $this_php_lines = '';
        $this_allphp_lines = '';
        $this_html_lines = '';
        $this_quote_lines = array(
          '',
        );
        $this_doublequote_lines = array(
          '',
        );
        $this_quote_index = -1;
        $this_quote_sep = FALSE;
        $this_doublequote_index = -1;
        $this_comment_lines = '';
        $beginning_of_line = 1;
        continue;
      }

      // Only the first character of a line counts as the beginning of line.
      if ($this_all_lines != '') {
        $beginning_of_line = 0;
      }
      $this_all_lines .= $char;
      if ($in_php || $in_allphp) {

        // When in a quoted string, look for the trailing quote; strip the
        // characters in the string and replace with '' or "".
        if ($in_quote) {
          if ($in_backslash) {
            $in_backslash = 0;
          }
          elseif ($char == '\\') {
            $in_backslash = 1;
          }
          elseif ($char == $in_quote && !$in_backslash) {
            $in_quote = 0;
          }
          elseif ($char == '<') {
            $in_quote_html = '>';
          }

          // Still inside the string: collect its content.
          if ($in_quote) {
            if ($this_quote_index == -1) {
              $this_quote_index = 0;
            }
            $this_quote_lines[$this_quote_index] .= $char;
            if ($in_quote == '"') {
              if ($this_doublequote_index == -1) {
                $this_doublequote_index = 0;
              }
              $this_doublequote_lines[$this_doublequote_index] .= $char;
            }
            if ($in_quote_html) {
              $this_html_lines .= $char;
            }
          }
          if ($char == $in_quote_html) {
            $in_quote_html = 0;
          }
          $this_allphp_lines .= $char;

          // @note: Trailing char output with starting one.
          unset($char);
        }
        elseif ($in_heredoc) {

          // @note: drupal_substr does not properly handle multi-byte characters in this string.
          // @todo: check other places where the drupal_ string functions fail.
          if ($beginning_of_line && $char == $in_heredoc[0] && substr($content, $pos, $in_heredoc_length) == $in_heredoc) {
            $this_all_lines .= _substr($content, $pos + 1, $in_heredoc_length - 1);
            $in_heredoc = 0;

            // NOTE(review): together with the loop's ++$pos this also skips
            // the character right after the terminator (normally ';') -
            // confirm that this is intended.
            $pos += $in_heredoc_length;
          }
          elseif ($char == '<') {
            $in_heredoc_html = '>';
          }
          if ($in_heredoc && $in_heredoc_html) {
            $this_html_lines .= $char;
          }
          if ($in_heredoc_html && $char == $in_heredoc_html) {
            $in_heredoc_html = '';
          }
          unset($char);
        }
        elseif ($ext == 'php' && $char == '?' && $content[$pos + 1] == '>' && $in_comment !== '*') {

          // A closing PHP tag (outside of a block comment) leaves PHP mode.
          unset($char);
          $in_php = 0;
          $in_allphp = 0;
          $this_all_lines .= '>';
          ++$pos;
        }
        elseif ($in_comment) {
          $this_comment_lines .= $char;
          if ($in_comment == '*' && $char == '*' && $content[$pos + 1] == '/') {
            $in_comment = 0;
            $this_all_lines .= '/';
            $this_comment_lines .= '/';
            ++$pos;
          }

          // Do not add comments to php output.
          unset($char);
        }
        else {
          switch ($char) {
            case ',':
            case ')':
            case '(':

            // For 'foo' => 'bar' type syntax.
            case '>':
            case ':':

              // Look for separators which force a new quote string.
              if ($this_quote_index < 0 || !empty($this_quote_lines[$this_quote_index])) {
                $this_quote_sep = TRUE;
              }
              break;
            case '\'':
            case '"':

              // If the previous char is a backslash then we have not found the
              // ending-quote as this one is internal to the string. Keep going.
              if ($pos == 0 || $content[$pos - 1] != '\\') {
                $this_php_lines .= $char;
                $in_quote = $char;
                if ($this_quote_sep) {
                  $this_quote_lines[++$this_quote_index] = '';
                  if ($char == '"') {
                    $this_doublequote_lines[++$this_doublequote_index] = '';
                  }
                }
                $this_quote_sep = FALSE;
              }
              break;
            case '#':
              $this_comment_lines .= $char;
              $in_comment = $char;
              unset($char);
              break;
            case '/':
              $next_char = $content[$pos + 1];
              if ($next_char == '/' || $next_char == '*') {
                unset($char);
                $in_comment = $next_char;
                $this_all_lines .= $next_char;
                $this_comment_lines .= '/' . $next_char;
                ++$pos;
              }
              break;
            case '<':
              if ($content[$pos + 1] == '<' && $content[$pos + 2] == '<') {
                unset($char);
                $this_all_lines .= '<<';

                // Get the heredoc word.
                // Read until the end-of-line.
                $heredoc = '';
                for ($pos += 3; $pos < $content_length; ++$pos) {
                  $char = $content[$pos];
                  if ($char == "\n") {

                    // Step back so the outer loop processes the newline.
                    $pos--;
                    if (preg_match('/^\\s*(\\w+)/', $heredoc, $match)) {
                      $in_heredoc = $match[1];
                      $in_heredoc_length = _strlen($in_heredoc);
                    }
                    break;
                  }
                  $this_all_lines .= $char;
                  $heredoc .= $char;
                }

                // Replace heredoc's with an empty string.
                $this_php_lines .= '\'\'';
                $this_allphp_lines .= '\'\'';
                unset($char);
              }
              break;
          }
        }

        // Any character not consumed above is plain code.
        if (isset($char)) {
          $this_php_lines .= $char;
          $this_allphp_lines .= $char;
        }
      }
      else {
        switch ($char) {
          case '<':
            if ($ext == 'php' && $content[$pos + 1] == '?') {
              if ($content[$pos + 2] == ' ') {
                $in_php = 1;
                $in_allphp = 1;
                $this_all_lines .= '? ';
                $pos += 2;
              }
              elseif (_substr($content, $pos + 2, 3) == 'php') {
                $in_php = 1;
                $in_allphp = 1;
                $this_all_lines .= '?php';
                $pos += 4;
              }
              break;
            }

          // Purposefully fall through.
          default:
            $this_html_lines .= $char;
            break;
        }
      }
    }

    // Add the files lines to the arguments.
    $coder_args['#all_array_lines'] = isset($all_lines) ? $all_lines : array();
    $coder_args['#php_array_lines'] = isset($php_lines) ? $php_lines : array();
    $coder_args['#allphp_array_lines'] = isset($allphp_lines) ? $allphp_lines : array();
    $coder_args['#html_array_lines'] = isset($html_lines) ? $html_lines : array();
    $coder_args['#quote_array_lines'] = isset($quote_lines) ? $quote_lines : array();
    $coder_args['#doublequote_array_lines'] = isset($doublequote_lines) ? $doublequote_lines : array();
    $coder_args['#comment_array_lines'] = isset($comment_lines) ? $comment_lines : array();
    $coder_args['#all_lines'] = isset($full_lines) ? $full_lines : array();
    $coder_args['#raw_contents'] = $content;

    // The key of the last recorded line is the last non-blank line number.
    $coder_args['#num_lines'] = isset($full_lines) ? key(array_slice($full_lines, -1, 1, TRUE)) : 0;

    // Given the sanitized PHP lines, determine the class and function for each
    // line.
    $stack = array();
    $class_stack = $class_stack_paren = array();
    $function_stack = $function_stack_paren = array();
    $paren_depth = 0;
    foreach ($coder_args['#php_array_lines'] as $lineno => $line_array) {
      foreach ($line_array as $line) {

        // Check if this line is the beginning of a function definition.
        if (preg_match('/function (\\w+)\\s*\\(/', $line, $match) && !preg_match('/;/', $line)) {
          array_unshift($function_stack, $match[1]);
          array_unshift($function_stack_paren, $paren_depth);
        }

        // Check if this line is the beginning of a class definition.
        if (preg_match('/class (\\w+)/', $line, $match) || preg_match('/interface (\\w+)/', $line, $match)) {
          array_unshift($class_stack, $match[1]);
          array_unshift($class_stack_paren, $paren_depth);
        }

        // Check if this line changes the parenthesis depth.
        if (preg_match_all('/([{}])/', $line, $match)) {
          foreach ($match[0] as $paren_match) {
            $paren_depth += $paren_match == '{' ? 1 : -1;
          }

          // If the depth is now less than then current function depth, pop the
          // function from the stack.
          if ($function_stack_paren && $paren_depth <= $function_stack_paren[0]) {
            array_shift($function_stack);
            array_shift($function_stack_paren);
          }

          // If the depth is now less than the current class depth, pop the
          // class from the stack.
          if ($class_stack_paren && $paren_depth <= $class_stack_paren[0]) {
            array_shift($class_stack);
            array_shift($class_stack_paren);
          }
        }

        // Cache the current function and class for each line of each file.
        $stack[$lineno] = array(
          $class_stack ? $class_stack[0] : '',
          $function_stack ? $function_stack[0] : '',
        );
      }
    }
    $coder_args['#stack'] = $stack;

    // Read the coder warning directives in the comments.
    foreach ($coder_args['#comment_array_lines'] as $lineno => $line_array) {
      foreach ($line_array as $line) {
        $pos = strpos($line, '@ignore ');
        if ($pos !== FALSE && preg_match_all('/([\\w:]+)[\\s,]*/', _substr($line, $pos + 8), $matches)) {
          foreach ($matches[1] as $ignore) {

            // A directive is "rule" or "rule:scope"; the scope defaults to 1,
            // i.e. this line and the next one.
            list($rule_name, $scope) = explode(':', "{$ignore}:1");
            if ($scope == 'file') {

              // Find the end of the file.
              $scope = $coder_args['#num_lines'] - $lineno;
            }
            elseif ($scope == 'class' || $scope == 'function') {

              // What scope are we looking for?
              // #stack is an array($class_name, $function_name).
              $stack_index = $scope == 'class' ? 0 : 1;

              // Find the current scope.
              $current_scope = NULL;
              foreach (array(
                0,
                1,
              ) as $current_lineno) {
                if (!empty($stack[$lineno + $current_lineno][$stack_index])) {
                  $current_scope = $stack[$lineno + $current_lineno][$stack_index];
                  break;
                }
              }

              // Find the end of the class or function.
              if ($current_scope) {
                for ($scope = 1; !isset($stack[$lineno + $scope]) || $stack[$lineno + $scope][$stack_index] == $current_scope; ++$scope) {
                  if ($lineno + $scope > $coder_args['#num_lines']) {
                    break;
                  }
                }
              }
            }
            elseif ($scope == 'comment') {

              // Find the next line that is not a comment.
              for ($scope = 0; !empty($comment_lines[$lineno + $scope + 1]); ++$scope) {
              }
            }

            // Scopes that could not be resolved to a line count are dropped.
            if (is_numeric($scope)) {
              for ($line_offset = 0; $line_offset <= $scope; ++$line_offset) {
                $ignores[$lineno + $line_offset][$rule_name] = $rule_name;
              }
            }
          }
        }
      }
    }

    // Ignore directives are only honoured when the setting is enabled.
    $coder_args['#ignores'] = isset($ignores) && !empty($coder_args['#settings_ignore']) ? $ignores : array();
    return 1;
  }
}