function _coder_review_read_and_parse_file in Coder 7.2
Same name and namespace in other branches
- 7 coder_review/coder_review.module \_coder_review_read_and_parse_file()
Parses and reads source files into a format for easier review validation.
For each source file, the following arrays of parsed lines (stored without their newline characters) will be added to the Coder arguments array:
- #all_array_lines:
- #all_lines:
- #allphp_array_lines:
- #comment_array_lines:
- #doublequote_array_lines:
- #html_array_lines:
- #php_array_lines:
- #quote_array_lines:
The _array_ variants are multidimensional arrays: the first index is the line number, and the second index is each occurrence within the line. #all_lines is a simple array holding each non-blank line from the source file, indexed by its line number.
Parameters
array $coder_args: A Coder arguments array, passed by reference.
Return value
int Integer 1 if success.
1 call to _coder_review_read_and_parse_file()
- do_coder_reviews in coder_review/
coder_review.common.inc - Performs coder reviews for multiple code review definition files.
File
- coder_review/
coder_review.common.inc, line 377 - Common functions used by both the drush and form interfaces.
Code
/**
 * Parses and reads source files into a format for easier review validation.
 *
 * The content to parse is taken, in order of preference, from
 * $coder_args['#patch'], $coder_args['#test'], or the file named by
 * $coder_args['#filename']. The content is walked one character at a time and
 * split into several per-line views, which are stored back into $coder_args:
 * - #all_array_lines: Every non-blank line, unmodified.
 * - #all_lines: Same lines as a flat array keyed by line number.
 * - #php_array_lines: Code with comments removed and quoted strings emptied.
 * - #allphp_array_lines: Code with comments removed, quoted strings kept.
 * - #html_array_lines: Text outside of code, plus markup found in strings.
 * - #quote_array_lines: The content of quoted strings.
 * - #doublequote_array_lines: The content of double-quoted strings.
 * - #comment_array_lines: The content of comments.
 * The _array_ variants are keyed by line number first, then by occurrence
 * within the line. Stored lines never include their newline characters.
 *
 * Additionally sets:
 * - #raw_contents: The parsed content (with one "\n" appended).
 * - #num_lines: The line number of the last non-blank line, or 0.
 * - #stack: For each code line, array($class_name, $function_name).
 * - #ignores: Rules silenced per line through "@ignore" comment directives;
 *   empty unless $coder_args['#settings_ignore'] is truthy.
 *
 * @param array $coder_args
 *   A Coder arguments array, passed by reference. Reads '#filename',
 *   '#php_extensions', '#include_extensions' and, optionally, '#patch',
 *   '#test' and '#settings_ignore'.
 *
 * @return int|null
 *   Integer 1 if success. Nothing is returned when neither '#patch' nor
 *   '#test' is given and the file does not exist.
 */
function _coder_review_read_and_parse_file(array &$coder_args) {
  // Determine the file extension type.
  // Set all allowed PHP extensions to 'php'.
  $pathinfo = pathinfo($coder_args['#filename']);
  // pathinfo() omits the 'extension' key when the name contains no dot.
  $extension = isset($pathinfo['extension']) ? $pathinfo['extension'] : '';
  $allowed_extensions = array_merge($coder_args['#php_extensions'], $coder_args['#include_extensions'], array(
    'module',
    'theme',
  ));

  // If the file extension is any of the allowed extensions (other than 'js')
  // then set $ext to 'php', otherwise use the actual extension.
  $ext = in_array($extension, array_diff($allowed_extensions, array(
    'js',
  ))) ? 'php' : $extension;

  /* The use of variables with 'php' in them ($in_php, $in_all_php, $php_lines,
   * etc.) is misleading. All references to such should be renamed 'code'
   * because we also are using this engine to read 'js' files.
   */

  // Get the path to the module file.
  $filepath = realpath($coder_args['#filename']);
  if (!empty($coder_args['#patch']) || !empty($coder_args['#test']) || file_exists($filepath)) {
    // Javascript files are code from the first character; PHP files start
    // outside of code until an opening tag is seen.
    $in_php = $ext == 'js' ? 1 : 0;
    $in_allphp = $in_php;
    $in_comment = 0;

    if (!empty($coder_args['#patch'])) {
      $content = $coder_args['#patch'];
      if (preg_match('/^\\s*\\*/', $content)) {
        $in_comment = '*';
      }
      else {
        $content = preg_replace('/^(function\\s.*?(\\r\\n|\\n)+)(\\s*\\*)/', '${1}/*', $content);
        $in_php = 1;
        $in_allphp = 1;
      }
    }
    elseif (!empty($coder_args['#test'])) {
      $content = $coder_args['#test'];
      $in_php = 1;
      $in_allphp = 1;
    }
    else {
      $content = file_get_contents($filepath);
    }

    // Guarantee a final newline so that the last line is always flushed and
    // so that one-character look-aheads below never run past the content.
    $content .= "\n";
    $content_length = strlen($content);

    // NOTE(review): this reset discards the '*' state assigned by the #patch
    // branch above. Kept as-is to preserve existing behavior; confirm whether
    // that is intended.
    $in_comment = 0;
    $in_quote_html = 0;
    $in_backslash = 0;
    $in_quote = 0;
    $in_heredoc = 0;
    $in_heredoc_length = 0;
    $in_heredoc_html = '';
    $beginning_of_line = 0;
    $this_all_lines = '';
    $this_php_lines = '';
    $this_allphp_lines = '';
    $this_html_lines = '';
    $this_quote_lines = array(
      '',
    );
    $this_quote_index = -1;
    $this_quote_sep = FALSE;
    $this_doublequote_lines = array(
      '',
    );
    $this_doublequote_index = -1;
    $this_comment_lines = '';

    // Parse the file:
    // - Strip comments,
    // - Strip quote content,
    // - Strip stuff not in php,
    // - Break into lines.
    $lineno = 1;
    for ($pos = 0; $pos < $content_length; ++$pos) {
      // Get the current character.
      $char = $content[$pos];
      // Look ahead to the next character, to cater for \r\n line ends.
      $next_char = isset($content[$pos + 1]) ? $content[$pos + 1] : '';

      if ($char == "\n" || $char . $next_char == "\r\n") {
        // End C++ style comments on newline.
        if ($in_comment === '/' || $in_comment === '#') {
          $in_comment = 0;
        }

        // Assume that html inside quotes doesn't span newlines.
        $in_quote_html = 0;

        // Coder's simpletests assertions validly contain bad code, for
        // testing the review rules, so nothing from such a line is recorded.
        // The line still goes through the common end-of-line handling below
        // so that CRLF endings are consumed once and all per-line state
        // (including the double-quote buffers and indexes) is reset.
        $is_test_assertion = preg_match('/assertCoderReview(Fail|Pass)/', $this_all_lines);

        // Remove blank lines now, so we avoid processing them over-and-over.
        if (!$is_test_assertion && $this_all_lines != '') {
          if (trim($this_all_lines, "\r\n") != '') {
            $all_lines[$lineno] = array(
              $this_all_lines,
            );
            $full_lines[$lineno] = $this_all_lines;
          }
          if (trim($this_php_lines, "\r\n") != '') {
            $php_lines[$lineno] = array(
              $this_php_lines,
            );
          }
          if (trim($this_allphp_lines, "\r\n") != '') {
            $allphp_lines[$lineno] = array(
              $this_allphp_lines,
            );
          }
          if (trim($this_html_lines, "\r\n") != '') {
            $html_lines[$lineno] = array(
              $this_html_lines,
            );
          }
          $quotes = array();
          foreach ($this_quote_lines as $quote_line) {
            if (trim($quote_line, "\r\n") != '') {
              $quotes[] = $quote_line;
            }
          }
          if ($quotes) {
            $quote_lines[$lineno] = $quotes;
          }
          $quotes = array();
          foreach ($this_doublequote_lines as $quote_line) {
            if (trim($quote_line, "\r\n") != '') {
              $quotes[] = $quote_line;
            }
          }
          if ($quotes) {
            $doublequote_lines[$lineno] = $quotes;
          }
          if (trim($this_comment_lines, "\r\n") != '') {
            $comment_lines[$lineno] = array(
              $this_comment_lines,
            );
          }
        }

        // Increment $pos by an extra one if the newline was indicated by the
        // two-character CRLF 'carriage return line feed'.
        if ($char . $next_char == "\r\n") {
          ++$pos;
        }

        // Save this line and start a new line.
        ++$lineno;
        $this_all_lines = '';
        $this_php_lines = '';
        $this_allphp_lines = '';
        $this_html_lines = '';
        $this_quote_lines = array(
          '',
        );
        $this_doublequote_lines = array(
          '',
        );
        $this_quote_index = -1;
        $this_quote_sep = FALSE;
        $this_doublequote_index = -1;
        $this_comment_lines = '';
        $beginning_of_line = 1;
        continue;
      }

      // Once anything has been collected we are past the first character.
      if ($this_all_lines != '') {
        $beginning_of_line = 0;
      }
      $this_all_lines .= $char;

      if ($in_php || $in_allphp) {
        // When in a quoted string, look for the trailing quote; strip the
        // characters in the string and replace with '' or "".
        if ($in_quote) {
          if ($in_backslash) {
            $in_backslash = 0;
          }
          elseif ($char == '\\') {
            $in_backslash = 1;
          }
          elseif ($char == $in_quote && !$in_backslash) {
            $in_quote = 0;
          }
          elseif ($char == '<') {
            $in_quote_html = '>';
          }

          // Still inside the string: record the character.
          if ($in_quote) {
            if ($this_quote_index == -1) {
              $this_quote_index = 0;
            }
            $this_quote_lines[$this_quote_index] .= $char;
            if ($in_quote == '"') {
              if ($this_doublequote_index == -1) {
                $this_doublequote_index = 0;
              }
              $this_doublequote_lines[$this_doublequote_index] .= $char;
            }
            if ($in_quote_html) {
              $this_html_lines .= $char;
            }
          }
          if ($char == $in_quote_html) {
            $in_quote_html = 0;
          }
          $this_allphp_lines .= $char;
          // @note: Trailing char output with starting one.
          unset($char);
        }
        elseif ($in_heredoc) {
          // @note: drupal_substr does not properly handle multi-byte characters in this string.
          // @todo: check other places where the drupal_ string functions fail.
          if ($beginning_of_line && $char == $in_heredoc[0] && substr($content, $pos, $in_heredoc_length) == $in_heredoc) {
            // Found the closing heredoc identifier at the start of a line.
            $this_all_lines .= _substr($content, $pos + 1, $in_heredoc_length - 1);
            $in_heredoc = 0;
            $pos += $in_heredoc_length;
          }
          elseif ($char == '<') {
            $in_heredoc_html = '>';
          }
          if ($in_heredoc && $in_heredoc_html) {
            $this_html_lines .= $char;
          }
          if ($in_heredoc_html && $char == $in_heredoc_html) {
            $in_heredoc_html = '';
          }
          unset($char);
        }
        elseif ($ext == 'php' && $char == '?' && $content[$pos + 1] == '>' && $in_comment !== '*') {
          // Closing PHP tag: leave code mode.
          unset($char);
          $in_php = 0;
          $in_allphp = 0;
          $this_all_lines .= '>';
          ++$pos;
        }
        elseif ($in_comment) {
          $this_comment_lines .= $char;
          if ($in_comment == '*' && $char == '*' && $content[$pos + 1] == '/') {
            $in_comment = 0;
            $this_all_lines .= '/';
            $this_comment_lines .= '/';
            ++$pos;
          }
          // Do not add comments to php output.
          unset($char);
        }
        else {
          switch ($char) {
            case ',':
            case ')':
            case '(':
              // For 'foo' => 'bar' type syntax.
            case '>':
            case ':':
              // Look for separators which force a new quote string.
              if ($this_quote_index < 0 || !empty($this_quote_lines[$this_quote_index])) {
                $this_quote_sep = TRUE;
              }
              break;

            case '\'':
            case '"':
              // If the previous char is a backslash then we have not found the
              // ending-quote as this one is internal to the string. Keep going.
              if ($pos == 0 || $content[$pos - 1] != '\\') {
                $this_php_lines .= $char;
                $in_quote = $char;
                if ($this_quote_sep) {
                  $this_quote_lines[++$this_quote_index] = '';
                  if ($char == '"') {
                    $this_doublequote_lines[++$this_doublequote_index] = '';
                  }
                }
                $this_quote_sep = FALSE;
              }
              break;

            case '#':
              $this_comment_lines .= $char;
              $in_comment = $char;
              unset($char);
              break;

            case '/':
              $next_char = $content[$pos + 1];
              if ($next_char == '/' || $next_char == '*') {
                unset($char);
                $in_comment = $next_char;
                $this_all_lines .= $next_char;
                $this_comment_lines .= '/' . $next_char;
                ++$pos;
              }
              break;

            case '<':
              if ($content[$pos + 1] == '<' && $content[$pos + 2] == '<') {
                unset($char);
                $this_all_lines .= '<<';

                // Get the heredoc word.
                // Read until the end-of-line.
                $heredoc = '';
                for ($pos += 3; $pos < $content_length; ++$pos) {
                  $char = $content[$pos];
                  if ($char == "\n") {
                    // Step back so the outer loop processes this newline.
                    $pos--;
                    if (preg_match('/^\\s*(\\w+)/', $heredoc, $match)) {
                      $in_heredoc = $match[1];
                      $in_heredoc_length = _strlen($in_heredoc);
                    }
                    break;
                  }
                  $this_all_lines .= $char;
                  $heredoc .= $char;
                }

                // Replace heredoc's with an empty string.
                $this_php_lines .= '\'\'';
                $this_allphp_lines .= '\'\'';
                unset($char);
              }
              break;
          }
        }

        // Anything not consumed above is ordinary code.
        if (isset($char)) {
          $this_php_lines .= $char;
          $this_allphp_lines .= $char;
        }
      }
      else {
        switch ($char) {
          case '<':
            if ($ext == 'php' && $content[$pos + 1] == '?') {
              if ($content[$pos + 2] == ' ') {
                $in_php = 1;
                $in_allphp = 1;
                $this_all_lines .= '? ';
                $pos += 2;
              }
              elseif (_substr($content, $pos + 2, 3) == 'php') {
                $in_php = 1;
                $in_allphp = 1;
                $this_all_lines .= '?php';
                $pos += 4;
              }
              break;
            }
            // Purposefully fall through.
          default:
            $this_html_lines .= $char;
            break;
        }
      }
    }

    // Add the files lines to the arguments.
    $coder_args['#all_array_lines'] = isset($all_lines) ? $all_lines : array();
    $coder_args['#php_array_lines'] = isset($php_lines) ? $php_lines : array();
    $coder_args['#allphp_array_lines'] = isset($allphp_lines) ? $allphp_lines : array();
    $coder_args['#html_array_lines'] = isset($html_lines) ? $html_lines : array();
    $coder_args['#quote_array_lines'] = isset($quote_lines) ? $quote_lines : array();
    $coder_args['#doublequote_array_lines'] = isset($doublequote_lines) ? $doublequote_lines : array();
    $coder_args['#comment_array_lines'] = isset($comment_lines) ? $comment_lines : array();
    $coder_args['#all_lines'] = isset($full_lines) ? $full_lines : array();
    $coder_args['#raw_contents'] = $content;
    // The key of the last stored line is the last non-blank line number.
    $coder_args['#num_lines'] = isset($full_lines) ? key(array_slice($full_lines, -1, 1, TRUE)) : 0;

    // Given the sanitized PHP lines, determine the class and function for each
    // line.
    $stack = array();
    $class_stack = $class_stack_paren = array();
    $function_stack = $function_stack_paren = array();
    $paren_depth = 0;
    foreach ($coder_args['#php_array_lines'] as $lineno => $line_array) {
      foreach ($line_array as $line) {
        // Check if this line is the beginning of a function definition.
        if (preg_match('/function (\\w+)\\s*\\(/', $line, $match) && !preg_match('/;/', $line)) {
          array_unshift($function_stack, $match[1]);
          array_unshift($function_stack_paren, $paren_depth);
        }

        // Check if this line is the beginning of a class definition.
        if (preg_match('/class (\\w+)/', $line, $match) || preg_match('/interface (\\w+)/', $line, $match)) {
          array_unshift($class_stack, $match[1]);
          array_unshift($class_stack_paren, $paren_depth);
        }

        // Check if this line changes the parenthesis depth.
        if (preg_match_all('/([{}])/', $line, $match)) {
          foreach ($match[0] as $paren_match) {
            $paren_depth += $paren_match == '{' ? 1 : -1;
          }

          // If the depth is now less than then current function depth, pop the
          // function from the stack.
          if ($function_stack_paren && $paren_depth <= $function_stack_paren[0]) {
            array_shift($function_stack);
            array_shift($function_stack_paren);
          }

          // If the depth is now less than the current class depth, pop the
          // class from the stack.
          if ($class_stack_paren && $paren_depth <= $class_stack_paren[0]) {
            array_shift($class_stack);
            array_shift($class_stack_paren);
          }
        }

        // Cache the current function and class for each line of each file.
        $stack[$lineno] = array(
          $class_stack ? $class_stack[0] : '',
          $function_stack ? $function_stack[0] : '',
        );
      }
    }
    $coder_args['#stack'] = $stack;

    // Read the coder warning directives in the comments.
    foreach ($coder_args['#comment_array_lines'] as $lineno => $line_array) {
      foreach ($line_array as $line) {
        $pos = strpos($line, '@ignore ');
        if ($pos !== FALSE && preg_match_all('/([\\w:]+)[\\s,]*/', _substr($line, $pos + 8), $matches)) {
          foreach ($matches[1] as $ignore) {
            // A directive is "rule" or "rule:scope"; appending ":1" makes the
            // scope default to one following line.
            list($rule_name, $scope) = explode(':', "{$ignore}:1");
            if ($scope == 'file') {
              // Find the end of the file.
              $scope = $coder_args['#num_lines'] - $lineno;
            }
            elseif ($scope == 'class' || $scope == 'function') {
              // What scope are we looking for?
              // #stack is an array($class_name, $function_name).
              $stack_index = $scope == 'class' ? 0 : 1;

              // Find the current scope.
              $current_scope = NULL;
              foreach (array(
                0,
                1,
              ) as $current_lineno) {
                if (!empty($stack[$lineno + $current_lineno][$stack_index])) {
                  $current_scope = $stack[$lineno + $current_lineno][$stack_index];
                  break;
                }
              }

              // Find the end of the class or function.
              if ($current_scope) {
                for ($scope = 1; !isset($stack[$lineno + $scope]) || $stack[$lineno + $scope][$stack_index] == $current_scope; ++$scope) {
                  if ($lineno + $scope > $coder_args['#num_lines']) {
                    break;
                  }
                }
              }
            }
            elseif ($scope == 'comment') {
              // Find the next line that is not a comment.
              for ($scope = 0; !empty($comment_lines[$lineno + $scope + 1]); ++$scope) {
              }
            }

            // Any scope that could not be resolved to a number is skipped.
            if (is_numeric($scope)) {
              for ($line_offset = 0; $line_offset <= $scope; ++$line_offset) {
                $ignores[$lineno + $line_offset][$rule_name] = $rule_name;
              }
            }
          }
        }
      }
    }
    // Only honor the directives when the (optional) setting enables them.
    $coder_args['#ignores'] = isset($ignores) && !empty($coder_args['#settings_ignore']) ? $ignores : array();
    return 1;
  }
}