View source
<?php
class PHPUnit_Util_XML {
/**
 * Escapes a string for embedding in XML output.
 *
 * The value is first passed through PHPUnit_Util_String::convertToUtf8(),
 * then HTML-escaped (ENT_QUOTES, UTF-8), and finally stripped of the control
 * characters 0x00-0x08, 0x0B, 0x0C, 0x0E-0x1F and 0x7F.
 *
 * @param  string $string Text to prepare.
 * @return string The escaped text without the listed control characters.
 */
public static function prepareString($string) {
    $utf8    = PHPUnit_Util_String::convertToUtf8($string);
    $escaped = htmlspecialchars($utf8, ENT_QUOTES, 'UTF-8');

    return preg_replace('/[\\x00-\\x08\\x0b\\x0c\\x0e-\\x1f\\x7f]/', '', $escaped);
}
/**
 * Reads a file and parses its contents into a DOMDocument via load().
 *
 * Error reporting is switched off while the file is read, so an unreadable
 * file surfaces as an exception rather than a PHP warning.
 *
 * @param  string  $filename Path of the file to read.
 * @param  boolean $isHtml   Forwarded to load().
 * @param  boolean $xinclude Forwarded to load().
 * @param  boolean $strict   Forwarded to load().
 * @return DOMDocument
 * @throws PHPUnit_Framework_Exception When the file cannot be read.
 */
public static function loadFile($filename, $isHtml = false, $xinclude = false, $strict = false) {
    $previousLevel = error_reporting(0);
    $source        = file_get_contents($filename);
    error_reporting($previousLevel);

    if ($source !== false) {
        return self::load($source, $isHtml, $filename, $xinclude, $strict);
    }

    throw new PHPUnit_Framework_Exception(sprintf('Could not read "%s".', $filename));
}
/**
 * Parses an XML or HTML string into a DOMDocument.
 *
 * A DOMDocument passed as $actual is returned untouched. Otherwise a new
 * document is created with preserveWhiteSpace disabled. While parsing, PHP
 * error reporting is turned off and libxml errors are collected internally;
 * both settings are restored before returning or throwing.
 *
 * @param  string|DOMDocument $actual   Markup to parse, or an existing document.
 * @param  boolean            $isHtml   Parse with loadHTML() instead of loadXML().
 * @param  string             $filename Used as documentURI and in error messages when not ''.
 * @param  boolean            $xinclude Process XIncludes (XML only); the working directory is
 *                                      temporarily switched to dirname($filename).
 * @param  boolean            $strict   Also fail when libxml reported any error.
 * @return DOMDocument
 * @throws PHPUnit_Framework_Exception When the input is empty or cannot be loaded, or when
 *                                     $strict is set and libxml reported errors.
 */
public static function load($actual, $isHtml = false, $filename = '', $xinclude = false, $strict = false) {
    if ($actual instanceof DOMDocument) {
        return $actual;
    }

    // Reject empty input up front, before any global state is modified.
    // DOMDocument::loadXML()/loadHTML() throw a ValueError for an empty
    // source on PHP 8, which would otherwise escape with error reporting
    // disabled, libxml internal errors enabled and the cwd changed.
    if ($actual === '' || $actual === null || $actual === false) {
        if ($filename !== '') {
            throw new PHPUnit_Framework_Exception(sprintf('Could not load "%s".%s', $filename, ''));
        }

        throw new PHPUnit_Framework_Exception('Could not load XML from empty string');
    }

    // Relative XInclude hrefs are resolved against the including file.
    $cwd = false;
    if ($xinclude) {
        $cwd = getcwd();
        @chdir(dirname($filename));
    }

    $document = new DOMDocument();
    $document->preserveWhiteSpace = false;

    $internal  = libxml_use_internal_errors(true);
    $message   = '';
    $reporting = error_reporting(0);

    if ('' !== $filename) {
        $document->documentURI = $filename;
    }

    if ($isHtml) {
        $loaded = $document->loadHTML($actual);
    } else {
        $loaded = $document->loadXML($actual);
    }

    if (!$isHtml && $xinclude) {
        $document->xinclude();
    }

    foreach (libxml_get_errors() as $error) {
        $message .= "\n" . $error->message;
    }

    // Restore everything that was changed above.
    libxml_use_internal_errors($internal);
    error_reporting($reporting);

    if ($xinclude && $cwd !== false) {
        @chdir($cwd);
    }

    if ($loaded === false || ($strict && $message !== '')) {
        if ($filename !== '') {
            throw new PHPUnit_Framework_Exception(sprintf('Could not load "%s".%s', $filename, $message != '' ? "\n" . $message : ''));
        }

        throw new PHPUnit_Framework_Exception($message);
    }

    return $document;
}
/**
 * Renders the contents of a node as a string.
 *
 * A node with exactly one child yields its textContent; otherwise every
 * child is serialised with the owner document's saveXML() and the pieces
 * are concatenated.
 *
 * @param  DOMNode $node
 * @return string
 */
public static function nodeToText(DOMNode $node) {
    $children = $node->childNodes;

    if ($children->length == 1) {
        return $node->textContent;
    }

    $pieces = array();
    foreach ($children as $child) {
        $pieces[] = $node->ownerDocument->saveXML($child);
    }

    return implode('', $pieces);
}
/**
 * Removes every direct child of the node that is a DOMCharacterData
 * instance. Only direct children are inspected; nothing is returned.
 *
 * @param DOMNode $node Node whose direct children are cleaned in place.
 */
public static function removeCharacterDataNodes(DOMNode $node) {
    if (!$node->hasChildNodes()) {
        return;
    }

    // Walk from the last child to the first so removals never shift the
    // index of a child that still has to be visited.
    $index = $node->childNodes->length;
    while (--$index >= 0) {
        $candidate = $node->childNodes->item($index);
        if ($candidate instanceof DOMCharacterData) {
            $node->removeChild($candidate);
        }
    }
}
/**
 * Converts an XML description of a value into the PHP value it describes.
 *
 * Supported tag names:
 *  - array:   each direct <element> child contributes one entry; its first
 *             element child is converted recursively and stored under the
 *             "key" attribute when present, otherwise appended.
 *  - object:  instantiates the class named by the "class" attribute; when the
 *             element has children, the element children of its first element
 *             child are converted and passed as constructor arguments.
 *  - boolean: true only when the text content is exactly "true".
 *  - integer, double, string: the text content cast with settype().
 * Any other tag name yields null.
 *
 * @param  DOMElement $element
 * @return mixed
 */
public static function xmlToVariable(DOMElement $element) {
    $variable = null;

    switch ($element->tagName) {
        case 'array':
            $variable = array();

            // Only direct children belong to this array. Scanning all
            // descendants would pull the entries of nested arrays into the
            // outer array and overwrite its keys.
            foreach ($element->childNodes as $entry) {
                if (!$entry instanceof DOMElement || $entry->tagName !== 'element') {
                    continue;
                }

                // The value is the first element child; surrounding
                // whitespace or comment nodes are ignored.
                $item = null;
                foreach ($entry->childNodes as $candidate) {
                    if ($candidate instanceof DOMElement) {
                        $item = $candidate;
                        break;
                    }
                }

                $value = $item !== null ? self::xmlToVariable($item) : null;

                if ($entry->hasAttribute('key')) {
                    $variable[(string) $entry->getAttribute('key')] = $value;
                } else {
                    $variable[] = $value;
                }
            }
            break;

        case 'object':
            $className = $element->getAttribute('class');

            if ($element->hasChildNodes()) {
                // Locate the arguments container without assuming that a
                // whitespace text node precedes it.
                $container = null;
                foreach ($element->childNodes as $candidate) {
                    if ($candidate instanceof DOMElement) {
                        $container = $candidate;
                        break;
                    }
                }

                $constructorArgs = array();
                if ($container !== null) {
                    foreach ($container->childNodes as $argument) {
                        if ($argument instanceof DOMElement) {
                            $constructorArgs[] = self::xmlToVariable($argument);
                        }
                    }
                }

                $class    = new ReflectionClass($className);
                $variable = $class->newInstanceArgs($constructorArgs);
            } else {
                $variable = new $className();
            }
            break;

        case 'boolean':
            $variable = $element->textContent == 'true' ? true : false;
            break;

        case 'integer':
        case 'double':
        case 'string':
            $variable = $element->textContent;
            settype($variable, $element->tagName);
            break;
    }

    return $variable;
}
/**
 * Validates the keys of an options array and fills in defaults.
 *
 * $validKeys may mix two forms: a plain list entry ('name') declares a valid
 * key whose default is null, while a keyed entry ('name' => default) declares
 * a valid key with that default. Every key of $hash must be one of the valid
 * keys. Valid keys that are missing from $hash, or whose value is null, are
 * set to their default.
 *
 * @param  array $hash      Options to validate.
 * @param  array $validKeys Allowed keys, optionally with defaults.
 * @return array $hash completed with defaults.
 * @throws PHPUnit_Framework_Exception When $hash contains keys that are not valid.
 */
public static function assertValidKeys(array $hash, array $validKeys) {
    $valids = array();
    foreach ($validKeys as $key => $val) {
        if (is_int($key)) {
            $valids[$val] = null;
        } else {
            $valids[$key] = $val;
        }
    }

    // Exact key lookup instead of a loose in_array(): with loose comparison
    // an integer key such as 0 equals any non-numeric string on PHP < 8 and
    // would slip through validation.
    $unknown = array();
    foreach ($hash as $key => $value) {
        if (!array_key_exists($key, $valids)) {
            $unknown[] = $key;
        }
    }

    if (!empty($unknown)) {
        throw new PHPUnit_Framework_Exception('Unknown key(s): ' . implode(', ', $unknown));
    }

    foreach ($valids as $key => $value) {
        if (!isset($hash[$key])) {
            $hash[$key] = $value;
        }
    }

    return $hash;
}
/**
 * Translates a simple CSS selector into the nested options array understood
 * by findNodes().
 *
 * The selector is split on whitespace and processed from right to left. Each
 * simple selector may contain a tag name, "#id", ".class" and "[attr...]"
 * parts; ">" and "+" between simple selectors become 'child' and
 * 'adjacent-sibling' links, plain whitespace becomes a 'descendant' link.
 * Attribute operators: "=" (exact value), "~=" (value as a whole word) and
 * "*=" (value as a substring); the latter two are emitted as "regexp:" values.
 *
 * @param  string $selector CSS selector.
 * @param  mixed  $content  When a string, stored as the 'content' criterion.
 * @return array  Options array for the left-most simple selector, with the
 *                remaining selectors nested beneath it.
 */
public static function convertSelectToTag($selector, $content = true) {
    $selector = trim(preg_replace("/\\s+/", ' ', $selector));

    // Protect whitespace inside quoted attribute values so that the split on
    // spaces below does not cut an attribute selector in half.
    while (preg_match('/\\[[^\\]]+"[^"]+\\s[^"]+"\\]/', $selector)) {
        $selector = preg_replace('/(\\[[^\\]]+"[^"]+)\\s([^"]+"\\])/', '$1__SPACE__$2', $selector);
    }

    // explode() already yields a one-element array when there is no space.
    $elements = explode(' ', $selector);

    // Initialised so that a selector made only of combinators returns an
    // empty array instead of an undefined variable.
    $tag         = array();
    $previousTag = array();

    foreach (array_reverse($elements) as $element) {
        $element = str_replace('__SPACE__', ' ', $element);

        // Combinators re-label the link to the selector on their right.
        if ($element == '>') {
            $previousTag = array(
                'child' => isset($previousTag['descendant']) ? $previousTag['descendant'] : null,
            );
            continue;
        }

        if ($element == '+') {
            $previousTag = array(
                'adjacent-sibling' => isset($previousTag['descendant']) ? $previousTag['descendant'] : null,
            );
            continue;
        }

        $tag = array();

        // Tag name: everything up to the first ".", "#" or "[".
        preg_match("/^([^\\.#\\[]*)/", $element, $eltMatches);
        if (!empty($eltMatches[1])) {
            $tag['tag'] = $eltMatches[1];
        }

        // Attribute, id and class parts.
        preg_match_all("/(\\[[^\\]]*\\]*|#[^\\.#\\[]*|\\.[^\\.#\\[]*)/", $element, $matches);
        if (!empty($matches[1])) {
            $classes = array();
            $attrs   = array();

            foreach ($matches[1] as $match) {
                if (substr($match, 0, 1) == '#') {
                    $tag['id'] = substr($match, 1);
                } elseif (substr($match, 0, 1) == '.') {
                    $classes[] = substr($match, 1);
                } elseif (substr($match, 0, 1) == '[' && substr($match, -1, 1) == ']') {
                    $attribute = substr($match, 1, strlen($match) - 2);
                    $attribute = str_replace('"', '', $attribute);

                    // The value is literal text: quote it before embedding it
                    // in a pattern, and split on the first operator only so
                    // values that contain "=" stay intact.
                    if (strstr($attribute, '~=')) {
                        list($key, $value) = explode('~=', $attribute, 2);
                        $value = 'regexp:/.*\\b' . preg_quote($value, '/') . '\\b.*/';
                    } elseif (strstr($attribute, '*=')) {
                        list($key, $value) = explode('*=', $attribute, 2);
                        $value = 'regexp:/.*' . preg_quote($value, '/') . '.*/';
                    } else {
                        $parts = explode('=', $attribute, 2);
                        $key   = $parts[0];
                        // A bare [attr] has no value part.
                        $value = isset($parts[1]) ? $parts[1] : null;
                    }

                    $attrs[$key] = $value;
                }
            }

            if (!empty($classes)) {
                $tag['class'] = implode(' ', $classes);
            }

            if (!empty($attrs)) {
                $tag['attributes'] = $attrs;
            }
        }

        if (is_string($content)) {
            $tag['content'] = $content;
        }

        // Attach the selector to the right under the appropriate relation.
        if (!empty($previousTag['descendant'])) {
            $tag['descendant'] = $previousTag['descendant'];
        } elseif (!empty($previousTag['child'])) {
            $tag['child'] = $previousTag['child'];
        } elseif (!empty($previousTag['adjacent-sibling'])) {
            $tag['adjacent-sibling'] = $previousTag['adjacent-sibling'];
            unset($tag['content']);
        }

        $previousTag = array(
            'descendant' => $tag,
        );
    }

    return $tag;
}
/**
 * Runs a CSS selector against a document.
 *
 * The selector is converted with convertSelectToTag(), the markup is loaded
 * with load(), and the resulting criteria are evaluated by findNodes().
 *
 * @param  string             $selector CSS selector.
 * @param  mixed              $content  Forwarded to convertSelectToTag().
 * @param  string|DOMDocument $actual   Markup or document to search.
 * @param  boolean            $isHtml   Treat $actual as HTML.
 * @return mixed Whatever findNodes() returns for the criteria.
 */
public static function cssSelect($selector, $content, $actual, $isHtml = true) {
    $criteria = self::convertSelectToTag($selector, $content);

    return self::findNodes(self::load($actual, $isHtml), $criteria, $isHtml);
}
/**
 * Finds the elements of a document that satisfy a set of criteria.
 *
 * Recognised option keys (validated with assertValidKeys()):
 *  - tag:              element name; when omitted, a fixed list of HTML tag
 *                      names is searched instead.
 *  - id, class:        shorthand for entries in 'attributes'.
 *  - attributes:       name => value map. A value starting with "regexp:" is
 *                      matched as a pattern; 'class' requires every listed
 *                      class to be present; anything else is compared loosely.
 *  - content:          "regexp:" pattern, '' (node text must be empty) or a
 *                      substring of the node text.
 *  - parent, ancestor: nested criteria; only the FIRST node matching them is
 *                      used as the required parent/ancestor.
 *  - child, descendant, adjacent-sibling: nested criteria that some child,
 *                      descendant or the next element sibling must satisfy.
 *  - children:         array with 'count', 'greater_than', 'less_than' and/or
 *                      'only' (nested criteria every child must satisfy),
 *                      applied to the non-text children of each node. Nodes
 *                      without such children never pass this filter.
 *
 * Filters are applied in the order above; each one narrows the node list.
 *
 * @param  DOMDocument $dom     Document to search.
 * @param  array       $options Matching criteria.
 * @param  boolean     $isHtml  Look tag names up case-insensitively.
 * @return array|false Matching nodes, or false as soon as a step leaves no node.
 * @throws PHPUnit_Framework_Exception When an unknown option key is supplied.
 */
public static function findNodes(DOMDocument $dom, array $options, $isHtml = true) {
    $valid = array(
        'id', 'class', 'tag', 'content', 'attributes', 'parent', 'child',
        'ancestor', 'descendant', 'children', 'adjacent-sibling',
    );

    $filtered = array();
    $options  = self::assertValidKeys($options, $valid);

    // id and class are plain attribute matches.
    if ($options['id']) {
        $options['attributes']['id'] = $options['id'];
    }

    if ($options['class']) {
        $options['attributes']['class'] = $options['class'];
    }

    // Collect the candidate elements, tag name by tag name.
    if ($options['tag']) {
        $tags = array($options['tag']);
    } else {
        $tags = array(
            'a', 'abbr', 'acronym', 'address', 'area', 'b', 'base', 'bdo',
            'big', 'blockquote', 'body', 'br', 'button', 'caption', 'cite',
            'code', 'col', 'colgroup', 'dd', 'del', 'div', 'dfn', 'dl', 'dt',
            'em', 'fieldset', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3',
            'h4', 'h5', 'h6', 'head', 'hr', 'html', 'i', 'iframe', 'img',
            'input', 'ins', 'kbd', 'label', 'legend', 'li', 'link', 'map',
            'meta', 'noframes', 'noscript', 'object', 'ol', 'optgroup',
            'option', 'p', 'param', 'pre', 'q', 'samp', 'script', 'select',
            'small', 'span', 'strong', 'style', 'sub', 'sup', 'table',
            'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'title', 'tr',
            'tt', 'ul', 'var',
            'article', 'aside', 'audio', 'bdi', 'canvas', 'command',
            'datalist', 'details', 'dialog', 'embed', 'figure', 'figcaption',
            'footer', 'header', 'hgroup', 'keygen', 'mark', 'meter', 'nav',
            'output', 'progress', 'ruby', 'rt', 'rp', 'track', 'section',
            'source', 'summary', 'time', 'video', 'wbr',
        );
    }

    $nodes = array();
    foreach ($tags as $tag) {
        if ($isHtml) {
            $elements = self::getElementsByCaseInsensitiveTagName($dom, $tag);
        } else {
            $elements = $dom->getElementsByTagName($tag);
        }

        foreach ($elements as $element) {
            $nodes[] = $element;
        }
    }

    if (empty($nodes)) {
        return false;
    }

    // Filter by attributes.
    if ($options['attributes']) {
        foreach ($nodes as $node) {
            $invalid = false;

            foreach ($options['attributes'] as $name => $value) {
                if (preg_match('/^regexp\\s*:\\s*(.*)/i', $value, $matches)) {
                    if (!preg_match($matches[1], $node->getAttribute($name))) {
                        $invalid = true;
                    }
                } elseif ($name == 'class') {
                    // Every requested class must be present, in any order.
                    $findClasses = explode(' ', preg_replace("/\\s+/", ' ', $value));
                    $allClasses  = explode(' ', preg_replace("/\\s+/", ' ', $node->getAttribute($name)));

                    foreach ($findClasses as $findClass) {
                        if (!in_array($findClass, $allClasses)) {
                            $invalid = true;
                        }
                    }
                } else {
                    if ($node->getAttribute($name) != $value) {
                        $invalid = true;
                    }
                }
            }

            if (!$invalid) {
                $filtered[] = $node;
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return false;
        }
    }

    // Filter by text content.
    if ($options['content'] !== null) {
        foreach ($nodes as $node) {
            $invalid = false;

            if (preg_match('/^regexp\\s*:\\s*(.*)/i', $options['content'], $matches)) {
                if (!preg_match($matches[1], self::getNodeText($node))) {
                    $invalid = true;
                }
            } elseif ($options['content'] === '') {
                if (self::getNodeText($node) !== '') {
                    $invalid = true;
                }
            } elseif (strstr(self::getNodeText($node), $options['content']) === false) {
                $invalid = true;
            }

            if (!$invalid) {
                $filtered[] = $node;
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return false;
        }
    }

    // Filter by parent: only the first node matching the nested criteria counts.
    if ($options['parent']) {
        $parentNodes = self::findNodes($dom, $options['parent'], $isHtml);
        $parentNode  = isset($parentNodes[0]) ? $parentNodes[0] : null;

        foreach ($nodes as $node) {
            if ($parentNode !== $node->parentNode) {
                continue;
            }

            $filtered[] = $node;
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return false;
        }
    }

    // Filter by child: a node is kept once per matching direct child.
    if ($options['child']) {
        $childNodes = self::findNodes($dom, $options['child'], $isHtml);
        $childNodes = !empty($childNodes) ? $childNodes : array();

        foreach ($nodes as $node) {
            foreach ($node->childNodes as $child) {
                foreach ($childNodes as $childNode) {
                    if ($childNode === $child) {
                        $filtered[] = $node;
                    }
                }
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return false;
        }
    }

    // Filter by adjacent sibling: only the next ELEMENT sibling is considered.
    if ($options['adjacent-sibling']) {
        $adjacentSiblingNodes = self::findNodes($dom, $options['adjacent-sibling'], $isHtml);
        $adjacentSiblingNodes = !empty($adjacentSiblingNodes) ? $adjacentSiblingNodes : array();

        foreach ($nodes as $node) {
            $sibling = $node;

            while ($sibling = $sibling->nextSibling) {
                if ($sibling->nodeType !== XML_ELEMENT_NODE) {
                    continue;
                }

                foreach ($adjacentSiblingNodes as $adjacentSiblingNode) {
                    if ($sibling === $adjacentSiblingNode) {
                        $filtered[] = $node;
                        break;
                    }
                }

                // Stop at the first element sibling whether it matched or not.
                break;
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return false;
        }
    }

    // Filter by ancestor: only the first node matching the nested criteria counts.
    if ($options['ancestor']) {
        $ancestorNodes = self::findNodes($dom, $options['ancestor'], $isHtml);
        $ancestorNode  = isset($ancestorNodes[0]) ? $ancestorNodes[0] : null;

        foreach ($nodes as $node) {
            $parent = $node->parentNode;

            while ($parent && $parent->nodeType != XML_HTML_DOCUMENT_NODE) {
                if ($parent === $ancestorNode) {
                    $filtered[] = $node;
                }

                $parent = $parent->parentNode;
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return false;
        }
    }

    // Filter by descendant: a node is kept once per matching descendant.
    if ($options['descendant']) {
        $descendantNodes = self::findNodes($dom, $options['descendant'], $isHtml);
        $descendantNodes = !empty($descendantNodes) ? $descendantNodes : array();

        foreach ($nodes as $node) {
            foreach (self::getDescendants($node) as $descendant) {
                foreach ($descendantNodes as $descendantNode) {
                    if ($descendantNode === $descendant) {
                        $filtered[] = $node;
                    }
                }
            }
        }

        $nodes    = $filtered;
        $filtered = array();

        if (empty($nodes)) {
            return false;
        }
    }

    // Filter by number and kind of children.
    if ($options['children']) {
        $validChild   = array('count', 'greater_than', 'less_than', 'only');
        $childOptions = self::assertValidKeys($options['children'], $validChild);

        // The 'only' candidates do not depend on the node being examined,
        // so resolve them once. findNodes() returns false when nothing
        // matches, which must not be iterated.
        $onlyNodes = array();
        if ($childOptions['only']) {
            $onlyNodes = self::findNodes($dom, $childOptions['only'], $isHtml);
            $onlyNodes = !empty($onlyNodes) ? $onlyNodes : array();
        }

        foreach ($nodes as $node) {
            // Start from an empty list for every node; otherwise the
            // children of earlier nodes would be counted again.
            $children = array();

            foreach ($node->childNodes as $childNode) {
                if ($childNode->nodeType !== XML_CDATA_SECTION_NODE && $childNode->nodeType !== XML_TEXT_NODE) {
                    $children[] = $childNode;
                }
            }

            if (empty($children)) {
                continue;
            }

            // A node that fails a check is skipped; the remaining nodes are
            // still examined.
            $total = count($children);

            if ($childOptions['count'] !== null) {
                if ($total !== $childOptions['count']) {
                    continue;
                }
            } elseif ($childOptions['less_than'] !== null && $childOptions['greater_than'] !== null) {
                if ($total >= $childOptions['less_than'] || $total <= $childOptions['greater_than']) {
                    continue;
                }
            } elseif ($childOptions['less_than'] !== null) {
                if ($total >= $childOptions['less_than']) {
                    continue;
                }
            } elseif ($childOptions['greater_than'] !== null) {
                if ($total <= $childOptions['greater_than']) {
                    continue;
                }
            }

            if ($childOptions['only']) {
                foreach ($children as $child) {
                    $matched = false;

                    foreach ($onlyNodes as $onlyNode) {
                        if ($onlyNode === $child) {
                            $matched = true;
                        }
                    }

                    if (!$matched) {
                        // Skip this node, continue with the next one.
                        continue 2;
                    }
                }
            }

            $filtered[] = $node;
        }

        $nodes = $filtered;

        if (empty($nodes)) {
            // Same "nothing matched" result as every other filter step.
            return false;
        }
    }

    return !empty($nodes) ? $nodes : array();
}
/**
 * Collects every non-text descendant of a node.
 *
 * Text and CDATA nodes are skipped together with anything beneath them.
 * For each remaining child, its own descendants are listed first, followed
 * by the child itself.
 *
 * @param  DOMNode $node
 * @return array   List of DOMNode instances.
 */
protected static function getDescendants(DOMNode $node) {
    $collected = array();
    $children  = $node->childNodes ? $node->childNodes : array();

    foreach ($children as $child) {
        $type = $child->nodeType;

        if ($type !== XML_CDATA_SECTION_NODE && $type !== XML_TEXT_NODE) {
            foreach (self::getDescendants($child) as $nested) {
                $collected[] = $nested;
            }

            $collected[] = $child;
        }
    }

    return $collected;
}
/**
 * Looks elements up by tag name, trying the lower-case spelling first and
 * falling back to the upper-case spelling when nothing was found.
 *
 * @param  DOMDocument $dom
 * @param  string      $tag
 * @return DOMNodeList Result of the lower-case lookup if it is non-empty,
 *                     otherwise the result of the upper-case lookup.
 */
protected static function getElementsByCaseInsensitiveTagName(DOMDocument $dom, $tag) {
    $lowerCaseMatches = $dom->getElementsByTagName(strtolower($tag));

    if ($lowerCaseMatches->length != 0) {
        return $lowerCaseMatches;
    }

    return $dom->getElementsByTagName(strtoupper($tag));
}
/**
 * Builds the text of a node from all text and CDATA nodes beneath it.
 *
 * Each text or CDATA node contributes its trimmed data followed by one
 * space; other children are processed recursively. Runs of spaces in the
 * result are collapsed to a single space.
 *
 * @param  DOMNode $node
 * @return string  '' when the node has no child list.
 */
protected static function getNodeText(DOMNode $node) {
    if (!$node->childNodes instanceof DOMNodeList) {
        return '';
    }

    $result = '';

    foreach ($node->childNodes as $childNode) {
        if ($childNode->nodeType === XML_TEXT_NODE || $childNode->nodeType === XML_CDATA_SECTION_NODE) {
            $result .= trim($childNode->data) . ' ';
        } else {
            $result .= self::getNodeText($childNode);
        }
    }

    // Whitespace-only text nodes trim down to '' and still append a space,
    // so neighbouring nodes can produce runs of spaces. Replacing a single
    // space with a single space did nothing; collapse the runs instead.
    return preg_replace('/ {2,}/', ' ', $result);
}
}