class MigrateXMLReader in Migrate 7.2
Same name and namespace in other branches
- 6.2 plugins/sources/xml.inc \MigrateXMLReader
Makes an XMLReader object iterable, returning elements matching a restricted xpath-like syntax.
Hierarchy
- class \MigrateXMLReader implements \Iterator
Expanded class hierarchy of MigrateXMLReader
1 string reference to 'MigrateXMLReader'
- MigrateSourceXML::__construct in plugins/sources/xml.inc - Source constructor.
File
- plugins/sources/xml.inc, line 864 - Support for migration from XML sources.
View source
/**
 * Makes an XMLReader object iterable, returning elements matching a restricted
 * xpath-like syntax.
 *
 * The element query has the form '/root/child/...', optionally followed by a
 * single attribute filter of the form [@name="value"]. Each matching element
 * is exposed as a SimpleXMLElement, keyed by the string value of the first
 * node returned by the ID xpath query evaluated relative to that element.
 */
class MigrateXMLReader implements Iterator {

  /**
   * The XMLReader we are encapsulating.
   *
   * @var XMLReader
   */
  public $reader;

  /**
   * URL of the source XML file.
   *
   * @var string
   */
  public $url;

  /**
   * Array of the element names from the query, 0-based from the first (root)
   * element. For example, '/file/article' would be stored as
   * array(0 => 'file', 1 => 'article'). Only a single leading slash is
   * stripped when the query is parsed.
   *
   * @var array
   */
  protected $elementsToMatch = array();

  /**
   * If the element query is filtering by an attribute name=value, the name of
   * the attribute in question.
   *
   * @var string
   */
  protected $attributeName = NULL;

  /**
   * If the element query is filtering by an attribute name=value, the value of
   * the attribute in question.
   *
   * @var string
   */
  protected $attributeValue = NULL;

  /**
   * Array representing the path to the current element as we traverse the XML.
   * For example, if in an XML string like '<file><article>...</article></file>'
   * we are positioned within the article element, currentPath will be
   * array(0 => 'file', 1 => 'article').
   *
   * @var array
   */
  protected $currentPath = array();

  /**
   * Query string used to retrieve the elements from the XML file.
   *
   * @var string
   */
  public $elementQuery;

  /**
   * Xpath query string used to retrieve the primary key value from each
   * element.
   *
   * @var string
   */
  public $idQuery;

  /**
   * Current element object when iterating.
   *
   * @var SimpleXMLElement
   */
  protected $currentElement = NULL;

  /**
   * Value of the ID for the current element when iterating.
   *
   * @var string
   */
  protected $currentId = NULL;

  /**
   * When matching element names, whether to compare to the namespace-prefixed
   * name, or the local name.
   *
   * @var bool
   */
  protected $prefixedName = FALSE;

  /**
   * Prepares our extensions to the XMLReader object.
   *
   * A query that cannot be parsed is reported through
   * Migration::displayMessage() rather than raising PHP notices; such a query
   * matches no elements (bad path) or applies no attribute filter (bad
   * filter), which is what the unparsed values previously resulted in.
   *
   * @param string $xml_url
   *   URL of the XML file to be parsed.
   * @param string $element_query
   *   Query string in a restricted xpath format, for selecting elements to be
   *   returned by the iterator.
   * @param string $id_query
   *   Query string to the unique identifier for an element,
   *   relative to the root of that element. This supports the full
   *   xpath syntax.
   */
  public function __construct($xml_url, $element_query, $id_query) {
    $this->reader = new XMLReader();
    $this->url = $xml_url;
    $this->elementQuery = $element_query;
    $this->idQuery = $id_query;

    // Suppress errors during parsing, so we can pick them up after.
    libxml_use_internal_errors(TRUE);

    // Parse the element query. First capture group is the element path, second
    // (if present) is the attribute.
    $element_path = '';
    $attribute_query = '';
    if (preg_match('|^/([^\\[]+)(.*)$|', (string) $element_query, $matches)) {
      $element_path = $matches[1];
      $attribute_query = $matches[2];
    }
    else {
      Migration::displayMessage(t('Could not parse element query !query', array(
        '!query' => $element_query,
      )));
    }
    $this->elementsToMatch = explode('/', $element_path);

    if ($attribute_query !== '') {
      // Matches [@attribute="value"] (with either single- or double-quotes).
      if (preg_match('|^\\[@([^=]+)=[\'"](.*)[\'"]\\]$|', $attribute_query, $matches)) {
        $this->attributeName = $matches[1];
        $this->attributeValue = $matches[2];
      }
      else {
        Migration::displayMessage(t('Could not parse attribute filter !filter in element query !query', array(
          '!filter' => $attribute_query,
          '!query' => $element_query,
        )));
      }
    }

    // If the element path contains any colons, it must be specifying
    // namespaces, so we need to compare using the prefixed element
    // name in next(). Compare against FALSE explicitly: strpos() returns 0
    // for a colon in the first position.
    if (strpos($element_path, ':') !== FALSE) {
      $this->prefixedName = TRUE;
    }
  }

  /**
   * Implementation of Iterator::rewind().
   *
   * Reopens the source URL and positions the iterator on the first matching
   * element. If the URL cannot be opened a message is displayed and the
   * iterator is left invalid.
   */
  #[\ReturnTypeWillChange]
  public function rewind() {
    // Drop any element left over from a previous pass, so that valid() does
    // not report stale data if the reopen below fails.
    $this->currentElement = $this->currentId = NULL;

    // (Re)open the provided URL.
    $this->reader->close();
    $status = $this->reader->open($this->url, NULL, LIBXML_NOWARNING);

    // Reset our path tracker.
    $this->currentPath = array();

    if ($status) {
      // Load the first matching element and its ID.
      $this->next();
    }
    else {
      Migration::displayMessage(t('Could not open XML file !url', array(
        '!url' => $this->url,
      )));
    }
  }

  /**
   * Implementation of Iterator::next().
   *
   * Advances the underlying XMLReader until an element whose path equals
   * $this->elementsToMatch (and whose attribute matches the filter, if one
   * was given) is found, then stores it in $this->currentElement and its ID
   * in $this->currentId. Both are left NULL when the input is exhausted.
   *
   * @throws Exception
   *   If the ID xpath query does not return an array of nodes.
   */
  #[\ReturnTypeWillChange]
  public function next() {
    migrate_instrument_start('MigrateXMLReader::next');
    $this->currentElement = $this->currentId = NULL;

    // Loop over each node in the XML file, looking for elements at a path
    // matching the input query string (represented in $this->elementsToMatch).
    while ($this->reader->read()) {
      $node_type = $this->reader->nodeType;

      if ($node_type == XMLReader::END_ELEMENT) {
        // Remove this element and any deeper ones from the current path.
        foreach (array_keys($this->currentPath) as $depth) {
          if ($depth >= $this->reader->depth) {
            unset($this->currentPath[$depth]);
          }
        }
        continue;
      }

      if ($node_type != XMLReader::ELEMENT) {
        continue;
      }

      // Record the element at its depth, using the prefixed name only when
      // the query itself used namespace prefixes.
      $this->currentPath[$this->reader->depth] = $this->prefixedName
        ? $this->reader->name
        : $this->reader->localName;

      if ($this->currentPath != $this->elementsToMatch) {
        continue;
      }

      // We're positioned to the right element path - if filtering on an
      // attribute, check that as well before accepting this element. The
      // comparison is strict so that numeric-looking values such as "01" and
      // "1" are not treated as equal, and a missing attribute (NULL) does not
      // match an empty filter value.
      if (!empty($this->attributeName)
        && $this->reader->getAttribute($this->attributeName) !== $this->attributeValue) {
        continue;
      }

      // We've found a matching element - get a SimpleXML object representing
      // it. We must associate the DOMNode with a DOMDocument to be able to
      // import it into SimpleXML.
      // Despite appearances, this is almost twice as fast as
      // simplexml_load_string($this->readOuterXML());
      $node = $this->reader->expand();
      if (!$node) {
        // Report what libxml collected, then keep scanning for the next
        // candidate element.
        foreach (libxml_get_errors() as $error) {
          $error_string = MigrateItemsXML::parseLibXMLError($error);
          if ($migration = Migration::currentMigration()) {
            $migration->saveMessage($error_string);
          }
          else {
            Migration::displayMessage($error_string);
          }
        }
        // Empty the buffer so the same errors are not reported again on the
        // next failure.
        libxml_clear_errors();
        continue;
      }

      $dom = new DOMDocument();
      $node = $dom->importNode($node, TRUE);
      $dom->appendChild($node);
      $this->currentElement = simplexml_import_dom($node);

      $idnode = $this->currentElement->xpath($this->idQuery);
      if (!is_array($idnode)) {
        // Stop the timer before bailing out so it is not left running.
        migrate_instrument_stop('MigrateXMLReader::next');
        throw new Exception(t('Failure retrieving ID, xpath: !xpath', array(
          '!xpath' => $this->idQuery,
        )));
      }
      // An empty result set yields an empty-string ID.
      $this->currentId = (string) reset($idnode);
      break;
    }

    migrate_instrument_stop('MigrateXMLReader::next');
  }

  /**
   * Implementation of Iterator::current().
   *
   * @return null|SimpleXMLElement
   *   Current item
   */
  #[\ReturnTypeWillChange]
  public function current() {
    return $this->currentElement;
  }

  /**
   * Implementation of Iterator::key().
   *
   * @return null|string
   *   Current key
   */
  #[\ReturnTypeWillChange]
  public function key() {
    return $this->currentId;
  }

  /**
   * Implementation of Iterator::valid().
   *
   * @return bool
   *   Indicates if current element is valid
   */
  #[\ReturnTypeWillChange]
  public function valid() {
    return $this->currentElement instanceof SimpleXMLElement;
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
MigrateXMLReader::$attributeName | protected | property | If the element query is filtering by an attribute name=value, the name of the attribute in question. | |
MigrateXMLReader::$attributeValue | protected | property | If the element query is filtering by an attribute name=value, the value of the attribute in question. | |
MigrateXMLReader::$currentElement | protected | property | Current element object when iterating. | |
MigrateXMLReader::$currentId | protected | property | Value of the ID for the current element when iterating. | |
MigrateXMLReader::$currentPath | protected | property | Array representing the path to the current element as we traverse the XML. For example, if in an XML string like '<file><article>...</article></file>' we are positioned within the article element, currentPath will… | |
MigrateXMLReader::$elementQuery | public | property | Query string used to retrieve the elements from the XML file. | |
MigrateXMLReader::$elementsToMatch | protected | property | Array of the element names from the query, 0-based from the first (root) element. For example, '//file/article' would be stored as array(0 => 'file', 1 => 'article'). | |
MigrateXMLReader::$idQuery | public | property | Xpath query string used to retrieve the primary key value from each element. | |
MigrateXMLReader::$prefixedName | protected | property | When matching element names, whether to compare to the namespace-prefixed name, or the local name. | |
MigrateXMLReader::$reader | public | property | The XMLReader we are encapsulating. | |
MigrateXMLReader::$url | public | property | URL of the source XML file. | |
MigrateXMLReader::current | public | function | Implementation of Iterator::current(). | |
MigrateXMLReader::key | public | function | Implementation of Iterator::key(). | |
MigrateXMLReader::next | public | function | Implementation of Iterator::next(). | |
MigrateXMLReader::rewind | public | function | Implementation of Iterator::rewind(). | |
MigrateXMLReader::valid | public | function | Implementation of Iterator::valid(). | |
MigrateXMLReader::__construct | public | function | Prepares our extensions to the XMLReader object. |