You are here

class MigrateXMLReader in Migrate 7.2

Same name and namespace in other branches
  1. 6.2 plugins/sources/xml.inc \MigrateXMLReader

Makes an XMLReader object iterable, returning elements matching a restricted xpath-like syntax.

Hierarchy

Expanded class hierarchy of MigrateXMLReader

1 string reference to 'MigrateXMLReader'
MigrateSourceXML::__construct in plugins/sources/xml.inc
Source constructor.

File

plugins/sources/xml.inc, line 864
Support for migration from XML sources.

View source
/**
 * Makes an XMLReader object iterable, returning the elements that match a
 * restricted xpath-like query.
 *
 * The element query is an absolute path of element names ('/file/article'),
 * optionally followed by a single attribute predicate
 * ('/file/article[@type="news"]'). Each matching element is exposed as a
 * SimpleXMLElement (current()) keyed by the string value of the first node
 * returned by the ID xpath (key()).
 *
 * The #[\ReturnTypeWillChange] markers silence the PHP 8.1+ deprecation for
 * Iterator methods without return types; on older PHP versions they are
 * parsed as ordinary '#' comments.
 */
class MigrateXMLReader implements Iterator {

  /**
   * The XMLReader we are encapsulating.
   *
   * @var XMLReader
   */
  public $reader;

  /**
   * URL of the source XML file.
   *
   * @var string
   */
  public $url;

  /**
   * Array of the element names from the query, 0-based from the first (root)
   * element. For example, '/file/article' would be stored as
   * array(0 => 'file', 1 => 'article').
   *
   * @var array
   */
  protected $elementsToMatch = array();

  /**
   * If the element query is filtering by an attribute name=value, the name of
   * the attribute in question.
   *
   * @var string
   */
  protected $attributeName = NULL;

  /**
   * If the element query is filtering by an attribute name=value, the value of
   * the attribute in question.
   *
   * @var string
   */
  protected $attributeValue = NULL;

  /**
   * Array representing the path to the current element as we traverse the XML.
   * For example, if in an XML string like '<file><article>...</article></file>'
   * we are positioned within the article element, currentPath will be
   * array(0 => 'file', 1 => 'article').
   *
   * @var array
   */
  protected $currentPath = array();

  /**
   * Query string used to retrieve the elements from the XML file.
   *
   * @var string
   */
  public $elementQuery;

  /**
   * Xpath query string used to retrieve the primary key value from each
   * element.
   *
   * @var string
   */
  public $idQuery;

  /**
   * Current element object when iterating.
   *
   * @var SimpleXMLElement
   */
  protected $currentElement = NULL;

  /**
   * Value of the ID for the current element when iterating.
   *
   * @var string
   */
  protected $currentId = NULL;

  /**
   * When matching element names, whether to compare to the namespace-prefixed
   * name, or the local name.
   *
   * @var bool
   */
  protected $prefixedName = FALSE;

  /**
   * Prepares our extensions to the XMLReader object.
   *
   * The XML file is not opened here; that happens in rewind().
   *
   * @param string $xml_url
   *   URL of the XML file to be parsed.
   * @param string $element_query
   *   Query string in a restricted xpath format, for selecting elements to be
   *   returned by the iterator: an absolute path of element names separated
   *   by '/', optionally followed by one [@attribute="value"] predicate. A
   *   query that does not fit this format matches no elements.
   * @param string $id_query
   *   Query string to the unique identifier for an element,
   *   relative to the root of that element. This supports the full
   *   xpath syntax.
   */
  public function __construct($xml_url, $element_query, $id_query) {
    $this->reader = new XMLReader();
    $this->url = $xml_url;
    $this->elementQuery = $element_query;
    $this->idQuery = $id_query;

    // Suppress errors during parsing, so we can pick them up after.
    libxml_use_internal_errors(TRUE);

    // Parse the element query. First capture group is the element path, second
    // (if present) is the attribute. When the query does not match at all we
    // fall back to an empty path rather than reading undefined offsets.
    $element_path = '';
    $attribute_query = '';
    if (preg_match('|^/([^\\[]+)(.*)$|', (string) $element_query, $matches)) {
      $element_path = $matches[1];
      $attribute_query = $matches[2];
    }
    $this->elementsToMatch = explode('/', $element_path);

    // Matches [@attribute="value"] (with either single- or double-quotes).
    // A predicate in any other shape is ignored (no attribute filtering).
    if ($attribute_query && preg_match('|^\\[@([^=]+)=[\'"](.*)[\'"]\\]$|', $attribute_query, $matches)) {
      $this->attributeName = $matches[1];
      $this->attributeValue = $matches[2];
    }

    // If the element path contains any colons, it must be specifying
    // namespaces, so we need to compare using the prefixed element
    // name in next(). Compare against FALSE explicitly: strpos() returns 0
    // for a colon at the very start of the path.
    if (strpos($element_path, ':') !== FALSE) {
      $this->prefixedName = TRUE;
    }
  }

  /**
   * Implementation of Iterator::rewind().
   *
   * (Re)opens the source URL and positions the iterator on the first matching
   * element. If the URL cannot be opened, a message is displayed and the
   * iterator is left invalid.
   */
  #[\ReturnTypeWillChange]
  public function rewind() {
    // Drop any element left over from a previous pass, so that valid() cannot
    // report stale data if the URL fails to open this time.
    $this->currentElement = $this->currentId = NULL;

    // (Re)open the provided URL.
    $this->reader->close();
    $status = $this->reader->open($this->url, NULL, LIBXML_NOWARNING);

    // Reset our path tracker.
    $this->currentPath = array();
    if ($status) {
      // Load the first matching element and its ID.
      $this->next();
    }
    else {
      Migration::displayMessage(t('Could not open XML file !url', array(
        '!url' => $this->url,
      )));
    }
  }

  /**
   * Implementation of Iterator::next().
   *
   * Advances the underlying reader to the next element whose path (and
   * attribute value, when filtering) matches the element query, and stores it
   * together with its ID. When the end of the document is reached without a
   * match, the current element is left NULL so valid() returns FALSE.
   *
   * @throws Exception
   *   If the ID xpath query does not return an array of nodes.
   */
  #[\ReturnTypeWillChange]
  public function next() {
    migrate_instrument_start('MigrateXMLReader::next');
    $this->currentElement = $this->currentId = NULL;

    // Loop over each node in the XML file, looking for elements at a path
    // matching the input query string (represented in $this->elementsToMatch).
    while ($this->reader->read()) {
      if ($this->reader->nodeType == XMLReader::ELEMENT) {
        $name = $this->prefixedName ? $this->reader->name : $this->reader->localName;
        $this->currentPath[$this->reader->depth] = $name;

        if ($this->currentPath != $this->elementsToMatch) {
          continue;
        }

        // We're positioned to the right element path - if filtering on an
        // attribute, check that as well before accepting this element. The
        // comparison is strict so that values are compared as strings (no
        // numeric juggling) and a missing attribute (NULL) never equals ''.
        if (!empty($this->attributeName) && $this->reader->getAttribute($this->attributeName) !== $this->attributeValue) {
          continue;
        }

        // We've found a matching element - get a SimpleXML object
        // representing it. We must associate the DOMNode with a DOMDocument
        // to be able to import it into SimpleXML.
        // Despite appearances, this is almost twice as fast as
        // simplexml_load_string($this->readOuterXML());
        $node = $this->reader->expand();
        if (!$node) {
          // Expansion failed: report the libxml errors and keep scanning.
          foreach (libxml_get_errors() as $error) {
            $error_string = MigrateItemsXML::parseLibXMLError($error);
            if ($migration = Migration::currentMigration()) {
              $migration->saveMessage($error_string);
            }
            else {
              Migration::displayMessage($error_string);
            }
          }
          continue;
        }

        $dom = new DOMDocument();
        $node = $dom->importNode($node, TRUE);
        $dom->appendChild($node);
        $this->currentElement = simplexml_import_dom($node);
        $idnode = $this->currentElement->xpath($this->idQuery);
        if (!is_array($idnode)) {
          // Keep the instrumentation timer balanced before bailing out.
          migrate_instrument_stop('MigrateXMLReader::next');
          throw new Exception(t('Failure retrieving ID, xpath: !xpath', array(
            '!xpath' => $this->idQuery,
          )));
        }
        $this->currentId = (string) reset($idnode);
        break;
      }
      elseif ($this->reader->nodeType == XMLReader::END_ELEMENT) {
        // Remove this element and any deeper ones from the current path.
        foreach ($this->currentPath as $depth => $name) {
          if ($depth >= $this->reader->depth) {
            unset($this->currentPath[$depth]);
          }
        }
      }
    }
    migrate_instrument_stop('MigrateXMLReader::next');
  }

  /**
   * Implementation of Iterator::current().
   *
   * @return null|SimpleXMLElement
   *   Current item
   */
  #[\ReturnTypeWillChange]
  public function current() {
    return $this->currentElement;
  }

  /**
   * Implementation of Iterator::key().
   *
   * @return null|string
   *   Current key
   */
  #[\ReturnTypeWillChange]
  public function key() {
    return $this->currentId;
  }

  /**
   * Implementation of Iterator::valid().
   *
   * @return bool
   *   Indicates if current element is valid
   */
  #[\ReturnTypeWillChange]
  public function valid() {
    return $this->currentElement instanceof SimpleXMLElement;
  }

}

Members

Name | Modifiers | Type | Description | Overrides
MigrateXMLReader::$attributeName protected property If the element query is filtering by an attribute name=value, the name of the attribute in question.
MigrateXMLReader::$attributeValue protected property If the element query is filtering by an attribute name=value, the value of the attribute in question.
MigrateXMLReader::$currentElement protected property Current element object when iterating.
MigrateXMLReader::$currentId protected property Value of the ID for the current element when iterating.
MigrateXMLReader::$currentPath protected property Array representing the path to the current element as we traverse the XML. For example, if in an XML string like '<file><article>...</article></file>' we are positioned within the article element, currentPath will be array(0 => 'file', 1 => 'article').
MigrateXMLReader::$elementQuery public property Query string used to retrieve the elements from the XML file.
MigrateXMLReader::$elementsToMatch protected property Array of the element names from the query, 0-based from the first (root) element. For example, '/file/article' would be stored as array(0 => 'file', 1 => 'article').
MigrateXMLReader::$idQuery public property Xpath query string used to retrieve the primary key value from each element.
MigrateXMLReader::$prefixedName protected property When matching element names, whether to compare to the namespace-prefixed name, or the local name.
MigrateXMLReader::$reader public property The XMLReader we are encapsulating.
MigrateXMLReader::$url public property URL of the source XML file.
MigrateXMLReader::current public function Implementation of Iterator::current().
MigrateXMLReader::key public function Implementation of Iterator::key().
MigrateXMLReader::next public function Implementation of Iterator::next().
MigrateXMLReader::rewind public function Implementation of Iterator::rewind().
MigrateXMLReader::valid public function Implementation of Iterator::valid().
MigrateXMLReader::__construct public function Prepares our extensions to the XMLReader object.