You are here

class MigrateSourceXML in Migrate 7.2

Same name and namespace in other branches
  1. 6.2 plugins/sources/xml.inc \MigrateSourceXML

Implementation of MigrateSource, to handle imports from XML files.

Hierarchy

Expanded class hierarchy of MigrateSourceXML

File

plugins/sources/xml.inc, line 1122
Support for migration from XML sources.

View source
class MigrateSourceXML extends MigrateSource {

  /**
   * The reader currently iterating over the active source URL.
   *
   * Created lazily by getNextSource() as an instance of $readerClass; NULL
   * until the first source has been loaded and again after performRewind().
   *
   * @var MigrateXMLReader
   */
  protected $reader;

  /**
   * The MigrateXMLReader object serving as a cursor over the XML source.
   *
   * @return MigrateXMLReader
   *   The current reader, or NULL if no source has been loaded yet.
   */
  public function getReader() {
    return $this->reader;
  }

  /**
   * The source URLs to load XML from, indexed sequentially from 0.
   *
   * @var array
   */
  protected $sourceUrls = array();

  /**
   * Holds our current position within the $sourceUrls array.
   *
   * NULL means no source has been opened yet.
   *
   * @var int
   */
  protected $activeUrl = NULL;

  /**
   * An array of namespaces to explicitly register before Xpath queries.
   *
   * Keys are namespace prefixes, values are namespace URIs.
   *
   * @var array
   */
  protected $namespaces = array();

  /**
   * Store the query string used to recognize elements being iterated
   * so we can create reader objects on the fly.
   *
   * @var string
   */
  protected $elementQuery = '';

  /**
   * Store the query string used to retrieve the primary key value from each
   * element so we can create reader objects on the fly.
   *
   * @var string
   */
  protected $idQuery = '';

  /**
   * Store the reader class used to query XML so we can create reader objects
   * on the fly.
   *
   * @var string
   */
  protected $readerClass = '';

  /**
   * List of available source fields.
   *
   * @var array
   */
  protected $fields = array();

  /**
   * Source constructor.
   *
   * @param string|array $urls
   *   URL(s) of the XML source data. A single URL is wrapped in an array; an
   *   array is re-indexed from 0 so sources can be visited by position.
   * @param string $element_query
   *   Query string used to recognize elements being iterated.
   * @param string $id_query
   *   Xpath query string used to retrieve the primary key value
   *   from each element.
   * @param array $fields
   *   Optional - keys are field names, values are descriptions. Use to override
   *   the default descriptions, or to add additional source fields which the
   *   migration will add via other means (e.g., prepareRow()).
   * @param array $options
   *   Options applied to this source. In addition to the standard MigrateSource
   *   options, we support:
   *   - reader_class: The reader class to instantiate for traversing the XML -
   *     defaults to MigrateXMLReader (any substitutions must be derived from
   *     MigrateXMLReader).
   * @param array $namespaces
   *   Optional - namespaces to register on each row's XML element before it is
   *   returned. Keys are prefixes, values are namespace URIs.
   */
  public function __construct($urls, $element_query, $id_query, array $fields = array(), array $options = array(), array $namespaces = array()) {
    parent::__construct($options);

    $reader_class = empty($options['reader_class']) ? 'MigrateXMLReader' : $options['reader_class'];

    if (!is_array($urls)) {
      $urls = array($urls);
    }

    // getNextSource() walks the URLs by integer position, so drop any custom
    // or sparse keys (e.g. left behind by array_unique()/array_filter()).
    $this->sourceUrls = array_values($urls);
    $this->activeUrl = NULL;
    $this->elementQuery = $element_query;
    $this->idQuery = $id_query;
    $this->readerClass = $reader_class;
    $this->fields = $fields;
    $this->namespaces = $namespaces;
  }

  /**
   * Explicitly register namespaces on an XML element.
   *
   * @param SimpleXMLElement $xml
   *   A SimpleXMLElement to register the namespaces on.
   */
  protected function registerNamespaces(SimpleXMLElement &$xml) {
    foreach ($this->namespaces as $prefix => $namespace) {
      $xml->registerXPathNamespace($prefix, $namespace);
    }
  }

  /**
   * Return a string representing the source query.
   *
   * @return string
   *   The source URLs, the item xpath and the item ID xpath.
   */
  public function __toString() {
    // Clump the urls into a string. This could cause a problem when using
    // a lot of urls, may need to hash.
    $urls = implode(', ', $this->sourceUrls);
    return 'urls = ' . $urls . ' | item xpath = ' . $this->elementQuery . ' | item ID xpath = ' . $this->idQuery;
  }

  /**
   * Returns a list of fields available to be mapped from the source query.
   *
   * @return array
   *   keys: machine names of the fields (to be passed to addFieldMapping)
   *   values: Human-friendly descriptions of the fields.
   */
  public function fields() {
    return $this->fields;
  }

  /**
   * Returns the active Url.
   *
   * @return string|null
   *   The URL currently being read, or NULL if no source has been opened yet.
   */
  public function activeUrl() {
    if (is_null($this->activeUrl)) {
      return NULL;
    }
    return $this->sourceUrls[$this->activeUrl];
  }

  /**
   * Return a count of all available source records.
   *
   * Opens a fresh reader for every source URL and iterates it to the end, so
   * the import cursor held in $reader is not disturbed.
   *
   * @return int
   *   Total number of elements yielded by the readers across all URLs.
   */
  public function computeCount() {
    $count = 0;
    foreach ($this->sourceUrls as $url) {
      $reader = new $this->readerClass($url, $this->elementQuery, $this->idQuery);
      foreach ($reader as $element) {
        $count++;
      }
    }
    return $count;
  }

  /**
   * Implementation of MigrateSource::performRewind().
   */
  public function performRewind() {
    // Forget the current reader and position. The next getNextRow() call then
    // finds no reader and lazily opens the first source via getNextSource().
    $this->activeUrl = NULL;
    $this->reader = NULL;
  }

  /**
   * Implementation of MigrateSource::getNextRow().
   *
   * @return stdClass
   *   Data for the next row from the XML source files, or NULL when every
   *   source has been exhausted.
   */
  public function getNextRow() {
    migrate_instrument_start('MigrateSourceXML::next');
    $source_key = $this->activeMap->getSourceKey();
    $key_name = key($source_key);
    $row = NULL;

    // The reader is lazy loaded, so it may not be defined yet; only advance
    // an existing reader.
    if (isset($this->reader)) {
      $this->reader->next();
    }

    if (isset($this->reader) && $this->reader->valid()) {
      $row = $this->buildCurrentRow($key_name);
    }
    elseif ($this->getNextSource()) {
      // The current source is at the end (or none was open yet) and a
      // following source with at least one element has been loaded.
      $row = $this->buildCurrentRow($key_name);
    }

    migrate_instrument_stop('MigrateSourceXML::next');
    return $row;
  }

  /**
   * Builds a row object from the element the reader is positioned on.
   *
   * @param string $key_name
   *   Name of the row property that receives the reader's current key.
   *
   * @return stdClass
   *   Row with the ID in ->{$key_name} and the element in ->xml, with the
   *   configured namespaces registered on the element.
   */
  private function buildCurrentRow($key_name) {
    $row = new stdClass();
    $row->{$key_name} = $this->reader->key();
    $row->xml = $this->reader->current();
    $this->registerNamespaces($row->xml);
    return $row;
  }

  /**
   * Advances the reader to the next source from $sourceUrls.
   *
   * Sources that yield no valid element after rewind() are skipped.
   *
   * @return bool
   *   TRUE if a valid source was loaded, FALSE if there are no further
   *   sources (including when no URLs were configured at all).
   */
  public function getNextSource() {
    migrate_instrument_start('MigrateSourceXML::nextSource');

    $status = FALSE;
    $url_count = count($this->sourceUrls);
    while (TRUE) {
      // Start at the first URL when nothing is open yet, otherwise move on.
      $next_url = is_null($this->activeUrl) ? 0 : $this->activeUrl + 1;
      if ($next_url >= $url_count) {
        // Nothing left to open; also guards against an empty URL list.
        break;
      }
      $this->activeUrl = $next_url;

      $this->reader = new $this->readerClass($this->sourceUrls[$this->activeUrl], $this->elementQuery, $this->idQuery);
      $this->reader->rewind();
      if ($this->reader->valid()) {
        // We have a valid source.
        $status = TRUE;
        break;
      }
    }

    migrate_instrument_stop('MigrateSourceXML::nextSource');
    return $status;
  }

  /**
   * {@inheritdoc}
   */
  protected function hash($row) {
    // $row->xml is a SimpleXMLElement. Hash its XML string form instead of the
    // row object, to prevent parent::hash() failing when it tries to create
    // the hash. Only the XML content therefore contributes to the hash.
    return parent::hash($row->xml->asXML());
  }

}

Members

Namesort descending Modifiers Type Description Overrides
MigrateSource::$activeMap protected property The MigrateMap class for the current migration.
MigrateSource::$activeMigration protected property The Migration class currently invoking us, during rewind() and next().
MigrateSource::$cacheCounts protected property Whether this instance should cache the source count.
MigrateSource::$cacheKey protected property Key to use for caching counts.
MigrateSource::$currentKey protected property The primary key of the current row
MigrateSource::$currentRow protected property The current row from the query
MigrateSource::$highwaterField protected property Information on the highwater mark for the current migration, if any.
MigrateSource::$idList protected property List of source IDs to process.
MigrateSource::$mapRowAdded protected property By default, next() will directly read the map row and add it to the data row. A source plugin implementation may do this itself (in particular, the SQL source can incorporate the map table into the query) - if so, it should set this TRUE so we…
MigrateSource::$multikeySeparator protected property Used in the case of multiple key sources that need to use idlist.
MigrateSource::$numIgnored protected property Number of rows intentionally ignored (prepareRow() returned FALSE)
MigrateSource::$numProcessed protected property Number of rows we've at least looked at. 1
MigrateSource::$originalHighwater protected property The highwater mark at the beginning of the import operation.
MigrateSource::$skipCount protected property Whether this instance should not attempt to count the source.
MigrateSource::$trackChanges protected property If TRUE, we will maintain hashed source rows to determine whether incoming data has changed.
MigrateSource::count public function Return a count of available source records, from the cache if appropriate. Returns -1 if the source is not countable.
MigrateSource::current public function Implementation of Iterator::current() - called when entering a loop iteration, returning the current row
MigrateSource::dataChanged protected function Determine whether this row has changed, and therefore whether it should be processed.
MigrateSource::getCurrentKey public function
MigrateSource::getIgnored public function
MigrateSource::getProcessed public function
MigrateSource::key public function Implementation of Iterator::key - called when entering a loop iteration, returning the key of the current row. It must be a scalar - we will serialize to fulfill the requirement, but using getCurrentKey() is preferable.
MigrateSource::next public function Implementation of Iterator::next() - subclasses of MigrateSource should implement getNextRow() to retrieve the next valid source record to process.
MigrateSource::prepareRow protected function Give the calling migration a shot at manipulating, and possibly rejecting, the source row.
MigrateSource::resetStats public function Reset numIgnored back to 0.
MigrateSource::rewind public function Implementation of Iterator::rewind() - subclasses of MigrateSource should implement performRewind() to do any class-specific setup for iterating source records.
MigrateSource::valid public function Implementation of Iterator::valid() - called at the top of the loop, returning TRUE to process the loop and FALSE to terminate it
MigrateSourceXML::$activeUrl protected property Holds our current position within the $sourceUrls array
MigrateSourceXML::$elementQuery protected property Store the query string used to recognize elements being iterated so we can create reader objects on the fly.
MigrateSourceXML::$fields protected property List of available source fields.
MigrateSourceXML::$idQuery protected property Store the query string used to retrieve the primary key value from each element so we can create reader objects on the fly.
MigrateSourceXML::$namespaces protected property An array of namespaces to explicitly register before Xpath queries.
MigrateSourceXML::$reader protected property
MigrateSourceXML::$readerClass protected property Store the reader class used to query XML so we can create reader objects on the fly.
MigrateSourceXML::$sourceUrls protected property The source URLs to load XML from
MigrateSourceXML::activeUrl public function Returns the active Url.
MigrateSourceXML::computeCount public function Return a count of all available source records.
MigrateSourceXML::fields public function Returns a list of fields available to be mapped from the source query. Overrides MigrateSource::fields
MigrateSourceXML::getNextRow public function Implementation of MigrateSource::getNextRow().
MigrateSourceXML::getNextSource public function Advances the reader to the next source from source_urls.
MigrateSourceXML::getReader public function The MigrateXMLReader object serving as a cursor over the XML source.
MigrateSourceXML::hash protected function Generate a hash of the source row. Overrides MigrateSource::hash
MigrateSourceXML::performRewind public function Implementation of MigrateSource::performRewind().
MigrateSourceXML::registerNamespaces protected function Explicitly register namespaces on an XML element.
MigrateSourceXML::__construct public function Source constructor. Overrides MigrateSource::__construct
MigrateSourceXML::__toString public function Return a string representing the source query.