You are here

class MigrateItemsXML in Migrate 7.2

Same name and namespace in other branches
  1. 6.2 plugins/sources/xml.inc \MigrateItemsXML

Implementation of MigrateItems, for providing a list of IDs and for retrieving a parsed XML document given an ID from this list.

Hierarchy

Expanded class hierarchy of MigrateItemsXML

File

plugins/sources/xml.inc, line 427
Support for migration from XML sources.

View source
class MigrateItemsXML extends MigrateItems {

  /**
   * An array with all urls to available xml files.
   *
   * @var array
   */
  protected $urls;

  /**
   * The url currently being processed (cursor over the urls array).
   *
   * @var string
   */
  protected $currentUrl;

  /**
   * An array of namespaces to explicitly register before Xpath queries.
   *
   * Keys are prefixes, values are namespace URIs.
   *
   * @var array
   */
  protected $namespaces;

  /**
   * Stores the loaded XML document from currentUrl.
   *
   * FALSE until a document has been loaded, or when the last load failed.
   *
   * @var SimpleXMLElement|false
   */
  protected $currentXml = FALSE;

  /**
   * To find the right url depending on the id, we'll build a map in the form of
   * an array('url1' => $ids, 'url2' => $ids, ...).
   *
   * NULL until getIdList() has run at least once.
   *
   * @var array
   */
  protected $idsMap = NULL;

  /**
   * Stores the id list from all urls.
   *
   * Stays NULL as long as no ID has been found.
   *
   * @var array
   */
  protected $cacheIDs = NULL;

  /**
   * Items parsed from the current document, keyed by item ID.
   *
   * Each value is a stdClass whose xml property holds the item's
   * SimpleXMLElement. NULL when nothing is cached.
   *
   * @var array
   */
  protected $currentItems = NULL;

  /**
   * xpath identifying the element used for each item.
   *
   * @var string
   */
  protected $itemXpath;

  /**
   * xpath identifying the subelement under itemXpath that holds the id for
   * each item.
   *
   * @var string
   */
  protected $itemIDXpath;

  /**
   * Gets xpath identifying the element used for each item.
   *
   * @return string
   *   xpath
   */
  public function getItemXpath() {
    return $this->itemXpath;
  }

  /**
   * Getter for itemIDXpath.
   *
   * @return string
   *   xpath of the ID subelement, relative to an item element.
   */
  public function getIDXpath() {
    return $this->itemIDXpath;
  }

  /**
   * Sets up the url list, the selection xpaths and the namespaces.
   *
   * @param string|array $urls
   *   A single url or an array of urls of the XML documents to read. A single
   *   value is wrapped into a one-element array.
   * @param string $item_xpath
   *   xpath identifying the element used for each item.
   * @param string $item_id_xpath
   *   xpath, relative to an item element, of the subelement holding the ID.
   * @param array $namespaces
   *   Namespaces to register before xpath queries, as prefix => namespace.
   */
  public function __construct($urls, $item_xpath = 'item', $item_id_xpath = 'id', array $namespaces = array()) {
    parent::__construct();
    if (!is_array($urls)) {
      $urls = array(
        $urls,
      );
    }
    $this->urls = $urls;
    $this->itemXpath = $item_xpath;
    $this->itemIDXpath = $item_id_xpath;
    $this->namespaces = $namespaces;

    // Suppress errors during parsing, so we can pick them up after.
    libxml_use_internal_errors(TRUE);
  }

  /**
   * Explicitly register namespaces on an XML element.
   *
   * @param SimpleXMLElement $xml
   *   A SimpleXMLElement to register the namespaces on.
   */
  protected function registerNamespaces(SimpleXMLElement &$xml) {
    foreach ($this->namespaces as $prefix => $namespace) {
      $xml->registerXPathNamespace($prefix, $namespace);
    }
  }

  /**
   * Our public face is the URL list we're getting items from.
   *
   * @return string
   *   HTML fragment listing the urls followed by the two selection xpaths.
   */
  public function __toString() {
    $urls = implode('</li><li>', $this->urls);

    // Prepare a list of urls.
    $output = '<b>urls</b> = <ul><li>' . $urls . '</li></ul>';
    $output .= '<br />';

    // Add selection rules to the end.
    $output .= '<b>item xpath</b> = ' . $this->itemXpath . ' | ';
    $output .= '<b>item ID xpath</b> = ' . $this->itemIDXpath;
    return $output;
  }

  /**
   * Load and return the xml from currentUrl.
   *
   * The document is (re)loaded on every call as long as currentUrl is set.
   * When currentUrl is empty, whatever was loaded previously is returned
   * unchanged. On a load failure the libxml errors are shown through
   * Migration::displayMessage() and FALSE is returned.
   *
   * @return SimpleXMLElement|false
   *   The loaded document, or FALSE if it could not be loaded.
   */
  public function &xml() {
    if (!empty($this->currentUrl)) {
      $this->currentXml = simplexml_load_file($this->currentUrl);
      if ($this->currentXml === FALSE) {
        Migration::displayMessage(t('Loading of !currentUrl failed:', array(
          '!currentUrl' => $this->currentUrl,
        )));
        foreach (libxml_get_errors() as $error) {
          Migration::displayMessage(self::parseLibXMLError($error));
        }
      }
      else {
        $this->registerNamespaces($this->currentXml);
      }
    }
    return $this->currentXml;
  }

  /**
   * Parses a LibXMLError to a error message string.
   *
   * @param LibXMLError $error
   *   Error thrown by the XML
   *
   * @return string
   *   Error message containing the level name, code, message, line, column
   *   and file of the error.
   */
  public static function parseLibXMLError(LibXMLError $error) {
    // Fallback label for levels not handled below.
    $error_code_name = 'Unknown Error';
    switch ($error->level) {
      case LIBXML_ERR_WARNING:
        $error_code_name = t('Warning');
        break;

      case LIBXML_ERR_ERROR:
        $error_code_name = t('Error');
        break;

      case LIBXML_ERR_FATAL:
        $error_code_name = t('Fatal Error');
        break;
    }
    return t("!libxmlerrorcodename !libxmlerrorcode: !libxmlerrormessage\n" . "Line: !libxmlerrorline\n" . "Column: !libxmlerrorcolumn\n" . "File: !libxmlerrorfile", array(
      '!libxmlerrorcodename' => $error_code_name,
      '!libxmlerrorcode' => $error->code,
      '!libxmlerrormessage' => trim($error->message),
      '!libxmlerrorline' => $error->line,
      '!libxmlerrorcolumn' => $error->column,
      '!libxmlerrorfile' => $error->file ? $error->file : NULL,
    ));
  }

  /**
   * Load IDs from URLs.
   *
   * Load ids from all urls and map them in idsMap depending on the currentURL.
   *
   * After ids were fetched from all urls store them in cacheIDs and return the
   * whole list. As a side effect currentUrl is left pointing at the last url
   * of the list.
   *
   * @return array|null
   *   The de-duplicated IDs from all urls, or NULL when no ID was found.
   */
  public function getIdList() {
    $ids = array();

    // Rebuild the map from scratch. Leaving it as an array (even an empty
    // one) guarantees that getItem() can iterate over it when every url
    // failed to load.
    $this->idsMap = array();
    foreach ($this->urls as $url) {
      migrate_instrument_start("Retrieve {$url}");

      // Make sure, to load new xml.
      $this->currentUrl = $url;
      $xml = $this->xml();
      if ($xml !== FALSE) {
        $url_ids = $this->getIDsFromXML($xml);
        $this->idsMap[$url] = $url_ids;
        $ids = array_merge($ids, $url_ids);
      }
      migrate_instrument_stop("Retrieve {$url}");
    }
    if (!empty($ids)) {
      $this->cacheIDs = array_unique($ids);
      return $this->cacheIDs;
    }
    return NULL;
  }

  /**
   * Given an XML object, parse out the IDs for processing and return them as
   * an array. The location of the IDs in the XML are based on the item xpath
   * and item ID xpath set in the constructor.
   *    eg, xpath = itemXpath . '/' . itemIDXpath
   * Items for which no ID can be extracted are skipped. This method does not
   * cache anything itself; getIdList() stores the combined result.
   *
   * @param SimpleXMLElement $xml
   *   SimpleXMLElement
   *
   * @return array
   *   The unique IDs found in the document, as strings.
   */
  protected function getIDsFromXML(SimpleXMLElement $xml) {
    $result = $xml->xpath($this->itemXpath);
    $ids = array();
    if ($result) {
      foreach ($result as $element) {
        if (!isset($element)) {
          continue;
        }

        // Namespaces must be reapplied after xpath().
        $this->registerNamespaces($element);
        $id = $this->getItemID($element);
        if (!is_null($id)) {
          $ids[] = (string) $id;
        }
      }
    }
    return array_unique($ids);
  }

  /**
   * Return a count of all available IDs from the source listing.
   *
   * @return int
   *   count of available IDs, 0 when none could be retrieved.
   */
  public function computeCount() {
    if (!isset($this->cacheIDs)) {
      $this->getIdList();
    }

    // cacheIDs is still NULL when getIdList() found nothing; count() must not
    // be called on NULL (warning since PHP 7.2, TypeError since PHP 8).
    return is_array($this->cacheIDs) ? count($this->cacheIDs) : 0;
  }

  /**
   * Load the XML at the current URL, and return an array.
   *
   * The items are always parsed afresh from the loaded document.
   *
   * @return array|null
   *   array of the Items found within it, or NULL if the document could not
   *   be loaded or contains no items.
   */
  public function getAllItems() {
    $xml = $this->xml();
    if ($xml !== FALSE) {
      return $this->getItemsFromXML($xml, TRUE);
    }
    return NULL;
  }

  /**
   * Parses out the items from a given XML object, and parses its items.
   *
   * Given an XML object, parse out the items for processing and return them as
   * an array. The location of the items in the XML are based on the item xpath
   * set in the constructor. Items from currentUrl are cached. The list of items
   * is returned from the cache except when this is the first call
   * (ie, cache is NULL) OR the refresh parameter is TRUE.
   *
   * Items are cached as an array of key=ID and value=stdClass object with
   * attribute xml containing the xml SimpleXMLElement object of the item.
   *
   * @param SimpleXMLElement $xml
   *   XML to parse
   * @param bool $refresh
   *   TRUE to parse the items again, FALSE to reuse the cache when available.
   *
   * @return array|null
   *   Array of obtained items, or NULL when the item xpath matched nothing
   *   (the cache is emptied in that case).
   */
  public function getItemsFromXML(SimpleXMLElement $xml, $refresh = FALSE) {
    // Serve from the cache only when no refresh was asked for. The previous
    // test was inverted and returned stale items precisely when a refresh was
    // requested.
    if (!$refresh && $this->currentItems !== NULL) {
      return $this->currentItems;
    }
    $this->currentItems = NULL;
    $items = array();
    $result = $xml->xpath($this->itemXpath);
    if (!$result) {
      return NULL;
    }
    foreach ($result as $item_xml) {
      if (!isset($item_xml)) {
        continue;
      }

      // Namespaces must be reapplied after xpath().
      $this->registerNamespaces($item_xml);
      $id = $this->getItemID($item_xml);
      $item = new stdClass();
      $item->xml = $item_xml;
      $items[$id] = $item;
    }
    $this->currentItems = $items;
    return $this->currentItems;
  }

  /**
   * Get the item ID from the itemXML based on itemIDXpath.
   *
   * @param SimpleXMLElement $item_xml
   *   Element from we get the ID
   *
   * @return string|null
   *   The item ID, or NULL if it could not be extracted.
   */
  protected function getItemID($item_xml) {
    return $this->getElementValue($item_xml, $this->itemIDXpath);
  }

  /**
   * Get an element from the itemXML based on an xpath.
   *
   * @param SimpleXMLElement $item_xml
   *   Element from we get the required value
   * @param string $xpath
   *   xpath used to locate the value
   *
   * @return string|null
   *   String value of the first match, or NULL when the element cannot be
   *   serialised or the xpath matches nothing.
   */
  protected function getElementValue($item_xml, $xpath) {
    $value = NULL;
    if ($item_xml->asXML()) {
      $result = $item_xml->xpath($xpath);
      if ($result) {
        $value = (string) $result[0];
      }
    }
    return $value;
  }

  /**
   * Implementers are expected to return an object representing a source item.
   * Items from currentUrl are cached as an array of key=ID and value=stdClass
   * object with attribute xml containing the xml SimpleXMLElement object of the
   * item.
   *
   * When the item cannot be loaded, an error message (including any pending
   * libxml errors) is saved on the current migration's map and the libxml
   * error buffer is cleared.
   *
   * @param mixed $id
   *   ID of the item to fetch. The lookup in the url map is strict, and
   *   getIDsFromXML() stores IDs as strings.
   *
   * @return stdClass|null
   *   The item, or NULL when $id is empty or the item could not be loaded.
   */
  public function getItem($id) {

    // Make sure we actually have an ID.
    if (empty($id)) {
      return NULL;
    }
    $item = NULL;

    // If $id is in currentXml return the right item immediately.
    if (isset($this->currentItems[$id])) {
      $item = $this->currentItems[$id];
    }
    else {

      // Otherwise find the right url and get the items from.
      if ($this->idsMap === NULL) {

        // Populate the map.
        $this->getIdList();
      }

      // The map can still be unusable if getIdList() was overridden and did
      // not fill it; do not iterate over a non-array.
      if (is_array($this->idsMap)) {
        foreach ($this->idsMap as $url => $ids) {
          if (in_array($id, $ids, TRUE)) {
            $this->currentItems = NULL;
            $this->currentUrl = $url;
            $items = $this->getAllItems();

            // getAllItems() returns NULL when the document fails to load or
            // holds no items; never index into that. When several urls list
            // the same ID, the last one visited wins.
            $item = isset($items[$id]) ? $items[$id] : NULL;
          }
        }
      }
    }
    if (!empty($item)) {
      return $item;
    }

    // Report the failure on the migration map, with the parser errors.
    $migration = Migration::currentMigration();
    $message = t('Loading of item XML for ID !id failed:', array(
      '!id' => $id,
    ));
    foreach (libxml_get_errors() as $error) {
      $message .= "\n" . $error->message;
    }
    $migration->getMap()->saveMessage(array(
      $id,
    ), $message, MigrationBase::MESSAGE_ERROR);
    libxml_clear_errors();
    return NULL;
  }

  /**
   * Computes a hash of a source row from its XML.
   *
   * @param object $row
   *   Row whose xml property is a SimpleXMLElement.
   *
   * @return string
   *   md5 hash of the serialized XML string of the row.
   */
  public function hash($row) {

    // $row->xml is a SimpleXMLElement, so the hash is built from its XML
    // string representation rather than from the object itself.
    migrate_instrument_start('MigrateItemXML::hash');
    $hash = md5(serialize($row->xml->asXML()));
    migrate_instrument_stop('MigrateItemXML::hash');
    return $hash;
  }

}

Members

Name (sort descending) | Modifiers | Type | Description | Overrides
MigrateItemsXML::$cacheIDs protected property Stores the id list from all urls.
MigrateItemsXML::$currentItems protected property
MigrateItemsXML::$currentUrl protected property Define the current cursor over the urls array.
MigrateItemsXML::$currentXml protected property Stores the loaded XML document from currentUrl.
MigrateItemsXML::$idsMap protected property To find the right url depending on the id, we'll build a map in the form of an array('url1' => $ids, 'url2' => $ids, ...).
MigrateItemsXML::$itemIDXpath protected property xpath identifying the subelement under itemXpath that holds the id for each item.
MigrateItemsXML::$itemXpath protected property xpath identifying the element used for each item.
MigrateItemsXML::$namespaces protected property An array of namespaces to explicitly register before Xpath queries.
MigrateItemsXML::$urls protected property An array with all urls to available xml files.
MigrateItemsXML::computeCount public function Return a count of all available IDs from the source listing. Overrides MigrateItems::computeCount
MigrateItemsXML::getAllItems public function Load the XML at the given URL, and return an array.
MigrateItemsXML::getElementValue protected function Get an element from the itemXML based on an xpath.
MigrateItemsXML::getIdList public function Load ID's from URLs. Overrides MigrateItems::getIdList
MigrateItemsXML::getIDsFromXML protected function Given an XML object, parse out the IDs for processing and return them as an array. The location of the IDs in the XML are based on the item xpath and item ID xpath set in the constructor. eg, xpath = itemXpath . '/' . itemIDXpath IDs are…
MigrateItemsXML::getIDXpath public function Getter for itemIDXpath.
MigrateItemsXML::getItem public function Implementers are expected to return an object representing a source item. Items from currentUrl are cached as an array of key=ID and value=stdClass object with attribute xml containing the xml SimpleXMLElement object of the item. Overrides MigrateItems::getItem
MigrateItemsXML::getItemID protected function Get the item ID from the itemXML based on itemIDXpath.
MigrateItemsXML::getItemsFromXML public function Parses out the items from a given XML object, and parses its items.
MigrateItemsXML::getItemXpath public function Gets xpath identifying the element used for each item.
MigrateItemsXML::hash public function
MigrateItemsXML::parseLibXMLError public static function Parses a LibXMLError to a error message string.
MigrateItemsXML::registerNamespaces protected function Explicitly register namespaces on an XML element.
MigrateItemsXML::xml public function Load and return the xml from currentUrl.
MigrateItemsXML::__construct public function Overrides MigrateItems::__construct
MigrateItemsXML::__toString public function Our public face is the URL list we're getting items from. Overrides MigrateItems::__toString