class MigrateItemsXML in Migrate 7.2
Same name and namespace in other branches
- 6.2 plugins/sources/xml.inc \MigrateItemsXML
Implementation of MigrateItems, for providing a list of IDs and for retrieving a parsed XML document given an ID from this list.
Hierarchy
- class \MigrateItems
- class \MigrateItemsXML
Expanded class hierarchy of MigrateItemsXML
File
- plugins/sources/xml.inc, line 427 - Support for migration from XML sources.
View source
class MigrateItemsXML extends MigrateItems {

  /**
   * An array with all urls to available xml files.
   *
   * @var array
   */
  protected $urls;

  /**
   * Define the current cursor over the urls array.
   *
   * @var string
   */
  protected $currentUrl;

  /**
   * An array of namespaces to explicitly register before Xpath queries.
   *
   * Keyed by prefix, valued by namespace URI (see registerNamespaces()).
   *
   * @var array
   */
  protected $namespaces;

  /**
   * Stores the loaded XML document from currentUrl.
   *
   * FALSE until a document has been loaded, or when the last load failed.
   *
   * @var SimpleXMLElement|false
   */
  protected $currentXml = FALSE;

  /**
   * To find the right url depending on the id, we'll build a map in the form of
   * an array('url1' => $ids, 'url2' => $ids, ...).
   *
   * NULL until getIdList() has run at least once.
   *
   * @var array|null
   */
  protected $idsMap = NULL;

  /**
   * Stores the id list from all urls.
   *
   * Stays NULL while no ID has been found.
   *
   * @var array|null
   */
  protected $cacheIDs = NULL;

  /**
   * Items parsed from the most recently processed document, keyed by item ID.
   *
   * Each value is a stdClass whose xml property holds the item's
   * SimpleXMLElement. NULL when nothing is cached.
   *
   * @var array|null
   */
  protected $currentItems = NULL;

  /**
   * xpath identifying the element used for each item.
   *
   * @var string
   */
  protected $itemXpath;

  /**
   * xpath identifying the subelement under itemXpath that holds the id for
   * each item.
   *
   * @var string
   */
  protected $itemIDXpath;

  /**
   * Gets xpath identifying the element used for each item.
   *
   * @return string
   *   xpath
   */
  public function getItemXpath() {
    return $this->itemXpath;
  }

  /**
   * Getter for itemIDXpath.
   *
   * @return string
   *   xpath, relative to an item element, of the element holding the item ID.
   */
  public function getIDXpath() {
    return $this->itemIDXpath;
  }

  /**
   * Stores the source configuration and switches libxml to internal errors.
   *
   * @param string|array $urls
   *   A single URL or an array of URLs of the XML documents to read. A
   *   non-array value is wrapped into a one-element array.
   * @param string $item_xpath
   *   xpath identifying the element used for each item.
   * @param string $item_id_xpath
   *   xpath, relative to an item element, of the element holding the item ID.
   * @param array $namespaces
   *   Namespaces to register before xpath queries, as prefix => namespace.
   */
  public function __construct($urls, $item_xpath = 'item', $item_id_xpath = 'id', array $namespaces = array()) {
    parent::__construct();
    if (!is_array($urls)) {
      $urls = array($urls);
    }
    $this->urls = $urls;
    $this->itemXpath = $item_xpath;
    $this->itemIDXpath = $item_id_xpath;
    $this->namespaces = $namespaces;
    // Suppress errors during parsing, so we can pick them up after.
    libxml_use_internal_errors(TRUE);
  }

  /**
   * Explicitly register namespaces on an XML element.
   *
   * @param SimpleXMLElement $xml
   *   A SimpleXMLElement to register the namespaces on.
   */
  protected function registerNamespaces(SimpleXMLElement &$xml) {
    foreach ($this->namespaces as $prefix => $namespace) {
      $xml->registerXPathNamespace($prefix, $namespace);
    }
  }

  /**
   * Our public face is the URL list we're getting items from.
   *
   * @return string
   *   HTML fragment listing the urls followed by both xpath settings. The
   *   values are inserted as-is, without escaping.
   */
  public function __toString() {
    $urls = implode('</li><li>', $this->urls);
    // Prepare a list of urls.
    $output = '<b>urls</b> = <ul><li>' . $urls . '</li></ul>';
    $output .= '<br />';
    // Add selection rules to the end.
    $output .= '<b>item xpath</b> = ' . $this->itemXpath . ' | ';
    $output .= '<b>item ID xpath</b> = ' . $this->itemIDXpath;
    return $output;
  }

  /**
   * Load and return the xml from currentUrl.
   *
   * The document is (re)loaded on every call for which currentUrl is set. When
   * loading fails, the failure and every pending libxml error are displayed
   * through Migration::displayMessage(). The pending libxml errors are not
   * cleared here.
   *
   * @return SimpleXMLElement|false
   *   The loaded document (returned by reference), or FALSE when loading
   *   failed or nothing has been loaded yet.
   */
  public function &xml() {
    if (!empty($this->currentUrl)) {
      $this->currentXml = simplexml_load_file($this->currentUrl);
      if ($this->currentXml === FALSE) {
        Migration::displayMessage(t('Loading of !currentUrl failed:', array(
          '!currentUrl' => $this->currentUrl,
        )));
        foreach (libxml_get_errors() as $error) {
          Migration::displayMessage(self::parseLibXMLError($error));
        }
      }
      else {
        $this->registerNamespaces($this->currentXml);
      }
    }
    return $this->currentXml;
  }

  /**
   * Parses a LibXMLError to a error message string.
   *
   * @param LibXMLError $error
   *   Error thrown by the XML
   *
   * @return string
   *   Error message containing the severity label, error code, message, line,
   *   column and file.
   */
  public static function parseLibXMLError(LibXMLError $error) {
    // Used as-is when the level matches none of the libxml constants below.
    $error_code_name = 'Unknown Error';
    switch ($error->level) {
      case LIBXML_ERR_WARNING:
        $error_code_name = t('Warning');
        break;

      case LIBXML_ERR_ERROR:
        $error_code_name = t('Error');
        break;

      case LIBXML_ERR_FATAL:
        $error_code_name = t('Fatal Error');
        break;
    }
    return t("!libxmlerrorcodename !libxmlerrorcode: !libxmlerrormessage\n" . "Line: !libxmlerrorline\n" . "Column: !libxmlerrorcolumn\n" . "File: !libxmlerrorfile", array(
      '!libxmlerrorcodename' => $error_code_name,
      '!libxmlerrorcode' => $error->code,
      '!libxmlerrormessage' => trim($error->message),
      '!libxmlerrorline' => $error->line,
      '!libxmlerrorcolumn' => $error->column,
      '!libxmlerrorfile' => $error->file ? $error->file : NULL,
    ));
  }

  /**
   * Load ID's from URLs.
   *
   * Load ids from all urls and map them in idsMap depending on the currentURL.
   *
   * After ids were fetched from all urls store them in cacheIDs and return the
   * whole list.
   *
   * @return array|null
   *   The de-duplicated IDs found across all urls, or NULL when none was
   *   found.
   */
  public function getIdList() {
    $ids = array();
    // Always leave idsMap as an array, even when every URL fails to load, so
    // getItem() can iterate it and does not re-fetch all URLs on each lookup.
    $this->idsMap = array();
    foreach ($this->urls as $url) {
      migrate_instrument_start("Retrieve {$url}");
      // Make sure, to load new xml.
      $this->currentUrl = $url;
      $xml = $this->xml();
      if ($xml !== FALSE) {
        $url_ids = $this->getIDsFromXML($xml);
        $this->idsMap[$url] = $url_ids;
        $ids = array_merge($ids, $url_ids);
      }
      migrate_instrument_stop("Retrieve {$url}");
    }
    if (!empty($ids)) {
      $this->cacheIDs = array_unique($ids);
      return $this->cacheIDs;
    }
    return NULL;
  }

  /**
   * Given an XML object, parse out the IDs for processing and return them as
   * an array. The location of the IDs in the XML are based on the item xpath
   * and item ID xpath set in the constructor.
   * eg, xpath = itemXpath . '/' . itemIDXpath
   *
   * Items for which no ID can be extracted are skipped. Nothing is cached
   * here; the document is queried on every call.
   *
   * @param SimpleXMLElement $xml
   *   SimpleXMLElement
   *
   * @return array
   *   The de-duplicated item IDs, as strings.
   */
  protected function getIDsFromXML(SimpleXMLElement $xml) {
    $result = $xml->xpath($this->itemXpath);
    $ids = array();
    if ($result) {
      foreach ($result as $element) {
        if (!isset($element)) {
          continue;
        }
        // Namespaces must be reapplied after xpath().
        $this->registerNamespaces($element);
        $id = $this->getItemID($element);
        if (!is_null($id)) {
          $ids[] = (string) $id;
        }
      }
    }
    return array_unique($ids);
  }

  /**
   * Return a count of all available IDs from the source listing.
   *
   * @return int
   *   count of available IDs; 0 when no ID could be found.
   */
  public function computeCount() {
    if (!isset($this->cacheIDs)) {
      $this->getIdList();
    }
    // cacheIDs is still NULL when getIdList() found nothing. count(NULL) is a
    // warning on PHP 7.2+ and a TypeError on PHP 8.
    return is_array($this->cacheIDs) ? count($this->cacheIDs) : 0;
  }

  /**
   * Load the XML at the given URL, and return an array.
   *
   * The document at currentUrl is loaded through xml() and its items are
   * always parsed afresh.
   *
   * @return array|null
   *   array of the Items found within it, or NULL when the document could not
   *   be loaded or contains no item.
   */
  public function getAllItems() {
    $xml = $this->xml();
    if ($xml !== FALSE) {
      return $this->getItemsFromXML($xml, TRUE);
    }
    return NULL;
  }

  /**
   * Parses out the items from a given XML object, and parse its items.
   *
   * Given an XML object, parse out the items for processing and return them as
   * an array. The location of the items in the XML are based on the item xpath
   * set in the constructor. Items from currentUrl are cached. The list of items
   * is returned from the cache except when this is the first call
   * (ie, cache is NULL) OR the refresh parameter is TRUE.
   *
   * Items are cached as an array of key=ID and value=stdClass object with
   * attribute xml containing the xml SimpleXMLElement object of the item.
   *
   * @param SimpleXMLElement $xml
   *   XML to parse
   * @param bool $refresh
   *   TRUE to parse the items again, FALSE to reuse the cached items when
   *   there are any.
   *
   * @return array|null
   *   Array of obtained items, or NULL when the item xpath matches nothing.
   */
  public function getItemsFromXML(SimpleXMLElement $xml, $refresh = FALSE) {
    // Serve the cache only when no refresh was requested. The previous
    // condition was inverted and returned the cache when a refresh was asked.
    if ($refresh === FALSE && !empty($this->currentItems)) {
      return $this->currentItems;
    }
    $this->currentItems = NULL;
    $result = $xml->xpath($this->itemXpath);
    if (!$result) {
      return NULL;
    }
    $items = array();
    foreach ($result as $item_xml) {
      if (!isset($item_xml)) {
        continue;
      }
      // Namespaces must be reapplied after xpath().
      $this->registerNamespaces($item_xml);
      $id = $this->getItemID($item_xml);
      $item = new stdclass();
      $item->xml = $item_xml;
      $items[$id] = $item;
    }
    $this->currentItems = $items;
    return $this->currentItems;
  }

  /**
   * Get the item ID from the itemXML based on itemIDXpath.
   *
   * @param SimpleXMLElement $item_xml
   *   Element from we get the ID
   *
   * @return string|null
   *   The item ID, or NULL when itemIDXpath matches nothing.
   */
  protected function getItemID($item_xml) {
    return $this->getElementValue($item_xml, $this->itemIDXpath);
  }

  /**
   * Get an element from the itemXML based on an xpath.
   *
   * @param SimpleXMLElement $item_xml
   *   Element from we get the required value
   * @param string $xpath
   *   xpath used to locate the value
   *
   * @return string|null
   *   The first match cast to string, or NULL when the element cannot be
   *   serialized or the xpath matches nothing.
   */
  protected function getElementValue($item_xml, $xpath) {
    $value = NULL;
    if ($item_xml->asXML()) {
      $result = $item_xml->xpath($xpath);
      if ($result) {
        $value = (string) $result[0];
      }
    }
    return $value;
  }

  /**
   * Implementers are expected to return an object representing a source item.
   * Items from currentUrl are cached as an array of key=ID and value=stdClass
   * object with attribute xml containing the xml SimpleXMLElement object of the
   * item.
   *
   * When the item cannot be loaded, an error message including the pending
   * libxml errors is saved on the current migration's map (if a migration is
   * running) and the libxml error buffer is cleared.
   *
   * @param mixed $id
   *   ID of the item to load. Values considered empty by PHP, including '0',
   *   are rejected.
   *
   * @return stdClass|null
   *   The item, or NULL when $id is empty or the item cannot be found.
   */
  public function getItem($id) {
    // Make sure we actually have an ID.
    if (empty($id)) {
      return NULL;
    }
    $item = NULL;
    // If $id is in currentXml return the right item immediately.
    if (isset($this->currentItems[$id])) {
      $item = $this->currentItems[$id];
    }
    else {
      // Otherwise find the right url and get the items from.
      if ($this->idsMap === NULL) {
        // Populate the map.
        $this->getIdList();
      }
      // The cast keeps the loop safe if the map is still not an array.
      foreach ((array) $this->idsMap as $url => $ids) {
        if (in_array($id, $ids, TRUE)) {
          $this->currentItems = NULL;
          $this->currentUrl = $url;
          $items = $this->getAllItems();
          // getAllItems() returns NULL when the document fails to load. Only
          // take the item when it is really there, so a failing url cannot
          // overwrite an item found in an earlier one.
          if (isset($items[$id])) {
            $item = $items[$id];
          }
        }
      }
    }
    if (!empty($item)) {
      return $item;
    }
    $message = t('Loading of item XML for ID !id failed:', array(
      '!id' => $id,
    ));
    foreach (libxml_get_errors() as $error) {
      $message .= "\n" . $error->message;
    }
    $migration = Migration::currentMigration();
    // No map to record the failure on when no migration is running.
    if ($migration) {
      $migration->getMap()->saveMessage(array(
        $id,
      ), $message, MigrationBase::MESSAGE_ERROR);
    }
    libxml_clear_errors();
    return NULL;
  }

  /**
   * Computes a hash of a source row from its XML content.
   *
   * @param object $row
   *   Row whose xml property is the item's SimpleXMLElement.
   *
   * @return string
   *   md5 hash of the serialized XML string of the row.
   */
  public function hash($row) {
    // $row->xml is a SimpleXMLElement, so the hash is computed from its XML
    // string representation instead of from the object itself.
    migrate_instrument_start('MigrateItemXML::hash');
    $hash = md5(serialize($row->xml->asXML()));
    migrate_instrument_stop('MigrateItemXML::hash');
    return $hash;
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
MigrateItemsXML::$cacheIDs | protected | property | Stores the id list from all urls. | |
MigrateItemsXML::$currentItems | protected | property | | |
MigrateItemsXML::$currentUrl | protected | property | Define the current cursor over the urls array. | |
MigrateItemsXML::$currentXml | protected | property | Stores the loaded XML document from currentUrl. | |
MigrateItemsXML::$idsMap | protected | property | To find the right url depending on the id, we'll build a map in the form of an array('url1' => $ids, 'url2' => $ids, ...). | |
MigrateItemsXML::$itemIDXpath | protected | property | xpath identifying the subelement under itemXpath that holds the id for each item. | |
MigrateItemsXML::$itemXpath | protected | property | xpath identifying the element used for each item. | |
MigrateItemsXML::$namespaces | protected | property | An array of namespaces to explicitly register before Xpath queries. | |
MigrateItemsXML::$urls | protected | property | An array with all urls to available xml files. | |
MigrateItemsXML::computeCount | public | function | Return a count of all available IDs from the source listing. | Overrides MigrateItems::computeCount |
MigrateItemsXML::getAllItems | public | function | Load the XML at the given URL, and return an array. | |
MigrateItemsXML::getElementValue | protected | function | Get an element from the itemXML based on an xpath. | |
MigrateItemsXML::getIdList | public | function | Load ID's from URLs. | Overrides MigrateItems::getIdList |
MigrateItemsXML::getIDsFromXML | protected | function | Given an XML object, parse out the IDs for processing and return them as an array. The location of the IDs in the XML are based on the item xpath and item ID xpath set in the constructor. eg, xpath = itemXpath . '/' . itemIDXpath IDs are… | |
MigrateItemsXML::getIDXpath | public | function | Getter for itemIDXpath. | |
MigrateItemsXML::getItem | public | function | Implementers are expected to return an object representing a source item. Items from currentUrl are cached as an array of key=ID and value=stdClass object with attribute xml containing the xml SimpleXMLElement object of the item. | Overrides MigrateItems::getItem |
MigrateItemsXML::getItemID | protected | function | Get the item ID from the itemXML based on itemIDXpath. | |
MigrateItemsXML::getItemsFromXML | public | function | Parses out the items from a given XML object, and parses its items. | |
MigrateItemsXML::getItemXpath | public | function | Gets xpath identifying the element used for each item. | |
MigrateItemsXML::hash | public | function | | |
MigrateItemsXML::parseLibXMLError | public static | function | Parses a LibXMLError to an error message string. | |
MigrateItemsXML::registerNamespaces | protected | function | Explicitly register namespaces on an XML element. | |
MigrateItemsXML::xml | public | function | Load and return the xml from currentUrl. | |
MigrateItemsXML::__construct | public | function | | Overrides MigrateItems::__construct |
MigrateItemsXML::__toString | public | function | Our public face is the URL list we're getting items from. | Overrides MigrateItems::__toString |