json.inc in Migrate 7.2
Same filename and directory in other branches
Support for migration from JSON sources.
File
plugins/sources/json.inc (View source)
<?php
/**
* @file
* Support for migration from JSON sources.
*/
/**
* Implementation of MigrateList, for retrieving a list of IDs to be migrated
* from a JSON object.
*/
class MigrateListJSON extends MigrateList {

  /**
   * A URL pointing to a JSON object containing a list of IDs to be processed.
   *
   * @var string
   */
  protected $listUrl;

  /**
   * Options handed to drupal_http_request() when fetching the list. When
   * empty, the list is read with file_get_contents() instead.
   *
   * @var array
   */
  protected $httpOptions;

  /**
   * Remember where the ID list lives and how to fetch it.
   *
   * @param string $list_url
   *   URL (or filespec) of the JSON document listing the IDs.
   * @param array $http_options
   *   Optional drupal_http_request() options; leave empty to use
   *   file_get_contents().
   */
  public function __construct($list_url, $http_options = array()) {
    parent::__construct();
    $this->listUrl = $list_url;
    $this->httpOptions = $http_options;
  }

  /**
   * Our public face is the URL we're getting items from.
   *
   * @return string
   */
  public function __toString() {
    return $this->listUrl;
  }

  /**
   * Load the JSON at the list URL, and return an array of the IDs found
   * within it.
   *
   * @return array|null
   *   The IDs extracted by getIDsFromJSON(), or NULL (after displaying a
   *   message) when the document could not be retrieved or did not decode to
   *   an array.
   */
  public function getIdList() {
    migrate_instrument_start("Retrieve {$this->listUrl}");
    $json = $this->fetchListJson();
    migrate_instrument_stop("Retrieve {$this->listUrl}");

    if ($json) {
      $data = drupal_json_decode($json);
      // getIDsFromJSON() is declared to take an array, so a document that
      // decodes to a scalar is treated as a load failure rather than passed on.
      if (is_array($data)) {
        return $this->getIDsFromJSON($data);
      }
    }

    Migration::displayMessage(t('Loading of !listurl failed:', array(
      '!listurl' => $this->listUrl,
    )));
    return NULL;
  }

  /**
   * Given an array generated from JSON, parse out the IDs for processing
   * and return them as an array. The default implementation assumes the IDs are
   * simply the values of the top-level elements - in most cases, you will need
   * to override this to reflect your particular JSON structure.
   *
   * @param array $data
   *   The decoded JSON document.
   *
   * @return array
   *   The IDs to migrate.
   */
  protected function getIDsFromJSON(array $data) {
    return $data;
  }

  /**
   * Return a count of all available IDs from the source listing. The default
   * implementation assumes the count of top-level elements reflects the number
   * of IDs available - in many cases, you will need to override this to reflect
   * your particular JSON structure.
   *
   * @return int
   *   Number of IDs found; 0 when the list cannot be retrieved, is empty, or
   *   does not decode to an array.
   */
  public function computeCount() {
    $count = 0;
    $json = $this->fetchListJson();
    if ($json) {
      $data = drupal_json_decode($json);
      // Only a non-empty array is worth handing to getIDsFromJSON().
      if (is_array($data) && $data) {
        $count = count($this->getIDsFromJSON($data));
      }
    }
    return $count;
  }

  /**
   * Retrieve the raw JSON text of the ID list.
   *
   * @return string|false|null
   *   The document body, or a falsy value when nothing could be retrieved.
   */
  protected function fetchListJson() {
    if (empty($this->httpOptions)) {
      return file_get_contents($this->listUrl);
    }
    $response = drupal_http_request($this->listUrl, $this->httpOptions);
    // A failed request yields a response object without a data property.
    return isset($response->data) ? $response->data : FALSE;
  }

}
/**
* Implementation of MigrateItem, for retrieving a parsed JSON object given
* an ID provided by a MigrateList class.
*/
class MigrateItemJSON extends MigrateItem {

  /**
   * A URL pointing to a JSON object containing the data for one item to be
   * migrated.
   *
   * @var string
   */
  protected $itemUrl;

  /**
   * Options handed to drupal_http_request() when fetching an item. When
   * empty, items are read with file_get_contents() instead.
   *
   * @var array
   */
  protected $httpOptions;

  /**
   * Remember the item URL template and how to fetch it.
   *
   * @param string $item_url
   *   URL of a single item; by default each :id token is replaced with an ID
   *   value (see constructItemUrl()).
   * @param array $http_options
   *   Optional drupal_http_request() options; leave empty to use
   *   file_get_contents().
   */
  public function __construct($item_url, $http_options = array()) {
    parent::__construct();
    $this->itemUrl = $item_url;
    $this->httpOptions = $http_options;
  }

  /**
   * Implementors are expected to return an object representing a source item.
   *
   * @param mixed $id
   *   The ID (or array of ID values) of the item to load.
   *
   * @return stdClass|null
   *   The decoded item, or NULL when loading failed; in that case an error
   *   message is saved against the ID in the current migration's map.
   */
  public function getItem($id) {
    $item_url = $this->constructItemUrl($id);

    // Get the JSON object at the specified URL.
    $json = $this->loadJSONUrl($item_url);
    if ($json) {
      return $json;
    }

    // Record the failure against this ID so it shows up in the message log.
    $message = t('Loading of !objecturl failed:', array(
      '!objecturl' => $item_url,
    ));
    Migration::currentMigration()
      ->getMap()
      ->saveMessage(array($id), $message, MigrationBase::MESSAGE_ERROR);
    return NULL;
  }

  /**
   * The default implementation simply replaces the :id token in the URL with
   * the ID obtained from MigrateListJSON. Override if the item URL is not
   * so easily expressed from the ID.
   *
   * When $id is an array, successive :id tokens are replaced, left to right,
   * with successive values. The values are inserted literally: characters such
   * as $ or \ in an ID are not interpreted, and a value that itself contains
   * ":id" is not substituted again.
   *
   * @param mixed $id
   *   A single ID value, or an array of ID values.
   *
   * @return string
   *   The item URL with its :id tokens filled in.
   */
  protected function constructItemUrl($id) {
    $values = is_array($id) ? array_values($id) : array($id);
    $url = $this->itemUrl;
    $offset = 0;
    foreach ($values as $value) {
      $value = (string) $value;
      $position = strpos($url, ':id', $offset);
      if ($position === FALSE) {
        // No tokens left; any remaining values are unused.
        break;
      }
      $url = substr_replace($url, $value, $position, 3);
      // Resume searching after the inserted value.
      $offset = $position + strlen($value);
    }
    return $url;
  }

  /**
   * Default JSON loader - just pull and decode. This can be overridden for
   * preprocessing of JSON (removal of unwanted elements, caching of JSON if the
   * source service is slow, etc.)
   *
   * @param string $item_url
   *   The URL to retrieve.
   *
   * @return mixed
   *   The json_decode() result (objects are decoded as stdClass), or NULL when
   *   nothing could be retrieved or the text is not valid JSON.
   */
  protected function loadJSONUrl($item_url) {
    if (empty($this->httpOptions)) {
      $json = file_get_contents($item_url);
    }
    else {
      $response = drupal_http_request($item_url, $this->httpOptions);
      // A failed request yields a response object without a data property.
      $json = isset($response->data) ? $response->data : FALSE;
    }
    if (!is_string($json) || $json === '') {
      return NULL;
    }
    return json_decode($json);
  }

}
/**
* An iterator over a JSON file. As is, this assumes that the file is structured
* as an array of objects, e.g.:
* [{"id":"53","field1":"value1"},{"id":"54","field1":"value2"}]
* To deal with different structures, extend this class and override next().
*/
class MigrateJSONReader implements Iterator {

  /**
   * URL of the source JSON file.
   *
   * @var string
   */
  public $url;

  /**
   * Name of the field within each object containing its unique ID.
   *
   * @var string
   */
  public $idField;

  /**
   * Handle of the JSON file we're currently parsing.
   *
   * @var resource
   */
  protected $fileHandle;

  /**
   * Current element object when iterating.
   *
   * @var object|null
   */
  protected $currentElement = NULL;

  /**
   * Value of the ID for the current element when iterating.
   *
   * @var string
   */
  protected $currentId = NULL;

  /**
   * Initialize the members.
   *
   * @param $json_url
   *   URL or filespec of the JSON file to be parsed.
   * @param $id_field
   *   Name of the field within each object containing its unique ID.
   */
  public function __construct($json_url, $id_field) {
    $this->url = $json_url;
    $this->idField = $id_field;
  }

  /**
   * Implementation of Iterator::rewind().
   *
   * @return void
   */
  #[\ReturnTypeWillChange]
  public function rewind() {
    // Close any open file - we open the files lazily in next().
    if ($this->fileHandle) {
      fclose($this->fileHandle);
      $this->fileHandle = NULL;
    }
    // Load the first element.
    $this->next();
  }

  /**
   * Obtain the next non-whitespace character from the JSON file.
   *
   * @return string
   *   The next non-whitespace character, or FALSE on end-of-file.
   */
  protected function getNonBlank() {
    do {
      $c = fgetc($this->fileHandle);
    } while ($c !== FALSE && trim($c) == '');
    return $c;
  }

  /**
   * Implementation of Iterator::next().
   *
   * Populates currentElement (the object being retrieved) and currentId (that
   * object's unique identifier) from the specified JSON file. Sets both to
   * NULL at end-of-file, and also when the file cannot be opened, does not
   * start with an array, or the extracted object text cannot be decoded.
   * Handles properly-formed JSON, as well as some improper coding
   * (specifically that generated in Ning exports).
   *
   * @return void
   */
  #[\ReturnTypeWillChange]
  public function next() {
    migrate_instrument_start('MigrateJSONReader::next');
    $this->currentElement = $this->currentId = NULL;

    // Open the file and position it if necessary.
    if (!$this->fileHandle) {
      $this->fileHandle = fopen($this->url, 'r');
      if (!$this->fileHandle) {
        Migration::displayMessage(t('Could not open JSON file !url', array(
          '!url' => $this->url,
        )));
        // Keep the instrumentation balanced on every exit path.
        migrate_instrument_stop('MigrateJSONReader::next');
        return;
      }

      // We're expecting an array of objects, so the first character should
      // be [.
      $char = $this->getNonBlank();
      // Ning exports are wrapped in bogus (), so skip a leading (.
      if ($char === '(') {
        $char = $this->getNonBlank();
      }
      if ($char !== '[') {
        Migration::displayMessage(t('!url is not a JSON file containing an array of objects', array(
          '!url' => $this->url,
        )));
        migrate_instrument_stop('MigrateJSONReader::next');
        return;
      }
    }

    // We expect to be positioned either at an object (beginning with {) or
    // the end of the file (we should see a ] indicating this). Or, an
    // object-separating comma, to be skipped. Note that this treats
    // commas as optional between objects, which helps with processing
    // malformed JSON with missing commas (as in Ning exports).
    $c = $this->getNonBlank();
    if ($c === ',') {
      $c = $this->getNonBlank();
    }
    elseif ($c === ']') {
      // A closing ] ends the data unless another object starts right after
      // it, in which case we carry on with that object.
      $c = $this->getNonBlank();
      if ($c !== '{') {
        $c = NULL;
      }
    }

    // We expect to be at the first character of an object now.
    if ($c === '{') {
      // Start building a JSON string for this object.
      $json = $c;
      // Look for the closing }, ignoring brackets in strings, tracking nested
      // brackets. Watch out for escaped quotes, but also note that \\" is not
      // an escaped quote.
      $depth = 1;
      $in_string = FALSE;
      $in_escape = FALSE;
      while (($c = fgetc($this->fileHandle)) !== FALSE) {
        $json .= $c;
        if ($in_string) {
          if ($in_escape) {
            // Quietly accept an escaped character.
            $in_escape = FALSE;
          }
          elseif ($c === '"') {
            // Unescaped " means end of string.
            $in_string = FALSE;
          }
          elseif ($c === '\\') {
            // Unescaped \ means start of escape.
            $in_escape = TRUE;
          }
        }
        else {
          // Outside of strings, recognize {} as depth changes, " as start of
          // string.
          switch ($c) {
            case '{':
              $depth++;
              break;

            case '}':
              $depth--;
              break;

            case '"':
              $in_string = TRUE;
              break;
          }
          // We've found our match, exit the loop.
          if ($depth < 1) {
            break;
          }
        }
      }

      // Turn the JSON string into an object. If the text was truncated or is
      // otherwise undecodable, json_decode() gives NULL and the element and
      // ID stay NULL, which ends the iteration.
      $element = json_decode($json);
      if (is_object($element)) {
        $this->currentElement = $element;
        $this->currentId = isset($element->{$this->idField}) ? $element->{$this->idField} : NULL;
      }
    }

    migrate_instrument_stop('MigrateJSONReader::next');
  }

  /**
   * Implementation of Iterator::current().
   *
   * @return null|object
   */
  #[\ReturnTypeWillChange]
  public function current() {
    return $this->currentElement;
  }

  /**
   * Implementation of Iterator::key().
   *
   * @return null|string
   */
  #[\ReturnTypeWillChange]
  public function key() {
    return $this->currentId;
  }

  /**
   * Implementation of Iterator::valid().
   *
   * @return bool
   */
  #[\ReturnTypeWillChange]
  public function valid() {
    return !empty($this->currentElement);
  }

}
/**
* Implementation of MigrateSource, to handle imports from stand-alone JSON
* files.
*/
class MigrateSourceJSON extends MigrateSource {

  /**
   * The MigrateJSONReader object serving as a cursor over the JSON source file.
   *
   * @var MigrateJSONReader
   */
  protected $reader;

  /**
   * Name of the field within the JSON object holding the ID value.
   *
   * @var string
   */
  protected $idField;

  /**
   * The source URLs to load JSON from, indexed sequentially from 0.
   *
   * @var array
   */
  protected $sourceUrls = array();

  /**
   * Holds our current position within the $sourceUrls array.
   *
   * @var int
   */
  protected $activeUrl = NULL;

  /**
   * Store the reader class used to query JSON so we can create reader objects
   * on the fly.
   *
   * @var string
   */
  protected $readerClass = '';

  /**
   * List of available source fields.
   *
   * @var array
   */
  protected $fields = array();

  /**
   * Source constructor.
   *
   * @param string|array $urls
   *   URL(s) of the JSON source data.
   * @param string $id_field
   *   Name of the field within the JSON object holding the ID value.
   * @param array $fields
   *   Optional - keys are field names, values are descriptions. Use to override
   *   the default descriptions, or to add additional source fields which the
   *   migration will add via other means (e.g., prepareRow()).
   * @param array $options
   *   Options applied to this source. In addition to the standard MigrateSource
   *   options, we support:
   *   - reader_class: The reader class to instantiate for traversing the JSON -
   *     defaults to MigrateJSONReader (any substitutions must be derived from
   *     MigrateJSONReader).
   */
  public function __construct($urls, $id_field, array $fields = array(), array $options = array()) {
    parent::__construct($options);
    $this->idField = $id_field;
    $this->readerClass = empty($options['reader_class']) ? 'MigrateJSONReader' : $options['reader_class'];
    // URLs are addressed by integer position below, so discard whatever keys
    // the caller's array happened to use.
    $this->sourceUrls = is_array($urls) ? array_values($urls) : array($urls);
    $this->activeUrl = $this->storedActiveUrl();
    $this->fields = $fields;
  }

  /**
   * Return a string representing the source query.
   *
   * @return string
   */
  public function __toString() {
    // Clump the urls into a string.
    // This could cause a problem when using a lot of urls, may need to hash.
    return 'urls = ' . implode(', ', $this->sourceUrls);
  }

  /**
   * Returns a list of fields available to be mapped from the source query.
   *
   * @return array
   *   Keys: machine names of the fields (to be passed to addFieldMapping)
   *   Values: Human-friendly descriptions of the fields.
   */
  public function fields() {
    return $this->fields;
  }

  /**
   * Returns the active Url.
   *
   * @return string|null
   *   The URL at the current position, or NULL when no position is set.
   */
  public function activeUrl() {
    if (isset($this->activeUrl)) {
      return $this->sourceUrls[$this->activeUrl];
    }
    return NULL;
  }

  /**
   * Return a count of all available source records.
   *
   * Each source URL is traversed in full with a fresh reader.
   *
   * @return int
   */
  public function computeCount() {
    $count = 0;
    foreach ($this->sourceUrls as $url) {
      $reader = new $this->readerClass($url, $this->idField);
      foreach ($reader as $unused) {
        $count++;
      }
    }
    return $count;
  }

  /**
   * Implementation of MigrateSource::performRewind().
   *
   * Restores the URL position from the persisted variable and drops the
   * current reader, so the next getNextRow() call builds a new one through
   * getNextSource().
   */
  public function performRewind() {
    $this->activeUrl = $this->storedActiveUrl();
    if ($this->reader) {
      $this->reader->rewind();
      $this->reader = NULL;
    }
  }

  /**
   * Implementation of MigrationSource::getNextRow().
   *
   * @return stdClass
   *   data for the next row from the JSON source files, or NULL when every
   *   source has been exhausted.
   */
  public function getNextRow() {
    migrate_instrument_start('MigrateSourceJSON::next');
    $row = NULL;

    // The reader is lazy loaded, so it may not be defined yet.
    if ($this->reader) {
      // Attempt to load the next row.
      $this->reader->next();
    }

    if (isset($this->reader) && $this->reader->valid()) {
      $row = $this->reader->current();
    }
    elseif ($this->getNextSource()) {
      // The current source is at the end (or there was none yet); the next
      // source has been loaded and is positioned on its first row.
      $row = $this->reader->current();
    }

    migrate_instrument_stop('MigrateSourceJSON::next');
    return $row;
  }

  /**
   * Advances the reader to the next source from the source URLs.
   *
   * Sources that yield no valid first element are skipped. The position is
   * persisted in the migrate_source_json_active_url variable as sources are
   * opened, and the variable is deleted once the last source is the active
   * one. With an empty URL list nothing is opened or persisted.
   *
   * @return bool
   *   TRUE if a valid source was loaded
   */
  public function getNextSource() {
    migrate_instrument_start('MigrateSourceJSON::nextSource');
    $status = FALSE;
    $last_index = count($this->sourceUrls) - 1;

    while ($last_index >= 0 && ($this->activeUrl === NULL || $last_index > $this->activeUrl)) {
      // Start at the first source, or move on to the following one.
      $this->activeUrl = is_null($this->activeUrl) ? 0 : $this->activeUrl + 1;
      variable_set('migrate_source_json_active_url', $this->activeUrl);

      $this->reader = new $this->readerClass($this->sourceUrls[$this->activeUrl], $this->idField);
      $this->reader->rewind();
      if ($this->reader->valid()) {
        // We have a valid source.
        $status = TRUE;
        break;
      }
    }

    if ($last_index == $this->activeUrl) {
      variable_del('migrate_source_json_active_url');
    }

    migrate_instrument_stop('MigrateSourceJSON::nextSource');
    return $status;
  }

  /**
   * Read the persisted URL position and step one back from it.
   *
   * getNextSource() always advances by one before opening a source, so
   * stepping back here makes it reopen the source that was persisted.
   *
   * @return int|null
   *   The persisted position minus one, or NULL when nothing is persisted.
   */
  protected function storedActiveUrl() {
    $active_url = variable_get('migrate_source_json_active_url', NULL);
    if (isset($active_url)) {
      $active_url--;
    }
    return $active_url;
  }

}
Classes
Name | Description |
---|---|
MigrateItemJSON | Implementation of MigrateItem, for retrieving a parsed JSON object given an ID provided by a MigrateList class. |
MigrateJSONReader | An iterator over a JSON file. As is, this assumes that the file is structured as an array of objects,… |
MigrateListJSON | Implementation of MigrateList, for retrieving a list of IDs to be migrated from a JSON object. |
MigrateSourceJSON | Implementation of MigrateSource, to handle imports from stand-alone JSON files. |