wordpress.inc in WordPress Migrate 7.2
Same filename and directory in other branches
Implementation of migration from WordPress into Drupal
File
wordpress.inc (View source)
<?php
/**
* @file
* Implementation of migration from WordPress into Drupal
*/
/**
 * Common base for the migrations that import one WXR (WordPress export) file.
 *
 * Records the WXR filespec and the blog object it belongs to, removes rows for
 * deliberately skipped items from the map table after each import, and reports
 * import/rollback progress.
 */
abstract class WordPressMigration extends XMLMigration {

  /**
   * The filespec of the WXR file this migration is based on.
   *
   * @var string
   */
  protected $wxrFile;

  /**
   * The blog object representing a set of migrations.
   *
   * @var WordPressBlog
   */
  protected $blog;

  /**
   * List of items skipped because they don't belong in the migration at all
   * (e.g., wrong post type). postImport() deletes their ignored rows from the
   * map table and then empties this list.
   *
   * @var array
   */
  protected $skippedItems = array();

  /**
   * Constructor - general setup for WordPress migrations.
   *
   * @param array $arguments
   *   'filename' => WXR file managed by this migration
   *   'namespaces' => optional; initialized to an empty array when absent
   */
  public function __construct(array $arguments) {
    parent::__construct($arguments);

    $filename = $this->arguments['filename'];
    $this->wxrFile = $filename;
    $this->blog = wordpress_migrate_blog($filename);

    // Guarantee that the 'namespaces' argument is always present.
    if (empty($this->arguments['namespaces'])) {
      $this->arguments['namespaces'] = array();
    }
  }

  /**
   * Called after completion of each migration.
   */
  protected function postImport() {
    parent::postImport();

    // Clear ignored rows from the map, so as not to confuse reporting.
    $map_table = $this->map->getMapTable();
    foreach ($this->skippedItems as $postID) {
      db_delete($map_table)
        ->condition('needs_update', MigrateMap::STATUS_IGNORED)
        ->condition('sourceid1', $postID)
        ->execute();
    }
    $this->skippedItems = array();
  }

  /**
   * Outputs a progress message, reflecting the current status of a migration process.
   *
   * While importing, the item count is the destination's created count; in any
   * other status it is the number of items processed since the last feedback
   * plus the source's ignored count.
   *
   * @param int $result
   *   Status of the process, represented by one of the Migration::RESULT_* constants.
   */
  protected function progressMessage($result) {
    $time = microtime(TRUE) - $this->lastfeedback;
    if ($time > 0) {
      $perminute = round(60 * $this->processed_since_feedback / $time);
      $time = round($time, 1);
    }
    else {
      // No measurable time has elapsed, so a rate cannot be computed.
      $perminute = '?';
    }

    if ($this->status == Migration::STATUS_IMPORTING) {
      switch ($result) {
        case Migration::RESULT_COMPLETED:
          $basetext = "Imported !numitems in !time sec (!perminute/min) - done with '!name'";
          $type = 'completed';
          break;

        case Migration::RESULT_FAILED:
          $basetext = "Imported !numitems in !time sec (!perminute/min) - failure with '!name'";
          $type = 'failed';
          break;

        case Migration::RESULT_INCOMPLETE:
          $basetext = "Imported !numitems in !time sec (!perminute/min) - continuing with '!name'";
          $type = 'ok';
          break;

        case Migration::RESULT_STOPPED:
          $basetext = "Imported !numitems in !time sec (!perminute/min) - stopped '!name'";
          $type = 'warning';
          break;

        default:
          // Any other result code: still report, rather than passing
          // undefined variables to t() and displayMessage().
          $basetext = "Imported !numitems in !time sec (!perminute/min) - unexpected result for '!name'";
          $type = 'warning';
          break;
      }
      $numitems = $this->destination->getCreated();
    }
    else {
      switch ($result) {
        case Migration::RESULT_COMPLETED:
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - done with '!name'";
          $type = 'completed';
          break;

        case Migration::RESULT_FAILED:
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - failure with '!name'";
          $type = 'failed';
          break;

        case Migration::RESULT_INCOMPLETE:
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - continuing with '!name'";
          $type = 'ok';
          break;

        case Migration::RESULT_STOPPED:
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - stopped '!name'";
          $type = 'warning';
          break;

        default:
          // Any other result code: still report, rather than passing
          // undefined variables to t() and displayMessage().
          $basetext = "Rolled back !numitems in !time sec (!perminute/min) - unexpected result for '!name'";
          $type = 'warning';
          break;
      }
      $numitems = $this->processed_since_feedback + $this->source->getIgnored();
    }

    $message = t($basetext, array(
      '!numitems' => $numitems,
      '!time' => $time,
      '!perminute' => $perminute,
      '!name' => $this->machineName,
    ));
    self::displayMessage($message, $type);

    if ($result == Migration::RESULT_INCOMPLETE) {
      // Start a new feedback interval. Use the same clock as the elapsed-time
      // computation above; time() would drop the fractional second and skew
      // the next interval's duration and rate.
      $this->lastfeedback = microtime(TRUE);
      $this->processed_since_feedback = $this->successes_since_feedback = 0;
      $this->source->resetStats();
      $this->destination->resetStats();
    }
  }

  /**
   * Work-around for http://drupal.org/node/936222 - make sure our node_save()
   * calls not governed by the node destination class do not overwrite aliases.
   *
   * Sets $node->path['pathauto'] to 0 and, when no alias is set, sets
   * $node->path['alias'] to an empty string.
   *
   * @param $node
   *   Node object about to be saved; modified in place.
   */
  protected function disablePathauto($node) {
    $node->path['pathauto'] = 0;
    if (!isset($node->path['alias'])) {
      $node->path['alias'] = '';
    }
  }

}
/**
 * Represents one WordPress blog, as described by a WXR export file.
 *
 * Constructing an instance scans the WXR file for the blog's title, link,
 * base URL and WXR version, records them in the wordpress_migrate table keyed
 * by blog URL, and loads the resulting blog_id.
 */
class WordPressBlog {

  /**
   * Value of blog_id for this blog's row in the wordpress_migrate table.
   */
  protected $blogID;

  /**
   * Filespec of the WXR file, as passed to the constructor.
   */
  protected $filename;

  /**
   * Content of the wp:wxr_version element; '1.0' when the file has none.
   */
  protected $wxrVersion = '1.0';

  /**
   * Title reduced to ASCII letters only (falls back to the letters of the
   * blog URL); used as the prefix of migration machine names.
   */
  protected $title;

  /**
   * Title exactly as read from the WXR file.
   */
  protected $displayTitle;

  /**
   * Content of wp:base_blog_url, or the link when that element is absent.
   * Used as the key of the blog's wordpress_migrate row.
   */
  protected $blog_url;

  /**
   * Content of the first non-empty link element in the WXR file.
   */
  protected $link;

  /**
   * uid of the global $user at the time the blog object was constructed.
   */
  protected $uid;

  /**
   * Arguments passed to the constructor.
   */
  protected $arguments = array();

  /**
   * Cache of migration instances, filled lazily by migrations().
   */
  protected $migrations = array();

  public function getBlogID() {
    return $this->blogID;
  }

  public function getFilename() {
    return $this->filename;
  }

  public function getWxrVersion() {
    return $this->wxrVersion;
  }

  public function getTitle() {
    return $this->title;
  }

  public function getDisplayTitle() {
    return $this->displayTitle;
  }

  public function getBlogUrl() {
    return $this->blog_url;
  }

  public function getLink() {
    return $this->link;
  }

  public function getUid() {
    return $this->uid;
  }

  /**
   * Reads the blog's identifying data from a WXR file and records it.
   *
   * @param $filename
   *   Filespec of the WXR file.
   * @param array $arguments
   *   Stored on the object as-is.
   *
   * @throws Exception
   *   When the file cannot be opened, or when it yields neither a
   *   wp:base_blog_url nor a link.
   */
  public function __construct($filename, $arguments = array()) {
    $this->filename = $filename;
    $this->arguments = $arguments;

    // Make sure the upload directory is properly protected
    file_create_htaccess('wordpress://', TRUE);

    // Suppress errors during parsing, so we can pick them up after
    libxml_use_internal_errors(TRUE);

    // Get the blog_url, which is our unique determiner of which blog we're
    // talking about
    $title = '';
    $reader = new XMLReader();
    if (!$reader->open($this->filename)) {
      throw new Exception(t('Could not open XML file !url', array(
        '!url' => $this->filename,
      )));
    }

    $this->blog_url = '';
    while ($reader->read()) {
      if ($reader->nodeType == XMLReader::ELEMENT) {
        switch ($reader->name) {
          case 'title':
            // NOTE(review): every title element seen before the loop stops
            // overwrites the previous one; when wp:base_blog_url is absent the
            // whole file is scanned - confirm this is intended.
            $title = self::readString($reader);
            $this->displayTitle = $title;
            break;

          case 'wp:wxr_version':
            $this->wxrVersion = self::readString($reader);
            break;

          case 'wp:base_blog_url':
            $this->blog_url = self::readString($reader);
            break;

          case 'link':
            // Catch only the first (non-empty) link; later link elements must
            // not replace it.
            if (empty($this->link)) {
              $this->link = self::readString($reader);
            }
            break;
        }
      }
      // Stop scanning as soon as everything we need has been found.
      if (!empty($title) && !empty($this->blog_url) && !empty($this->link)) {
        break;
      }
    }
    // Release the file handle held by the reader.
    $reader->close();

    // Validate that it really is a WXR file
    if (empty($this->blog_url)) {
      // Older WP versions did not have a blog_url but used link instead.
      if (!empty($this->link)) {
        $this->blog_url = $this->link;
      }
      else {
        throw new Exception(t('The uploaded file is not a valid WordPress export'));
      }
    }

    // Keep only alphabetic characters
    $this->title = preg_replace('/[^A-Za-z]/', '', $title);
    if (!$this->title) {
      $this->title = preg_replace('/[^A-Za-z]/', '', $this->blog_url);
    }

    global $user;
    $this->uid = $user->uid;

    // Insert or update the row for this blog, keyed by its URL.
    db_merge('wordpress_migrate')
      ->key(array(
        'blog_url' => $this->blog_url,
      ))
      ->fields(array(
        'title' => $this->title,
        'uid' => $this->uid,
        'link' => $this->link,
        'filename' => $this->filename,
        'wxr_version' => $this->wxrVersion,
      ))
      ->execute();

    $this->blogID = db_select('wordpress_migrate', 'wm')
      ->fields('wm', array(
        'blog_id',
      ))
      ->condition('blog_url', $this->blog_url)
      ->execute()
      ->fetchField();
  }

  /**
   * Builds the machine name of one of this blog's migrations.
   *
   * The name is the blog's alphabetic title followed by the default class
   * name with its leading 'WordPress' removed.
   *
   * @param $class_name
   *   Either a default class name (a key of migrationClasses()) or the name
   *   of the class overriding it (a value of migrationClasses()).
   *
   * @return string
   */
  public function machineName($class_name) {
    // If the default classes have been overridden, $class_name might be either
    // the default class name, or the name of the overridden class. Check first
    // for the former case, then the latter
    $classes = $this->migrationClasses();
    if (!isset($classes[$class_name])) {
      $flipped = array_flip($classes);
      // Only translate names we actually know about; an unknown name is used
      // as given instead of triggering an undefined-index error.
      if (isset($flipped[$class_name])) {
        $class_name = $flipped[$class_name];
      }
    }
    return $this->title . substr($class_name, strlen('WordPress'));
  }

  /**
   * The implemented WordPress migrations, in the order they should be run.
   *
   * @return array
   *   Class names to instantiate, keyed by default class name.
   */
  public function migrationClasses() {
    return array(
      'WordPressAuthor' => 'WordPressAuthor',
      'WordPressCategory' => 'WordPressCategory',
      'WordPressTag' => 'WordPressTag',
      'WordPressBlogEntry' => 'WordPressBlogEntry',
      'WordPressPage' => 'WordPressPage',
      'WordPressAttachment' => 'WordPressAttachment',
      'WordPressComment' => 'WordPressComment',
    );
  }

  /**
   * Get a list of all migrations in this blog.
   *
   * @return Migration[]
   *   Migration instances keyed by class name; classes whose migration cannot
   *   be instantiated are left out.
   */
  public function migrations() {
    if (empty($this->migrations)) {
      $this->migrations = array();
      foreach ($this->migrationClasses() as $base_class => $actual_class) {
        try {
          $this->migrations[$actual_class] = MigrationBase::getInstance($this->machineName($actual_class));
        }
        catch (Exception $e) {
          // Simply ignore non-existent migrations
        }
      }
    }
    return $this->migrations;
  }

  /**
   * Get a list of all WordPress blogs.
   *
   * @return WordPressBlog[]
   *   One entry per row of the wordpress_migrate table, obtained through
   *   wordpress_migrate_blog() from the row's filename.
   */
  public static function blogs() {
    $blogs = array();
    $result = db_select('wordpress_migrate', 'wm')
      ->fields('wm', array(
        'filename',
      ))
      ->execute();
    foreach ($result as $row) {
      $blogs[] = wordpress_migrate_blog($row->filename);
    }
    return $blogs;
  }

  /**
   * WXR files typically need some cleanup to be successfully parsed - perform
   * that here.
   *
   * The header (everything up to and including the line containing <channel>)
   * gets any missing excerpt, Atom and iTunes namespace declarations; every
   * following line has bare ampersands encoded, control characters removed,
   * the text "// ]]" removed and HTML entities converted to numeric ones.
   *
   * @param $sourcefile
   *   The raw WXR file as uploaded.
   * @param $destination
   *   Filespec to which to write the cleaned-up WXR file. Omit when
   *   $namespaces_only == TRUE.
   * @param bool $unlink
   *   Indicates whether $sourcefile will be deleted after preprocessing.
   * @param bool $namespaces_only
   *   When TRUE, do not rewrite the file, simply gather and return the namespaces.
   *
   * @return array
   *   List of referenced namespaces, keyed by prefix.
   *
   * @throws Exception
   *   When the source file cannot be read or the destination cannot be
   *   opened for writing.
   */
  public static function preprocessFile($sourcefile, $destination, $unlink = TRUE, $namespaces_only = FALSE) {
    // Cleanup some stuff in the process of moving the file to its final
    // destination
    $source_handle = fopen($sourcefile, 'r');
    if ($source_handle === FALSE) {
      throw new Exception(t('Could not open XML file !url', array(
        '!url' => $sourcefile,
      )));
    }

    // First, get the header (everything before the <channel> element) to
    // rewrite the namespaces (skipping any empty lines).
    $header = '';
    while (($line = fgets($source_handle)) !== FALSE) {
      if (trim($line)) {
        $header .= $line;
        if (strpos($line, '<channel>') !== FALSE) {
          break;
        }
      }
    }

    // The excerpt namespace is sometimes omitted, stuff it in if necessary.
    // Also add the Atom and iTunes namespaces, in case they're referenced.
    // Each declaration is inserted just before the content namespace, and only
    // when the header does not already declare that prefix: declaring the same
    // attribute twice would make the document ill-formed.
    $content_ns = 'xmlns:content="http://purl.org/rss/1.0/modules/content/"';
    $extra_namespaces = array(
      'xmlns:excerpt=' => 'xmlns:excerpt="http://wordpress.org/export/1.0/excerpt/"',
      'xmlns:atom=' => 'xmlns:atom="http://www.w3.org/2005/Atom"',
      'xmlns:itunes=' => 'xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"',
    );
    foreach ($extra_namespaces as $signature => $declaration) {
      if (strpos($header, $signature) === FALSE) {
        $header = str_replace($content_ns, $declaration . "\n\t" . $content_ns, $header);
      }
    }

    // Collect every declared namespace as prefix => URI.
    preg_match_all('|xmlns:(.+?)="(.+?)"|i', $header, $matches, PREG_SET_ORDER);
    $namespaces = array();
    foreach ($matches as $match) {
      $namespaces[$match[1]] = $match[2];
    }

    if ($namespaces_only) {
      fclose($source_handle);
      return $namespaces;
    }

    $dest_handle = fopen($destination, 'w');
    if ($dest_handle === FALSE) {
      fclose($source_handle);
      throw new Exception(t('Could not open XML file !url', array(
        '!url' => $destination,
      )));
    }

    // Replace HTML entities with XML entities
    $header = strtr($header, self::$entityReplacements);
    fwrite($dest_handle, $header);

    // Now, fix unencoded ampersands and bogus characters on a line-by-line
    // basis. Compare against FALSE so a final line consisting of "0" is not
    // mistaken for end of file.
    while (($line = fgets($source_handle)) !== FALSE) {
      // Handle unencoded ampersands: any & that does not start an entity or
      // character reference.
      $line = preg_replace('/&(?![\\w\\d#]+;)/', '&amp;', $line);

      // Remove control characters (the regex removes the newline, so tack it
      // back on). With the u modifier preg_replace() returns NULL when the
      // line is not valid UTF-8; fall back to stripping ASCII control bytes so
      // the line's content is not silently lost.
      $cleaned = preg_replace('~\\p{C}+~u', '', $line);
      if ($cleaned === NULL) {
        $cleaned = preg_replace('/[\\x00-\\x1F\\x7F]+/', '', $line);
      }
      $line = $cleaned . "\n";

      // WordPress export doesn't properly format embedded CDATA sections - our
      // quick-and-dirty fix is to remove the terminator of the embedded section
      $line = preg_replace('|// \\]\\]|', '', $line);

      // Replace HTML entities with XML entities
      $line = strtr($line, self::$entityReplacements);
      fwrite($dest_handle, $line);
    }

    fclose($dest_handle);
    fclose($source_handle);
    if ($unlink) {
      unlink($sourcefile);
    }
    return $namespaces;
  }

  /**
   * Translation table between HTML entities and XML entities; some WP blogs
   * use HTML entities in XML.
   *
   * Keys are HTML named entities, values the equivalent numeric character
   * references. The five entities predefined by XML are not listed.
   *
   * @var array
   */
  protected static $entityReplacements = array(
    '&AElig;' => '&#198;',
    '&Aacute;' => '&#193;',
    '&Acirc;' => '&#194;',
    '&Agrave;' => '&#192;',
    '&Alpha;' => '&#913;',
    '&Aring;' => '&#197;',
    '&Atilde;' => '&#195;',
    '&Auml;' => '&#196;',
    '&Beta;' => '&#914;',
    '&Ccedil;' => '&#199;',
    '&Chi;' => '&#935;',
    '&Dagger;' => '&#8225;',
    '&Delta;' => '&#916;',
    '&ETH;' => '&#208;',
    '&Eacute;' => '&#201;',
    '&Ecirc;' => '&#202;',
    '&Egrave;' => '&#200;',
    '&Epsilon;' => '&#917;',
    '&Eta;' => '&#919;',
    '&Euml;' => '&#203;',
    '&Gamma;' => '&#915;',
    '&Iacute;' => '&#205;',
    '&Icirc;' => '&#206;',
    '&Igrave;' => '&#204;',
    '&Iota;' => '&#921;',
    '&Iuml;' => '&#207;',
    '&Kappa;' => '&#922;',
    '&Lambda;' => '&#923;',
    '&Mu;' => '&#924;',
    '&Ntilde;' => '&#209;',
    '&Nu;' => '&#925;',
    '&OElig;' => '&#338;',
    '&Oacute;' => '&#211;',
    '&Ocirc;' => '&#212;',
    '&Ograve;' => '&#210;',
    '&Omega;' => '&#937;',
    '&Omicron;' => '&#927;',
    '&Oslash;' => '&#216;',
    '&Otilde;' => '&#213;',
    '&Ouml;' => '&#214;',
    '&Phi;' => '&#934;',
    '&Pi;' => '&#928;',
    '&Prime;' => '&#8243;',
    '&Psi;' => '&#936;',
    '&Rho;' => '&#929;',
    '&Scaron;' => '&#352;',
    '&Sigma;' => '&#931;',
    '&THORN;' => '&#222;',
    '&Tau;' => '&#932;',
    '&Theta;' => '&#920;',
    '&Uacute;' => '&#218;',
    '&Ucirc;' => '&#219;',
    '&Ugrave;' => '&#217;',
    '&Upsilon;' => '&#933;',
    '&Uuml;' => '&#220;',
    '&Xi;' => '&#926;',
    '&Yacute;' => '&#221;',
    '&Yuml;' => '&#376;',
    '&Zeta;' => '&#918;',
    '&aacute;' => '&#225;',
    '&acirc;' => '&#226;',
    '&acute;' => '&#180;',
    '&aelig;' => '&#230;',
    '&agrave;' => '&#224;',
    '&alefsym;' => '&#8501;',
    '&alpha;' => '&#945;',
    '&and;' => '&#8743;',
    '&ang;' => '&#8736;',
    '&aring;' => '&#229;',
    '&asymp;' => '&#8776;',
    '&atilde;' => '&#227;',
    '&auml;' => '&#228;',
    '&bdquo;' => '&#8222;',
    '&beta;' => '&#946;',
    '&brvbar;' => '&#166;',
    '&bull;' => '&#8226;',
    '&cap;' => '&#8745;',
    '&ccedil;' => '&#231;',
    '&cedil;' => '&#184;',
    '&cent;' => '&#162;',
    '&chi;' => '&#967;',
    '&circ;' => '&#710;',
    '&clubs;' => '&#9827;',
    '&cong;' => '&#8773;',
    '&copy;' => '&#169;',
    '&crarr;' => '&#8629;',
    '&cup;' => '&#8746;',
    '&curren;' => '&#164;',
    '&dArr;' => '&#8659;',
    '&dagger;' => '&#8224;',
    '&darr;' => '&#8595;',
    '&deg;' => '&#176;',
    '&delta;' => '&#948;',
    '&diams;' => '&#9830;',
    '&divide;' => '&#247;',
    '&eacute;' => '&#233;',
    '&ecirc;' => '&#234;',
    '&egrave;' => '&#232;',
    '&empty;' => '&#8709;',
    '&emsp;' => '&#8195;',
    '&ensp;' => '&#8194;',
    '&epsilon;' => '&#949;',
    '&equiv;' => '&#8801;',
    '&eta;' => '&#951;',
    '&eth;' => '&#240;',
    '&euml;' => '&#235;',
    '&euro;' => '&#8364;',
    '&exist;' => '&#8707;',
    '&fnof;' => '&#402;',
    '&forall;' => '&#8704;',
    '&frac12;' => '&#189;',
    '&frac14;' => '&#188;',
    '&frac34;' => '&#190;',
    '&frasl;' => '&#8260;',
    '&gamma;' => '&#947;',
    '&ge;' => '&#8805;',
    '&hArr;' => '&#8660;',
    '&harr;' => '&#8596;',
    '&hearts;' => '&#9829;',
    '&hellip;' => '&#8230;',
    '&iacute;' => '&#237;',
    '&icirc;' => '&#238;',
    '&iexcl;' => '&#161;',
    '&igrave;' => '&#236;',
    '&image;' => '&#8465;',
    '&infin;' => '&#8734;',
    '&int;' => '&#8747;',
    '&iota;' => '&#953;',
    '&iquest;' => '&#191;',
    '&isin;' => '&#8712;',
    '&iuml;' => '&#239;',
    '&kappa;' => '&#954;',
    '&lArr;' => '&#8656;',
    '&lambda;' => '&#955;',
    '&lang;' => '&#9001;',
    '&laquo;' => '&#171;',
    '&larr;' => '&#8592;',
    '&lceil;' => '&#8968;',
    '&ldquo;' => '&#8220;',
    '&le;' => '&#8804;',
    '&lfloor;' => '&#8970;',
    '&lowast;' => '&#8727;',
    '&loz;' => '&#9674;',
    '&lrm;' => '&#8206;',
    '&lsaquo;' => '&#8249;',
    '&lsquo;' => '&#8216;',
    '&macr;' => '&#175;',
    '&mdash;' => '&#8212;',
    '&micro;' => '&#181;',
    '&middot;' => '&#183;',
    '&minus;' => '&#8722;',
    '&mu;' => '&#956;',
    '&nabla;' => '&#8711;',
    '&nbsp;' => '&#160;',
    '&ndash;' => '&#8211;',
    '&ne;' => '&#8800;',
    '&ni;' => '&#8715;',
    '&not;' => '&#172;',
    '&notin;' => '&#8713;',
    '&nsub;' => '&#8836;',
    '&ntilde;' => '&#241;',
    '&nu;' => '&#957;',
    '&oacute;' => '&#243;',
    '&ocirc;' => '&#244;',
    '&oelig;' => '&#339;',
    '&ograve;' => '&#242;',
    '&oline;' => '&#8254;',
    '&omega;' => '&#969;',
    '&omicron;' => '&#959;',
    '&oplus;' => '&#8853;',
    '&or;' => '&#8744;',
    '&ordf;' => '&#170;',
    '&ordm;' => '&#186;',
    '&oslash;' => '&#248;',
    '&otilde;' => '&#245;',
    '&otimes;' => '&#8855;',
    '&ouml;' => '&#246;',
    '&para;' => '&#182;',
    '&part;' => '&#8706;',
    '&permil;' => '&#8240;',
    '&perp;' => '&#8869;',
    '&phi;' => '&#966;',
    '&pi;' => '&#960;',
    '&piv;' => '&#982;',
    '&plusmn;' => '&#177;',
    '&pound;' => '&#163;',
    '&prime;' => '&#8242;',
    '&prod;' => '&#8719;',
    '&prop;' => '&#8733;',
    '&psi;' => '&#968;',
    '&rArr;' => '&#8658;',
    '&radic;' => '&#8730;',
    '&rang;' => '&#9002;',
    '&raquo;' => '&#187;',
    '&rarr;' => '&#8594;',
    '&rceil;' => '&#8969;',
    '&rdquo;' => '&#8221;',
    '&real;' => '&#8476;',
    '&reg;' => '&#174;',
    '&rfloor;' => '&#8971;',
    '&rho;' => '&#961;',
    '&rlm;' => '&#8207;',
    '&rsaquo;' => '&#8250;',
    '&rsquo;' => '&#8217;',
    '&sbquo;' => '&#8218;',
    '&scaron;' => '&#353;',
    '&sdot;' => '&#8901;',
    '&sect;' => '&#167;',
    '&shy;' => '&#173;',
    '&sigma;' => '&#963;',
    '&sigmaf;' => '&#962;',
    '&sim;' => '&#8764;',
    '&spades;' => '&#9824;',
    '&sub;' => '&#8834;',
    '&sube;' => '&#8838;',
    '&sum;' => '&#8721;',
    '&sup1;' => '&#185;',
    '&sup2;' => '&#178;',
    '&sup3;' => '&#179;',
    '&sup;' => '&#8835;',
    '&supe;' => '&#8839;',
    '&szlig;' => '&#223;',
    '&tau;' => '&#964;',
    '&there4;' => '&#8756;',
    '&theta;' => '&#952;',
    '&thetasym;' => '&#977;',
    '&thinsp;' => '&#8201;',
    '&thorn;' => '&#254;',
    '&tilde;' => '&#732;',
    '&times;' => '&#215;',
    '&trade;' => '&#8482;',
    '&uArr;' => '&#8657;',
    '&uacute;' => '&#250;',
    '&uarr;' => '&#8593;',
    '&ucirc;' => '&#251;',
    '&ugrave;' => '&#249;',
    '&uml;' => '&#168;',
    '&upsih;' => '&#978;',
    '&upsilon;' => '&#965;',
    '&uuml;' => '&#252;',
    '&weierp;' => '&#8472;',
    '&xi;' => '&#958;',
    '&yacute;' => '&#253;',
    '&yen;' => '&#165;',
    '&yuml;' => '&#255;',
    '&zeta;' => '&#950;',
    '&zwj;' => '&#8205;',
    '&zwnj;' => '&#8204;',
  );

  /**
   * With earlier versions of libxml, XMLReader has no readString() method -
   * mock it up if necessary.
   *
   * @param $reader
   *   XMLReader instance being iterated for XML parsing.
   *
   * @return string
   *   Text content of the reader's current node; an empty string when the
   *   fallback cannot expand the node.
   */
  public static function readString(XMLReader $reader) {
    if (method_exists('XMLReader', 'readString')) {
      return $reader->readString();
    }
    // Fallback: expand the current node into a DOM node and take its text.
    // expand() returns FALSE on failure, which has no textContent.
    $node = $reader->expand();
    return $node ? $node->textContent : '';
  }

}
Classes
Name | Description |
---|---|
WordPressBlog | Represents one WordPress blog read from a WXR export file, together with its migrations |
WordPressMigration | Abstract base class for the migrations from WordPress into Drupal |