You are here

class WordPressBlog in WordPress Migrate 7.2

Same name and namespace in other branches
  1. 7 wordpress.inc \WordPressBlog

Hierarchy

Expanded class hierarchy of WordPressBlog

1 string reference to 'WordPressBlog'
wordpress_migrate_blog_class in ./wordpress_migrate.module

File

./wordpress.inc, line 152
Implementation of migration from WordPress into Drupal

View source
/**
 * Represents one WordPress blog being imported from a WXR export file.
 *
 * Constructing an instance scans the WXR file for the blog's title, link,
 * base URL and WXR version, records the blog in the wordpress_migrate table
 * (keyed by blog URL), and exposes the gathered values through getters.
 */
class WordPressBlog {

  /**
   * Value of the blog_id column of this blog's wordpress_migrate row.
   */
  protected $blogID;

  /**
   * @return mixed
   *  The blog_id fetched from the wordpress_migrate table.
   */
  public function getBlogID() {
    return $this->blogID;
  }

  /**
   * Filespec of the WXR file, as passed to the constructor.
   */
  protected $filename;

  /**
   * @return string
   */
  public function getFilename() {
    return $this->filename;
  }

  /**
   * Contents of <wp:wxr_version>; stays at '1.0' when the element is not seen.
   */
  protected $wxrVersion = '1.0';

  /**
   * @return string
   */
  public function getWxrVersion() {
    return $this->wxrVersion;
  }

  /**
   * Blog title reduced to ASCII letters; used as the machine name prefix.
   */
  protected $title;

  /**
   * @return string
   */
  public function getTitle() {
    return $this->title;
  }

  /**
   * Blog title exactly as read from the WXR file.
   */
  protected $displayTitle;

  /**
   * @return string
   */
  public function getDisplayTitle() {
    return $this->displayTitle;
  }

  /**
   * Contents of <wp:base_blog_url>, falling back to the blog's <link>. This is
   * the key identifying the blog in the wordpress_migrate table.
   */
  protected $blog_url;

  /**
   * @return string
   */
  public function getBlogUrl() {
    return $this->blog_url;
  }

  /**
   * Contents of the first non-empty <link> element in the WXR file.
   */
  protected $link;

  /**
   * @return string
   */
  public function getLink() {
    return $this->link;
  }

  /**
   * uid of the global $user at the time this object was constructed.
   */
  protected $uid;

  /**
   * @return int
   */
  public function getUid() {
    return $this->uid;
  }

  /**
   * Arguments passed to the constructor. Stored only; not otherwise used
   * within this class.
   */
  protected $arguments = array();

  /**
   * Cache of migration instances built by migrations(), keyed by class name.
   */
  protected $migrations = array();

  /**
   * Scan the WXR file for blog metadata and register the blog in the database.
   *
   * @param $filename
   *  Filespec of the (already preprocessed) WXR file.
   * @param array $arguments
   *  Additional arguments, stored on the object.
   *
   * @throws Exception
   *  If the file cannot be opened, or contains neither a <wp:base_blog_url>
   *  nor a <link> element.
   */
  public function __construct($filename, $arguments = array()) {
    $this->filename = $filename;
    $this->arguments = $arguments;

    // Make sure the upload directory is properly protected
    file_create_htaccess('wordpress://', TRUE);

    // Have libxml collect parsing errors internally rather than emitting them
    // as PHP warnings.
    libxml_use_internal_errors(TRUE);

    $reader = new XMLReader();
    if (!$reader->open($this->filename)) {
      throw new Exception(t('Could not open XML file !url', array(
        '!url' => $this->filename,
      )));
    }

    // Get the blog_url, which is our unique determiner of which blog we're
    // talking about
    $title = '';
    $this->blog_url = '';
    while ($reader->read()) {
      if ($reader->nodeType == XMLReader::ELEMENT) {
        switch ($reader->name) {
          case 'title':
            // Catch only the first title. When no <wp:base_blog_url> is
            // present the whole file gets scanned, and later <title>
            // elements belong to individual items, not to the blog.
            if (empty($title)) {
              $title = self::readString($reader);
              $this->displayTitle = $title;
            }
            break;

          case 'wp:wxr_version':
            $this->wxrVersion = self::readString($reader);
            break;

          case 'wp:base_blog_url':
            $this->blog_url = self::readString($reader);
            break;

          case 'link':
            // Catch only the first link, for the same reason as the title.
            if (empty($this->link)) {
              $this->link = self::readString($reader);
            }
            break;
        }
      }

      // Stop scanning as soon as everything we need has been found.
      if (!empty($title) && !empty($this->blog_url) && !empty($this->link)) {
        break;
      }
    }
    // Release the underlying file now that scanning is done.
    $reader->close();

    // Validate that it really is a WXR file
    if (empty($this->blog_url)) {
      // Older WP versions did not have a blog_url but used link instead.
      if (empty($this->link)) {
        throw new Exception(t('The uploaded file is not a valid WordPress export'));
      }
      $this->blog_url = $this->link;
    }

    // Keep only alphabetic characters, so the title can prefix machine names.
    // Fall back to the URL when the title contains no ASCII letters at all.
    $this->title = preg_replace('/[^A-Za-z]/', '', $title);
    if (!$this->title) {
      $this->title = preg_replace('/[^A-Za-z]/', '', $this->blog_url);
    }

    global $user;
    $this->uid = $user->uid;

    // Insert or update the row for this blog, then read back its blog_id.
    db_merge('wordpress_migrate')
      ->key(array(
        'blog_url' => $this->blog_url,
      ))
      ->fields(array(
        'title' => $this->title,
        'uid' => $this->uid,
        'link' => $this->link,
        'filename' => $this->filename,
        'wxr_version' => $this->wxrVersion,
      ))
      ->execute();
    $this->blogID = db_select('wordpress_migrate', 'wm')
      ->fields('wm', array('blog_id'))
      ->condition('blog_url', $this->blog_url)
      ->execute()
      ->fetchField();
  }

  /**
   * Build the machine name of one of this blog's migrations.
   *
   * The name is the blog's sanitized title followed by the default class name
   * with its leading 'WordPress' (9 characters) removed.
   *
   * @param $class_name
   *  Either a default class name (a key of migrationClasses()) or the name of
   *  the class overriding it (a value of migrationClasses()).
   *
   * @return string
   */
  public function machineName($class_name) {
    // If the default classes have been overridden, $class_name might be either
    // the default class name, or the name of the overridden class. Check first
    // for the former case, then the latter
    $classes = $this->migrationClasses();
    if (!isset($classes[$class_name])) {
      $flipped = array_flip($classes);
      // A name that is neither a key nor a value is used as given, rather
      // than triggering an undefined-index notice.
      if (isset($flipped[$class_name])) {
        $class_name = $flipped[$class_name];
      }
    }
    return $this->title . substr($class_name, strlen('WordPress'));
  }

  /**
   * The implemented WordPress migrations, in the order they should be run.
   *
   * @return array
   *  Migration class names to use, keyed by default class name.
   */
  public function migrationClasses() {
    return array(
      'WordPressAuthor' => 'WordPressAuthor',
      'WordPressCategory' => 'WordPressCategory',
      'WordPressTag' => 'WordPressTag',
      'WordPressBlogEntry' => 'WordPressBlogEntry',
      'WordPressPage' => 'WordPressPage',
      'WordPressAttachment' => 'WordPressAttachment',
      'WordPressComment' => 'WordPressComment',
    );
  }

  /**
   * Get a list of all migrations in this blog.
   *
   * The list is built on first use and cached; as long as it is empty it is
   * rebuilt on every call.
   *
   * @return Migration[]
   *  Migration instances keyed by class name.
   */
  public function migrations() {
    if (empty($this->migrations)) {
      $this->migrations = array();
      foreach ($this->migrationClasses() as $actual_class) {
        try {
          $this->migrations[$actual_class] = MigrationBase::getInstance($this->machineName($actual_class));
        }
        catch (Exception $e) {
          // Simply ignore non-existent migrations
        }
      }
    }
    return $this->migrations;
  }

  /**
   * Get a list of all WordPress blogs.
   *
   * One blog is obtained via wordpress_migrate_blog() for each filename
   * stored in the wordpress_migrate table.
   *
   * @return WordPressBlog[]
   */
  public static function blogs() {
    $blogs = array();
    $result = db_select('wordpress_migrate', 'wm')
      ->fields('wm', array('filename'))
      ->execute();
    foreach ($result as $row) {
      $blogs[] = wordpress_migrate_blog($row->filename);
    }
    return $blogs;
  }

  /**
   * WXR files typically need some cleanup to be successfully parsed - perform
   * that here.
   *
   * @param $sourcefile
   *  The raw WXR file as uploaded.
   * @param $destination
   *  Filespec to which to write the cleaned-up WXR file. Omit when
   *  $namespaces_only == TRUE.
   * @param bool $unlink
   *  Indicates whether $sourcefile will be deleted after preprocessing.
   * @param bool $namespaces_only
   *  When TRUE, do not rewrite the file, simply gather and return the namespaces.
   *
   * @return array
   *  List of referenced namespaces, keyed by prefix.
   *
   * @throws Exception
   *  If $sourcefile cannot be opened for reading or $destination cannot be
   *  opened for writing.
   */
  public static function preprocessFile($sourcefile, $destination, $unlink = TRUE, $namespaces_only = FALSE) {
    // Cleanup some stuff in the process of moving the file to its final
    // destination. Fail early on unusable files: reading from an invalid
    // handle would otherwise never hit end-of-file.
    $source_handle = fopen($sourcefile, 'r');
    if ($source_handle === FALSE) {
      throw new Exception(t('Could not open XML file !url', array(
        '!url' => $sourcefile,
      )));
    }
    $dest_handle = NULL;
    if (!$namespaces_only) {
      $dest_handle = fopen($destination, 'w');
      if ($dest_handle === FALSE) {
        fclose($source_handle);
        throw new Exception(t('Could not open !url for writing', array(
          '!url' => $destination,
        )));
      }
    }

    // First, get the header (everything up to and including the line holding
    // the <channel> tag) to rewrite the namespaces, skipping any empty lines.
    $header = '';
    while (($line = fgets($source_handle)) !== FALSE) {
      if (trim($line)) {
        $header .= $line;
        if (strpos($line, '<channel>') !== FALSE) {
          break;
        }
      }
    }

    // Missing namespace declarations are inserted just in front of the
    // content namespace declaration, which serves as the anchor.
    $content_ns = 'xmlns:content="http://purl.org/rss/1.0/modules/content/"';

    // The excerpt namespace is sometimes omitted, stuff it in if necessary
    $excerpt_ns = 'xmlns:excerpt="http://wordpress.org/export/1.0/excerpt/"';
    $excerpt_signature = 'xmlns:excerpt="http://wordpress.org/export/';
    if (strpos($header, $excerpt_signature) === FALSE) {
      $header = str_replace($content_ns, $excerpt_ns . "\n\t" . $content_ns, $header);
    }

    // Add the Atom namespace, in case it's referenced - unless it is already
    // declared, since a duplicated attribute makes the XML ill-formed.
    $atom_ns = 'xmlns:atom="http://www.w3.org/2005/Atom"';
    if (strpos($header, 'xmlns:atom=') === FALSE) {
      $header = str_replace($content_ns, $atom_ns . "\n\t" . $content_ns, $header);
    }

    // Likewise for the iTunes namespace.
    $itunes_ns = 'xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"';
    if (strpos($header, 'xmlns:itunes=') === FALSE) {
      $header = str_replace($content_ns, $itunes_ns . "\n\t" . $content_ns, $header);
    }

    // Collect every declared namespace as prefix => URI.
    preg_match_all('|xmlns:(.+?)="(.+?)"|i', $header, $matches, PREG_SET_ORDER);
    $namespaces = array();
    foreach ($matches as $match) {
      $namespaces[$match[1]] = $match[2];
    }
    if ($namespaces_only) {
      fclose($source_handle);
      return $namespaces;
    }

    // Replace HTML entities with XML entities
    $header = strtr($header, self::$entityReplacements);
    fputs($dest_handle, $header);

    // Now fix up the rest of the file line by line: unencoded ampersands,
    // bogus characters, broken CDATA sections and HTML entities. Compare
    // against FALSE explicitly so that a final line of "0" is not dropped.
    while (($line = fgets($source_handle)) !== FALSE) {
      // Handle unencoded ampersands
      $line = preg_replace('/&(?![\\w\\d#]+;)/', '&amp;', $line);

      // Remove control characters (the regex removes the newline, so tack it
      // back on)
      $cleaned = preg_replace('~\\p{C}+~u', '', $line);
      if ($cleaned === NULL) {
        // The line is not valid UTF-8, so the Unicode-aware pattern failed.
        // Strip just the ASCII control characters byte-wise rather than
        // losing the whole line.
        $cleaned = preg_replace('/[\\x00-\\x1F\\x7F]+/', '', $line);
      }
      $line = $cleaned . "\n";

      // WordPress export doesn't properly format embedded CDATA sections - our
      // quick-and-dirty fix is to remove the terminator of the embedded section
      $line = preg_replace('|// \\]\\]|', '', $line);

      // Replace HTML entities with XML entities
      $line = strtr($line, self::$entityReplacements);
      fputs($dest_handle, $line);
    }
    fclose($dest_handle);
    fclose($source_handle);
    if ($unlink) {
      unlink($sourcefile);
    }
    return $namespaces;
  }

  /**
   * Translation table between HTML entities and XML entities; some WP blogs
   * use HTML entities in XML.
   *
   * @var array
   */
  protected static $entityReplacements = array(
    '&AElig;' => '&#198;',
    '&Aacute;' => '&#193;',
    '&Acirc;' => '&#194;',
    '&Agrave;' => '&#192;',
    '&Alpha;' => '&#913;',
    '&Aring;' => '&#197;',
    '&Atilde;' => '&#195;',
    '&Auml;' => '&#196;',
    '&Beta;' => '&#914;',
    '&Ccedil;' => '&#199;',
    '&Chi;' => '&#935;',
    '&Dagger;' => '&#8225;',
    '&Delta;' => '&#916;',
    '&ETH;' => '&#208;',
    '&Eacute;' => '&#201;',
    '&Ecirc;' => '&#202;',
    '&Egrave;' => '&#200;',
    '&Epsilon;' => '&#917;',
    '&Eta;' => '&#919;',
    '&Euml;' => '&#203;',
    '&Gamma;' => '&#915;',
    '&Iacute;' => '&#205;',
    '&Icirc;' => '&#206;',
    '&Igrave;' => '&#204;',
    '&Iota;' => '&#921;',
    '&Iuml;' => '&#207;',
    '&Kappa;' => '&#922;',
    '&Lambda;' => '&#923;',
    '&Mu;' => '&#924;',
    '&Ntilde;' => '&#209;',
    '&Nu;' => '&#925;',
    '&OElig;' => '&#338;',
    '&Oacute;' => '&#211;',
    '&Ocirc;' => '&#212;',
    '&Ograve;' => '&#210;',
    '&Omega;' => '&#937;',
    '&Omicron;' => '&#927;',
    '&Oslash;' => '&#216;',
    '&Otilde;' => '&#213;',
    '&Ouml;' => '&#214;',
    '&Phi;' => '&#934;',
    '&Pi;' => '&#928;',
    '&Prime;' => '&#8243;',
    '&Psi;' => '&#936;',
    '&Rho;' => '&#929;',
    '&Scaron;' => '&#352;',
    '&Sigma;' => '&#931;',
    '&THORN;' => '&#222;',
    '&Tau;' => '&#932;',
    '&Theta;' => '&#920;',
    '&Uacute;' => '&#218;',
    '&Ucirc;' => '&#219;',
    '&Ugrave;' => '&#217;',
    '&Upsilon;' => '&#933;',
    '&Uuml;' => '&#220;',
    '&Xi;' => '&#926;',
    '&Yacute;' => '&#221;',
    '&Yuml;' => '&#376;',
    '&Zeta;' => '&#918;',
    '&aacute;' => '&#225;',
    '&acirc;' => '&#226;',
    '&acute;' => '&#180;',
    '&aelig;' => '&#230;',
    '&agrave;' => '&#224;',
    '&alefsym;' => '&#8501;',
    '&alpha;' => '&#945;',
    '&and;' => '&#8743;',
    '&ang;' => '&#8736;',
    '&aring;' => '&#229;',
    '&asymp;' => '&#8776;',
    '&atilde;' => '&#227;',
    '&auml;' => '&#228;',
    '&bdquo;' => '&#8222;',
    '&beta;' => '&#946;',
    '&brvbar;' => '&#166;',
    '&bull;' => '&#8226;',
    '&cap;' => '&#8745;',
    '&ccedil;' => '&#231;',
    '&cedil;' => '&#184;',
    '&cent;' => '&#162;',
    '&chi;' => '&#967;',
    '&circ;' => '&#710;',
    '&clubs;' => '&#9827;',
    '&cong;' => '&#8773;',
    '&copy;' => '&#169;',
    '&crarr;' => '&#8629;',
    '&cup;' => '&#8746;',
    '&curren;' => '&#164;',
    '&dArr;' => '&#8659;',
    '&dagger;' => '&#8224;',
    '&darr;' => '&#8595;',
    '&deg;' => '&#176;',
    '&delta;' => '&#948;',
    '&diams;' => '&#9830;',
    '&divide;' => '&#247;',
    '&eacute;' => '&#233;',
    '&ecirc;' => '&#234;',
    '&egrave;' => '&#232;',
    '&empty;' => '&#8709;',
    '&emsp;' => '&#8195;',
    '&ensp;' => '&#8194;',
    '&epsilon;' => '&#949;',
    '&equiv;' => '&#8801;',
    '&eta;' => '&#951;',
    '&eth;' => '&#240;',
    '&euml;' => '&#235;',
    '&euro;' => '&#8364;',
    '&exist;' => '&#8707;',
    '&fnof;' => '&#402;',
    '&forall;' => '&#8704;',
    '&frac12;' => '&#189;',
    '&frac14;' => '&#188;',
    '&frac34;' => '&#190;',
    '&frasl;' => '&#8260;',
    '&gamma;' => '&#947;',
    '&ge;' => '&#8805;',
    '&hArr;' => '&#8660;',
    '&harr;' => '&#8596;',
    '&hearts;' => '&#9829;',
    '&hellip;' => '&#8230;',
    '&iacute;' => '&#237;',
    '&icirc;' => '&#238;',
    '&iexcl;' => '&#161;',
    '&igrave;' => '&#236;',
    '&image;' => '&#8465;',
    '&infin;' => '&#8734;',
    '&int;' => '&#8747;',
    '&iota;' => '&#953;',
    '&iquest;' => '&#191;',
    '&isin;' => '&#8712;',
    '&iuml;' => '&#239;',
    '&kappa;' => '&#954;',
    '&lArr;' => '&#8656;',
    '&lambda;' => '&#955;',
    '&lang;' => '&#9001;',
    '&laquo;' => '&#171;',
    '&larr;' => '&#8592;',
    '&lceil;' => '&#8968;',
    '&ldquo;' => '&#8220;',
    '&le;' => '&#8804;',
    '&lfloor;' => '&#8970;',
    '&lowast;' => '&#8727;',
    '&loz;' => '&#9674;',
    '&lrm;' => '&#8206;',
    '&lsaquo;' => '&#8249;',
    '&lsquo;' => '&#8216;',
    '&macr;' => '&#175;',
    '&mdash;' => '&#8212;',
    '&micro;' => '&#181;',
    '&middot;' => '&#183;',
    '&minus;' => '&#8722;',
    '&mu;' => '&#956;',
    '&nabla;' => '&#8711;',
    '&nbsp;' => '&#160;',
    '&ndash;' => '&#8211;',
    '&ne;' => '&#8800;',
    '&ni;' => '&#8715;',
    '&not;' => '&#172;',
    '&notin;' => '&#8713;',
    '&nsub;' => '&#8836;',
    '&ntilde;' => '&#241;',
    '&nu;' => '&#957;',
    '&oacute;' => '&#243;',
    '&ocirc;' => '&#244;',
    '&oelig;' => '&#339;',
    '&ograve;' => '&#242;',
    '&oline;' => '&#8254;',
    '&omega;' => '&#969;',
    '&omicron;' => '&#959;',
    '&oplus;' => '&#8853;',
    '&or;' => '&#8744;',
    '&ordf;' => '&#170;',
    '&ordm;' => '&#186;',
    '&oslash;' => '&#248;',
    '&otilde;' => '&#245;',
    '&otimes;' => '&#8855;',
    '&ouml;' => '&#246;',
    '&para;' => '&#182;',
    '&part;' => '&#8706;',
    '&permil;' => '&#8240;',
    '&perp;' => '&#8869;',
    '&phi;' => '&#966;',
    '&pi;' => '&#960;',
    '&piv;' => '&#982;',
    '&plusmn;' => '&#177;',
    '&pound;' => '&#163;',
    '&prime;' => '&#8242;',
    '&prod;' => '&#8719;',
    '&prop;' => '&#8733;',
    '&psi;' => '&#968;',
    '&rArr;' => '&#8658;',
    '&radic;' => '&#8730;',
    '&rang;' => '&#9002;',
    '&raquo;' => '&#187;',
    '&rarr;' => '&#8594;',
    '&rceil;' => '&#8969;',
    '&rdquo;' => '&#8221;',
    '&real;' => '&#8476;',
    '&reg;' => '&#174;',
    '&rfloor;' => '&#8971;',
    '&rho;' => '&#961;',
    '&rlm;' => '&#8207;',
    '&rsaquo;' => '&#8250;',
    '&rsquo;' => '&#8217;',
    '&sbquo;' => '&#8218;',
    '&scaron;' => '&#353;',
    '&sdot;' => '&#8901;',
    '&sect;' => '&#167;',
    '&shy;' => '&#173;',
    '&sigma;' => '&#963;',
    '&sigmaf;' => '&#962;',
    '&sim;' => '&#8764;',
    '&spades;' => '&#9824;',
    '&sub;' => '&#8834;',
    '&sube;' => '&#8838;',
    '&sum;' => '&#8721;',
    '&sup1;' => '&#185;',
    '&sup2;' => '&#178;',
    '&sup3;' => '&#179;',
    '&sup;' => '&#8835;',
    '&supe;' => '&#8839;',
    '&szlig;' => '&#223;',
    '&tau;' => '&#964;',
    '&there4;' => '&#8756;',
    '&theta;' => '&#952;',
    '&thetasym;' => '&#977;',
    '&thinsp;' => '&#8201;',
    '&thorn;' => '&#254;',
    '&tilde;' => '&#732;',
    '&times;' => '&#215;',
    '&trade;' => '&#8482;',
    '&uArr;' => '&#8657;',
    '&uacute;' => '&#250;',
    '&uarr;' => '&#8593;',
    '&ucirc;' => '&#251;',
    '&ugrave;' => '&#249;',
    '&uml;' => '&#168;',
    '&upsih;' => '&#978;',
    '&upsilon;' => '&#965;',
    '&uuml;' => '&#252;',
    '&weierp;' => '&#8472;',
    '&xi;' => '&#958;',
    '&yacute;' => '&#253;',
    '&yen;' => '&#165;',
    '&yuml;' => '&#255;',
    '&zeta;' => '&#950;',
    '&zwj;' => '&#8205;',
    '&zwnj;' => '&#8204;',
  );

  /**
   * With earlier versions of libxml, XMLReader has no readString() method -
   * mock it up if necessary.
   *
   * @param $reader
   *  XMLReader instance being iterated for XML parsing.
   *
   * @return string
   *  Text content of the reader's current node; an empty string if the
   *  fallback path cannot expand the node.
   */
  public static function readString(XMLReader $reader) {
    if (method_exists('XMLReader', 'readString')) {
      return $reader->readString();
    }

    // Fallback: expand the current node into a DOM node and take its text.
    $node = $reader->expand();
    return $node ? $node->textContent : '';
  }

}

Members

Namesort descending Modifiers Type Description Overrides
WordPressBlog::$arguments protected property
WordPressBlog::$blogID protected property
WordPressBlog::$blog_url protected property
WordPressBlog::$displayTitle protected property
WordPressBlog::$entityReplacements protected static property Translation table between HTML entities and XML entities; some WP blogs use HTML entities in XML.
WordPressBlog::$filename protected property
WordPressBlog::$link protected property
WordPressBlog::$migrations protected property
WordPressBlog::$title protected property
WordPressBlog::$uid protected property
WordPressBlog::$wxrVersion protected property
WordPressBlog::blogs public static function Get a list of all WordPress blogs.
WordPressBlog::getBlogID public function
WordPressBlog::getBlogUrl public function
WordPressBlog::getDisplayTitle public function
WordPressBlog::getFilename public function
WordPressBlog::getLink public function
WordPressBlog::getTitle public function
WordPressBlog::getUid public function
WordPressBlog::getWxrVersion public function
WordPressBlog::machineName public function
WordPressBlog::migrationClasses public function The implemented WordPress migrations, in the order they should be run.
WordPressBlog::migrations public function Get a list of all migrations in this blog.
WordPressBlog::preprocessFile public static function WXR files typically need some cleanup to be successfully parsed - perform that here.
WordPressBlog::readString public static function With earlier versions of libxml, XMLReader has no readString() method - mock it up if necessary.
WordPressBlog::__construct public function