You are here

class XmlParser in Feeds extensible parsers 8

Defines an XML parser using XPath.

Plugin annotation


@FeedsParser(
  id = "xml",
  title = @Translation("XML"),
  description = @Translation("Parse XML with XPath.")
)

Hierarchy

Expanded class hierarchy of XmlParser

1 file declares its use of XmlParser
XmlParserTest.php in tests/src/Unit/Feeds/Parser/XmlParserTest.php

File

src/Feeds/Parser/XmlParser.php, line 30

Namespace

Drupal\feeds_ex\Feeds\Parser
View source
class XmlParser extends ParserBase implements ContainerFactoryPluginInterface {
  use XmlParserTrait;

  /**
   * The XpathDomXpath object used for parsing.
   *
   * @var \Drupal\feeds_ex\XpathDomXpath
   */
  protected $xpath;

  /**
   * The previous value for XML error handling.
   *
   * @var bool
   */
  protected $handleXmlErrors;

  /**
   * The previous value for the entity loader.
   *
   * @var bool
   */
  protected $entityLoader;

  /**
   * {@inheritdoc}
   */
  protected $encoderClass = '\\Drupal\\feeds_ex\\Encoder\\XmlEncoder';

  /**
   * The XML helper class.
   *
   * @var \Drupal\feeds_ex\Utility\XmlUtility
   */
  protected $utility;

  /**
   * Constructs an XmlParser object.
   *
   * @param array $configuration
   *   The plugin configuration.
   * @param string $plugin_id
   *   The plugin id.
   * @param array $plugin_definition
   *   The plugin definition.
   * @param \Drupal\feeds_ex\Utility\XmlUtility $utility
   *   The XML helper class.
   */
  public function __construct(array $configuration, $plugin_id, array $plugin_definition, XmlUtility $utility) {
    // Store the helper before delegating to the parent constructor.
    $this->utility = $utility;
    parent::__construct($configuration, $plugin_id, $plugin_definition);
  }

  /**
   * {@inheritdoc}
   */
  public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
    // Resolve the XML helper service first, then hand it to the constructor.
    $xml_utility = $container->get('feeds_ex.xml_utility');
    return new static($configuration, $plugin_id, $plugin_definition, $xml_utility);
  }

  /**
   * {@inheritdoc}
   */
  protected function setUp(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
    // Wrap the prepared DOM document in the XPath helper used by the
    // context and source expression methods.
    $this->xpath = new XpathDomXpath($this->prepareDocument($feed, $fetcher_result));
  }

  /**
   * {@inheritdoc}
   */
  protected function cleanUp(FeedInterface $feed, ParserResultInterface $result, StateInterface $state) {
    // Drop our reference to the XPath object (and with it the DOMDocument) so
    // the memory can be reclaimed; nothing else is expected to hold on to it.
    unset($this->xpath);

    // Report how far the pointer has advanced relative to the total.
    $total = $state->total;
    $pointer = $state->pointer;
    $state->progress($total, $pointer);
  }

  /**
   * {@inheritdoc}
   */
  protected function executeContext(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
    $context = $this->configuration['context']['value'];

    // Count the matching context nodes once; later batches reuse the total.
    if (!$state->total) {
      $state->total = $this->xpath->evaluate('count(' . $context . ')');
    }

    // Advance the pointer by one batch of "line_limit" items.
    $start = (int) $state->pointer;
    $state->pointer = $start + $this->configuration['line_limit'];

    // Restrict the context expression to the positions of the current batch.
    $context_query = '(' . $context . ')[position() > ' . $start . ' and position() <= ' . $state->pointer . ']';
    return $this->xpath->query($context_query);
  }

  /**
   * {@inheritdoc}
   */
  protected function executeSourceExpression($machine_name, $expression, $row) {
    $nodes = $this->xpath->evaluate($expression, $row);

    // Anything that is not a node list is passed back untouched.
    if (!$nodes instanceof DOMNodeList) {
      return $nodes;
    }

    // An empty node list yields no value at all.
    if ($nodes->length == 0) {
      return NULL;
    }

    // Decide once how each node is converted. "inner" takes precedence over
    // "raw"; without either flag the node value is used.
    $use_inner = !empty($this->configuration['sources'][$machine_name]['inner']);
    $use_raw = !$use_inner && !empty($this->configuration['sources'][$machine_name]['raw']);

    $values = [];
    foreach ($nodes as $node) {
      if ($use_inner) {
        $values[] = $this->getInnerXml($node);
      }
      elseif ($use_raw) {
        $values[] = $this->getRaw($node);
      }
      else {
        $values[] = $node->nodeValue;
      }
    }

    // Collapse a single result to a scalar, otherwise return the whole list.
    if (count($values) === 1) {
      return $values[0];
    }
    return $values;
  }

  /**
   * {@inheritdoc}
   */
  public function defaultConfiguration() {
    // Tidy is off by default; the array union lets our key win over any
    // identically named key from the parent defaults.
    $parser_defaults = ['use_tidy' => FALSE];
    $inherited_defaults = parent::defaultConfiguration();
    return $parser_defaults + $inherited_defaults;
  }

  /**
   * {@inheritdoc}
   */
  public function hasConfigForm() {
    // This parser always reports that it provides a configuration form.
    return TRUE;
  }

  /**
   * {@inheritdoc}
   */
  public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
    $form = parent::buildConfigurationForm($form, $form_state);

    // The tidy option is only offered when the extension is available.
    if (!extension_loaded('tidy')) {
      return $form;
    }

    $title = $this->t('Use tidy');
    $description = $this->t('The <a href="http://php.net/manual/en/book.tidy.php">Tidy PHP</a> extension has been detected. Select this to clean the markup before parsing.');
    $form['use_tidy'] = [
      '#type' => 'checkbox',
      '#title' => $title,
      '#description' => $description,
      '#default_value' => $this->configuration['use_tidy'],
    ];

    return $form;
  }

  /**
   * {@inheritdoc}
   */
  protected function configSourceLabel() {
    // Translated label for a single source of this parser.
    $label = $this->t('xpath source');
    return $label;
  }

  /**
   * {@inheritdoc}
   */
  protected function configFormTableHeader() {
    // One extra column per XML-specific source option, in display order.
    $header = [];
    $header['raw'] = $this->t('Raw');
    $header['inner'] = $this->t('Inner XML');
    return $header;
  }

  /**
   * {@inheritdoc}
   */
  protected function configFormTableColumn(FormStateInterface $form_state, array $values, $column, $machine_name) {
    // HTML id of the "raw" checkbox; the "inner" checkbox refers to it in its
    // #states definition.
    $raw_checkbox_id = 'feeds-ex-xml-raw-' . Html::escape($machine_name);

    // Columns other than "raw" and "inner" produce no element.
    $element = NULL;

    switch ($column) {
      case 'raw':
        $element = [
          '#type' => 'checkbox',
          '#title' => $this->t('Raw value'),
          '#title_display' => 'invisible',
          '#default_value' => (int) (!empty($values['raw'])),
          '#id' => $raw_checkbox_id,
        ];
        break;

      case 'inner':
        // Only shown while the matching "raw" checkbox is checked.
        $visible_when = [
          '#' . $raw_checkbox_id => [
            'checked' => TRUE,
          ],
        ];
        $element = [
          '#type' => 'checkbox',
          '#title' => $this->t('Inner XML'),
          '#title_display' => 'invisible',
          '#default_value' => (int) (!empty($values['inner'])),
          '#states' => [
            'visible' => $visible_when,
          ],
        ];
        break;
    }

    return $element;
  }

  /**
   * {@inheritdoc}
   */
  protected function validateExpression(&$expression) {
    // The caller's expression is trimmed in place.
    $expression = trim($expression);
    if (!$expression) {
      return NULL;
    }

    $message = NULL;
    $this->startErrorHandling();

    // Run the expression against a minimal document purely to let libxml
    // report syntax problems.
    $probe = new SimpleXMLElement("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<items></items>");
    $probe->xpath($expression);

    $error = libxml_get_last_error();
    if ($error) {
      // Our variable substitution options can cause syntax errors (code 1207
      // while the expression contains a "$"); those are tolerated, and so is
      // error code 1219.
      $caused_by_variable = $error->code == 1207 && strpos($expression, '$') !== FALSE;
      $ignorable = $caused_by_variable || $error->code == 1219;
      if (!$ignorable) {
        $message = Html::escape(trim($error->message));
      }
    }

    $this->stopErrorHandling();
    return $message;
  }

  /**
   * {@inheritdoc}
   */
  public function configFormValidate(&$values) {
    parent::configFormValidate($values);

    // "inner" is only meaningful together with "raw": drop the flag from any
    // source that has it set without "raw".
    foreach ($values['sources'] as $machine_name => $source) {
      if (empty($source['inner']) || !empty($source['raw'])) {
        continue;
      }
      unset($values['sources'][$machine_name]['inner']);
    }
  }

  /**
   * Prepares the DOM document.
   *
   * The raw fetcher result is prepared, stripped of default namespaces,
   * optionally repaired with tidy, and has its named HTML entities decoded
   * before it is loaded into a DOM document.
   *
   * @param \Drupal\feeds\FeedInterface $feed
   *   The feed source.
   * @param \Drupal\feeds\Result\FetcherResultInterface $fetcher_result
   *   The fetcher result.
   *
   * @return \DOMDocument
   *   The DOM document.
   */
  protected function prepareDocument(FeedInterface $feed, FetcherResultInterface $fetcher_result) {
    $raw = $this->prepareRaw($fetcher_result);

    // Remove default namespaces. This has to run after the encoding conversion
    // because a limited set of encodings are supported in regular expressions.
    $raw = $this->removeDefaultNamespaces($raw);

    if ($this->configuration['use_tidy'] && extension_loaded('tidy')) {
      // tidy_repair_string() returns FALSE on failure. Keep the un-tidied
      // markup in that case instead of passing FALSE on as the document.
      $tidied = tidy_repair_string($raw, $this->getTidyConfig(), 'utf8');
      if ($tidied !== FALSE) {
        $raw = $tidied;
      }
    }

    $raw = $this->utility->decodeNamedHtmlEntities($raw);
    return $this->getDomDocument($raw);
  }

  /**
   * Serializes a DOM node, including the node itself, to an XML string.
   *
   * @param \DOMNode $node
   *   The node to convert to raw XML.
   *
   * @return string
   *   The raw XML.
   */
  protected function getRaw(DOMNode $node) {
    // The owning document performs the serialization of its node.
    $document = $node->ownerDocument;
    return $document->saveXML($node);
  }

  /**
   * Serializes the children of a DOM node, excluding the node itself.
   *
   * @param \DOMNode $node
   *   The node whose content is converted to XML.
   *
   * @return string
   *   The inner XML.
   */
  protected function getInnerXml(DOMNode $node) {
    // Serialize every child in document order and glue the pieces together.
    $pieces = [];
    foreach ($node->childNodes as $child) {
      $pieces[] = $this->getRaw($child);
    }
    return implode('', $pieces);
  }

  /**
   * {@inheritdoc}
   */
  protected function startErrorHandling() {
    parent::startErrorHandling();
    libxml_clear_errors();

    // Remember the previous setting so stopErrorHandling() can restore it.
    $this->handleXmlErrors = libxml_use_internal_errors(TRUE);

    // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and triggers
    // a deprecation notice when called, so it is only used on older PHP
    // versions. The same guard is applied in stopErrorHandling() so the loader
    // state is never changed without being restored.
    // See http://symfony.com/blog/security-release-symfony-2-0-17-released for
    // details on why the entity loader is disabled.
    if (\PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader')) {
      $this->entityLoader = libxml_disable_entity_loader(TRUE);
    }
  }

  /**
   * {@inheritdoc}
   */
  protected function stopErrorHandling() {
    parent::stopErrorHandling();
    libxml_clear_errors();

    // Restore the libxml error handling mode saved by startErrorHandling().
    libxml_use_internal_errors($this->handleXmlErrors);

    // Mirror the guard in startErrorHandling(): the entity loader is only
    // touched (and therefore only restored) on PHP versions before 8.0, where
    // the function is not deprecated.
    if (\PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader')) {
      libxml_disable_entity_loader($this->entityLoader);
    }
  }

  /**
   * {@inheritdoc}
   */
  protected function getErrors() {
    $errors = [];

    foreach (libxml_get_errors() as $libxml_error) {
      // Map the libxml error level onto a log severity; anything that is
      // neither fatal nor a plain error is reported as a notice.
      if ($libxml_error->level == LIBXML_ERR_FATAL) {
        $severity = RfcLogLevel::ERROR;
      }
      elseif ($libxml_error->level == LIBXML_ERR_ERROR) {
        $severity = RfcLogLevel::WARNING;
      }
      else {
        $severity = RfcLogLevel::NOTICE;
      }

      // Placeholder values for the message template.
      $variables = [
        '%error' => trim($libxml_error->message),
        '%num' => $libxml_error->line,
        '%code' => $libxml_error->code,
      ];

      $errors[] = [
        'message' => '%error on line %num. Error code: %code',
        'variables' => $variables,
        'severity' => $severity,
      ];
    }

    return $errors;
  }

  /**
   * Returns the options for phptidy.
   *
   * The returned array is passed as the configuration argument of
   * tidy_repair_string() when the "use_tidy" setting is enabled.
   *
   * @return array
   *   The configuration array, keyed by tidy option name.
   *
   * @see http://php.net/manual/en/book.tidy.php
   * @see tidy_repair_string()
   */
  protected function getTidyConfig() {
    // NOTE(review): option names presumably follow the HTML Tidy
    // configuration reference; both input and output are flagged as XML.
    return [
      'input-xml' => TRUE,
      'output-xml' => TRUE,
      'add-xml-decl' => TRUE,
      'wrap' => 0,
      'tidy-mark' => FALSE,
    ];
  }

}

Members

Namesort descending Modifiers Type Description Overrides
DependencySerializationTrait::$_entityStorages protected property An array of entity type IDs keyed by the property name of their storages.
DependencySerializationTrait::$_serviceIds protected property An array of service IDs keyed by property name used for serialization.
DependencySerializationTrait::__sleep public function 1
DependencySerializationTrait::__wakeup public function 2
DependencyTrait::$dependencies protected property The object's dependencies.
DependencyTrait::addDependencies protected function Adds multiple dependencies.
DependencyTrait::addDependency protected function Adds a dependency.
MessengerTrait::$messenger protected property The messenger. 29
MessengerTrait::messenger public function Gets the messenger. 29
MessengerTrait::setMessenger public function Sets the messenger.
ParserBase::$encoder protected property The encoder used to convert encodings.
ParserBase::$feedsExMessenger protected property The messenger, for compatibility with Drupal 8.5.
ParserBase::$htmlTags protected static property The default list of HTML tags allowed by Xss::filter().
ParserBase::buildFeedForm public function
ParserBase::configSourceDescription protected function Returns the description for single source. 1
ParserBase::debug protected function Renders our debug messages into a list.
ParserBase::executeSources protected function Executes the source expressions.
ParserBase::getEncoder public function Returns the encoder.
ParserBase::getFormHeader protected function Returns the configuration form table header.
ParserBase::getMappingSources public function Declare the possible mapping sources that this parser produces. Overrides ParserInterface::getMappingSources
ParserBase::getMessenger public function Gets the messenger.
ParserBase::hasConfigurableContext protected function Returns whether or not this parser uses a context query. 2
ParserBase::hasSourceConfig public function
ParserBase::loadLibrary protected function Loads the necessary library. 3
ParserBase::mappingFormAlter public function Alter mapping form. Overrides ParserBase::mappingFormAlter
ParserBase::mappingFormSubmit public function Submit handler for the mapping form. Overrides ParserBase::mappingFormSubmit
ParserBase::mappingFormValidate public function Validate handler for the mapping form. Overrides ParserBase::mappingFormValidate
ParserBase::parse public function Parses content returned by fetcher. Overrides ParserInterface::parse
ParserBase::parseItems protected function Performs the actual parsing. 2
ParserBase::prepareExpressions protected function Prepares the expressions for parsing.
ParserBase::prepareRaw protected function Prepares the raw string for parsing.
ParserBase::prepareVariables protected function Prepares the variable map used to substitution.
ParserBase::printErrors protected function Prints errors to the screen.
ParserBase::setEncoder public function Sets the encoder.
ParserBase::setFeedsExMessenger public function Sets the messenger.
ParserBase::sourceDefaults public function
ParserBase::sourceFormValidate public function
ParserBase::sourceSave public function
ParserBase::submitConfigurationForm public function Form submission handler. Overrides PluginFormInterface::submitConfigurationForm
ParserBase::validateConfigurationForm public function Form validation handler. Overrides PluginFormInterface::validateConfigurationForm
ParserBase::_buildConfigurationForm public function Builds configuration form for the parser settings.
PluginBase::$configuration protected property Configuration information passed into the plugin. 1
PluginBase::$feedType protected property The importer this plugin is working for.
PluginBase::$linkGenerator protected property The link generator.
PluginBase::$pluginDefinition protected property The plugin implementation definition. 1
PluginBase::$pluginId protected property The plugin_id.
PluginBase::$urlGenerator protected property The url generator.
PluginBase::calculateDependencies public function Calculates dependencies for the configured plugin. Overrides DependentPluginInterface::calculateDependencies 2
PluginBase::container private function Returns the service container.
PluginBase::defaultFeedConfiguration public function Returns default feed configuration. Overrides FeedsPluginInterface::defaultFeedConfiguration 3
PluginBase::DERIVATIVE_SEPARATOR constant A string which is used to separate base plugin IDs from the derivative ID.
PluginBase::getBaseId public function Gets the base_plugin_id of the plugin instance. Overrides DerivativeInspectionInterface::getBaseId
PluginBase::getConfiguration public function Gets this plugin's configuration. Overrides ConfigurableInterface::getConfiguration
PluginBase::getDerivativeId public function Gets the derivative_id of the plugin instance. Overrides DerivativeInspectionInterface::getDerivativeId
PluginBase::getPluginDefinition public function Gets the definition of the plugin implementation. Overrides PluginInspectionInterface::getPluginDefinition 3
PluginBase::getPluginId public function Gets the plugin_id of the plugin instance. Overrides PluginInspectionInterface::getPluginId
PluginBase::isConfigurable public function Determines if the plugin is configurable.
PluginBase::l protected function Renders a link to a route given a route name and its parameters.
PluginBase::linkGenerator protected function Returns the link generator service.
PluginBase::onFeedDeleteMultiple public function A feed is being deleted. 3
PluginBase::onFeedSave public function A feed is being saved.
PluginBase::onFeedTypeDelete public function The feed type is being deleted. 1
PluginBase::onFeedTypeSave public function The feed type is being saved. 1
PluginBase::pluginType public function Returns the type of plugin. Overrides FeedsPluginInterface::pluginType
PluginBase::setConfiguration public function Sets the configuration for this plugin instance. Overrides ConfigurableInterface::setConfiguration 1
PluginBase::url protected function Generates a URL or path for a specific route based on the given parameters.
PluginBase::urlGenerator protected function Returns the URL generator service.
StringTranslationTrait::$stringTranslation protected property The string translation service. 1
StringTranslationTrait::formatPlural protected function Formats a string containing a count of items.
StringTranslationTrait::getNumberOfPlurals protected function Returns the number of plurals supported by a given language.
StringTranslationTrait::getStringTranslation protected function Gets the string translation service.
StringTranslationTrait::setStringTranslation public function Sets the string translation service to use. 2
StringTranslationTrait::t protected function Translates a string to the current language or to a given language.
XmlParser::$encoderClass protected property The class used as the text encoder. Overrides ParserBase::$encoderClass 2
XmlParser::$entityLoader protected property The previous value for the entity loader.
XmlParser::$handleXmlErrors protected property The previous value for XML error handling.
XmlParser::$utility protected property The XML helper class.
XmlParser::$xpath protected property The XpathDomXpath object used for parsing.
XmlParser::buildConfigurationForm public function Form constructor. Overrides ParserBase::buildConfigurationForm
XmlParser::cleanUp protected function Allows subclasses to cleanup after parsing. Overrides ParserBase::cleanUp
XmlParser::configFormTableColumn protected function Returns a form element for a specific column. Overrides ParserBase::configFormTableColumn 1
XmlParser::configFormTableHeader protected function Returns the list of table headers. Overrides ParserBase::configFormTableHeader 1
XmlParser::configFormValidate public function Overrides ParserBase::configFormValidate
XmlParser::configSourceLabel protected function Returns the label for single source. Overrides ParserBase::configSourceLabel
XmlParser::create public static function Creates an instance of the plugin. Overrides ContainerFactoryPluginInterface::create
XmlParser::defaultConfiguration public function Gets default configuration for this plugin. Overrides ParserBase::defaultConfiguration
XmlParser::executeContext protected function Returns rows to be parsed. Overrides ParserBase::executeContext 1
XmlParser::executeSourceExpression protected function Executes a single source expression. Overrides ParserBase::executeSourceExpression 1
XmlParser::getErrors protected function Returns the errors after parsing. Overrides ParserBase::getErrors
XmlParser::getInnerXml protected function Returns the inner XML of a DOM node.
XmlParser::getRaw protected function Returns the raw XML of a DOM node. 1
XmlParser::getTidyConfig protected function Returns the options for phptidy. 2
XmlParser::hasConfigForm public function Overrides ParserBase::hasConfigForm
XmlParser::prepareDocument protected function Prepares the DOM document. 2
XmlParser::setUp protected function Allows subclasses to prepare for parsing. Overrides ParserBase::setUp 1
XmlParser::startErrorHandling protected function Starts internal error handling. Overrides ParserBase::startErrorHandling
XmlParser::stopErrorHandling protected function Stops internal error handling. Overrides ParserBase::stopErrorHandling
XmlParser::validateExpression protected function Validates an expression. Overrides ParserBase::validateExpression 1
XmlParser::__construct public function Constructs an XmlParser object. Overrides ParserBase::__construct
XmlParserTrait::$_elementRegex protected static property Matches the characters of an XML element.
XmlParserTrait::$_entityLoader protected static property The previous value of the entity loader.
XmlParserTrait::$_errors protected static property The errors reported during parsing.
XmlParserTrait::$_useError protected static property The previous value of libxml error reporting.
XmlParserTrait::getDomDocument protected static function Returns a new DOMDocument.
XmlParserTrait::getXmlErrors protected static function Returns the errors reported during parsing.
XmlParserTrait::removeDefaultNamespaces protected static function Strips the default namespaces from an XML string.
XmlParserTrait::startXmlErrorHandling protected static function Starts custom error handling.
XmlParserTrait::stopXmlErrorHandling protected static function Stops custom error handling.