class XmlParser in Feeds extensible parsers 8
Defines an XML parser using XPath.
Plugin annotation
@FeedsParser(
id = "xml",
title = @Translation("XML"),
description = @Translation("Parse XML with XPath.")
)
Hierarchy
- class \Drupal\Component\Plugin\PluginBase implements DerivativeInspectionInterface, PluginInspectionInterface
- class \Drupal\Core\Plugin\PluginBase uses DependencySerializationTrait, MessengerTrait, StringTranslationTrait
- class \Drupal\feeds\Plugin\Type\PluginBase implements FeedsPluginInterface uses DependencyTrait
- class \Drupal\feeds\Feeds\Parser\ParserBase implements MappingPluginFormInterface, ParserInterface
- class \Drupal\feeds_ex\Feeds\Parser\ParserBase implements PluginFormInterface, MappingPluginFormInterface, ParserInterface
- class \Drupal\feeds_ex\Feeds\Parser\XmlParser implements ContainerFactoryPluginInterface uses XmlParserTrait
- class \Drupal\feeds_ex\Feeds\Parser\ParserBase implements PluginFormInterface, MappingPluginFormInterface, ParserInterface
- class \Drupal\feeds\Feeds\Parser\ParserBase implements MappingPluginFormInterface, ParserInterface
- class \Drupal\feeds\Plugin\Type\PluginBase implements FeedsPluginInterface uses DependencyTrait
- class \Drupal\Core\Plugin\PluginBase uses DependencySerializationTrait, MessengerTrait, StringTranslationTrait
Expanded class hierarchy of XmlParser
1 file declares its use of XmlParser
- XmlParserTest.php in tests/
src/ Unit/ Feeds/ Parser/ XmlParserTest.php
File
- src/
Feeds/ Parser/ XmlParser.php, line 30
Namespace
Drupal\feeds_ex\Feeds\Parser
View source
class XmlParser extends ParserBase implements ContainerFactoryPluginInterface {
use XmlParserTrait;
/**
* The XpathDomXpath object used for parsing.
*
* Created in setUp() from the prepared DOM document and unset again in
* cleanUp().
*
* @var \Drupal\feeds_ex\XpathDomXpath
*/
protected $xpath;
/**
* The previous value for XML error handling.
*
* Holds the return value of libxml_use_internal_errors(TRUE) recorded by
* startErrorHandling(), so that stopErrorHandling() can restore it.
*
* @var bool
*/
protected $handleXmlErrors;
/**
* The previous value for the entity loader.
*
* Holds the return value of libxml_disable_entity_loader(TRUE) recorded by
* startErrorHandling(), so that stopErrorHandling() can restore it. Stays
* unset when startErrorHandling() does not call that function.
*
* @var bool|null
*/
protected $entityLoader;
/**
* {@inheritdoc}
*/
protected $encoderClass = '\\Drupal\\feeds_ex\\Encoder\\XmlEncoder';
/**
* The XML helper class.
*
* Injected through the constructor; prepareDocument() uses it to decode named
* HTML entities before the document is loaded.
*
* @var \Drupal\feeds_ex\Utility\XmlUtility
*/
protected $utility;
/**
* Constructs an XmlParser object.
*
* @param array $configuration
* The plugin configuration.
* @param string $plugin_id
* The plugin id.
* @param array $plugin_definition
* The plugin definition.
* @param \Drupal\feeds_ex\Utility\XmlUtility $utility
* The XML helper class.
*/
public function __construct(array $configuration, $plugin_id, array $plugin_definition, XmlUtility $utility) {
// The helper is stored before the parent constructor runs.
$this->utility = $utility;
parent::__construct($configuration, $plugin_id, $plugin_definition);
}
/**
* {@inheritdoc}
*/
public static function create(ContainerInterface $container, array $configuration, $plugin_id, $plugin_definition) {
  // Fetch the XML helper service so it can be handed to the constructor.
  $xml_utility = $container->get('feeds_ex.xml_utility');

  return new static($configuration, $plugin_id, $plugin_definition, $xml_utility);
}
/**
* {@inheritdoc}
*/
protected function setUp(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
  // Build the DOM document from the fetched data and wrap it for XPath use.
  $this->xpath = new XpathDomXpath($this->prepareDocument($feed, $fetcher_result));
}
/**
* {@inheritdoc}
*/
protected function cleanUp(FeedInterface $feed, ParserResultInterface $result, StateInterface $state) {
  // Release the XPath wrapper (and with it the DOMDocument) to free memory;
  // nothing else is expected to hold a reference to either.
  unset($this->xpath);

  // Report progress: total number of rows versus the current pointer.
  $total = $state->total;
  $pointer = $state->pointer;
  $state->progress($total, $pointer);
}
/**
* {@inheritdoc}
*/
protected function executeContext(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
  $context = $this->configuration['context']['value'];

  // Count the rows matched by the context query while no total is recorded.
  if (!$state->total) {
    $state->total = $this->xpath->evaluate('count(' . $context . ')');
  }

  // Move the pointer forward by one batch of 'line_limit' rows.
  $start = (int) $state->pointer;
  $state->pointer = $start + $this->configuration['line_limit'];

  // Restrict the context query to the rows of the current batch: positions
  // greater than the old pointer, up to and including the new pointer.
  $batch_query = '(' . $context . ')[position() > ' . $start . ' and position() <= ' . $state->pointer . ']';

  return $this->xpath->query($batch_query);
}
/**
* {@inheritdoc}
*/
protected function executeSourceExpression($machine_name, $expression, $row) {
  $evaluated = $this->xpath->evaluate($expression, $row);

  // Anything that is not a node list is handed back unchanged.
  if (!$evaluated instanceof DOMNodeList) {
    return $evaluated;
  }
  // An empty node list produces no value at all.
  if ($evaluated->length == 0) {
    return;
  }

  // Decide once how nodes are converted. "inner" takes precedence over
  // "raw"; without either flag the plain node value is used.
  $use_inner = !empty($this->configuration['sources'][$machine_name]['inner']);
  $use_raw = !empty($this->configuration['sources'][$machine_name]['raw']);

  $values = [];
  foreach ($evaluated as $node) {
    if ($use_inner) {
      $values[] = $this->getInnerXml($node);
    }
    elseif ($use_raw) {
      $values[] = $this->getRaw($node);
    }
    else {
      $values[] = $node->nodeValue;
    }
  }

  // Collapse a single match to a scalar; several matches stay a list.
  if (count($values) === 1) {
    return $values[0];
  }
  return $values;
}
/**
* {@inheritdoc}
*/
public function defaultConfiguration() {
  $defaults = ['use_tidy' => FALSE];

  // Array union: the keys above win, inherited defaults are appended.
  return $defaults + parent::defaultConfiguration();
}
/**
* {@inheritdoc}
*/
public function hasConfigForm() {
// This parser always reports that it has a configuration form.
return TRUE;
}
/**
* {@inheritdoc}
*/
public function buildConfigurationForm(array $form, FormStateInterface $form_state) {
  $form = parent::buildConfigurationForm($form, $form_state);

  // The tidy option is only offered when the tidy extension is loaded.
  if (!extension_loaded('tidy')) {
    return $form;
  }

  $title = $this->t('Use tidy');
  $description = $this->t('The <a href="http://php.net/manual/en/book.tidy.php">Tidy PHP</a> extension has been detected. Select this to clean the markup before parsing.');

  $form['use_tidy'] = [
    '#type' => 'checkbox',
    '#title' => $title,
    '#description' => $description,
    '#default_value' => $this->configuration['use_tidy'],
  ];

  return $form;
}
/**
* {@inheritdoc}
*/
protected function configSourceLabel() {
  // Translated label used for a single source of this parser.
  $label = $this->t('xpath source');

  return $label;
}
/**
* {@inheritdoc}
*/
protected function configFormTableHeader() {
  // Extra table columns, keyed by the column name that
  // configFormTableColumn() receives.
  $header = [];
  $header['raw'] = $this->t('Raw');
  $header['inner'] = $this->t('Inner XML');

  return $header;
}
/**
* {@inheritdoc}
*/
protected function configFormTableColumn(FormStateInterface $form_state, array $values, $column, $machine_name) {
  // HTML id of the "raw" checkbox. The "inner" checkbox refers to it in its
  // #states selector so that it is only shown while "raw" is checked.
  $raw_id = 'feeds-ex-xml-raw-' . Html::escape($machine_name);

  if ($column == 'raw') {
    return [
      '#type' => 'checkbox',
      '#title' => $this->t('Raw value'),
      '#title_display' => 'invisible',
      '#default_value' => (int) (!empty($values['raw'])),
      '#id' => $raw_id,
    ];
  }

  if ($column == 'inner') {
    $visible_when_raw = [
      '#' . $raw_id => [
        'checked' => TRUE,
      ],
    ];

    return [
      '#type' => 'checkbox',
      '#title' => $this->t('Inner XML'),
      '#title_display' => 'invisible',
      '#default_value' => (int) (!empty($values['inner'])),
      '#states' => [
        'visible' => $visible_when_raw,
      ],
    ];
  }

  // Any other column falls through without an explicit return value.
}
/**
* {@inheritdoc}
*/
protected function validateExpression(&$expression) {
  // The trimmed expression is written back through the reference.
  $expression = trim($expression);

  // There is nothing to validate for an empty expression.
  if (!$expression) {
    return NULL;
  }

  $message = NULL;

  $this->startErrorHandling();
  try {
    // Evaluate the expression against a stub document. The result is
    // irrelevant; only the error libxml records for the expression matters.
    $xml = new SimpleXMLElement("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<items></items>");
    $xml->xpath($expression);

    if ($error = libxml_get_last_error()) {
      // Code 1207 (XML_XPATH_EXPR_ERROR in libxml2) is tolerated when the
      // expression contains "$": our variable substitution options can cause
      // such syntax errors.
      $is_substitution_error = $error->code == 1207 && strpos($expression, '$') !== FALSE;
      // Code 1219 (XML_XPATH_UNDEF_PREFIX_ERROR in libxml2) is always
      // tolerated. NOTE(review): presumably because no namespace prefixes are
      // registered on the stub document - confirm.
      $is_undefined_prefix = $error->code == 1219;

      if (!$is_substitution_error && !$is_undefined_prefix) {
        $message = Html::escape(trim($error->message));
      }
    }
  }
  finally {
    // Always restore the previous error handling state, even when the
    // evaluation above throws.
    $this->stopErrorHandling();
  }

  return $message;
}
/**
* {@inheritdoc}
*/
public function configFormValidate(&$values) {
  parent::configFormValidate($values);

  // The "inner" flag is only kept together with "raw": drop it from every
  // source that has "inner" set but "raw" not set.
  foreach ($values['sources'] as $machine_name => $source) {
    $wants_inner = !empty($source['inner']);
    $wants_raw = !empty($source['raw']);

    if ($wants_inner && !$wants_raw) {
      unset($values['sources'][$machine_name]['inner']);
    }
  }
}
/**
 * Prepares the DOM document.
 *
 * The raw fetcher data is prepared, stripped of default namespaces,
 * optionally repaired with tidy, has its named HTML entities decoded and is
 * then loaded into a DOM document.
 *
 * @param \Drupal\feeds\FeedInterface $feed
 *   The feed source.
 * @param \Drupal\feeds\Result\FetcherResultInterface $fetcher_result
 *   The fetcher result.
 *
 * @return \DOMDocument
 *   The DOM document.
 */
protected function prepareDocument(FeedInterface $feed, FetcherResultInterface $fetcher_result) {
  // Default namespaces are removed after prepareRaw() because the removal
  // has to run after the encoding conversion: regular expressions support
  // only a limited set of encodings.
  $xml = $this->removeDefaultNamespaces($this->prepareRaw($fetcher_result));

  // Tidy is applied only when it is both enabled and actually available.
  $tidy_enabled = $this->configuration['use_tidy'] && extension_loaded('tidy');
  if ($tidy_enabled) {
    $xml = tidy_repair_string($xml, $this->getTidyConfig(), 'utf8');
  }

  $decoded = $this->utility->decodeNamedHtmlEntities($xml);

  return $this->getDomDocument($decoded);
}
/**
 * Returns the raw XML of a DOM node.
 *
 * @param \DOMNode $node
 *   The node to convert to raw XML. This may be the document node itself.
 *
 * @return string
 *   The raw XML.
 */
protected function getRaw(DOMNode $node) {
  // A document node has no owner document: its ownerDocument property is
  // NULL, so calling saveXML() on it would be a fatal error. This happens
  // when an expression selects the document itself, for example "/". In that
  // case the whole document is serialized.
  if ($node instanceof \DOMDocument) {
    return $node->saveXML();
  }

  return $node->ownerDocument->saveXML($node);
}
/**
 * Returns the inner XML of a DOM node.
 *
 * @param \DOMNode $node
 *   The node whose child nodes are converted to raw XML.
 *
 * @return string
 *   The inner XML: the raw XML of all child nodes, concatenated in order.
 */
protected function getInnerXml(DOMNode $node) {
  $pieces = [];
  foreach ($node->childNodes as $child) {
    $pieces[] = $this->getRaw($child);
  }

  return implode('', $pieces);
}
/**
* {@inheritdoc}
*/
protected function startErrorHandling() {
  parent::startErrorHandling();

  // Start from a clean error buffer and collect libxml errors internally,
  // remembering the previous setting so stopErrorHandling() can restore it.
  libxml_clear_errors();
  $this->handleXmlErrors = libxml_use_internal_errors(TRUE);

  // Forget any value saved by an earlier run, so that stopErrorHandling()
  // only restores what this call records.
  $this->entityLoader = NULL;

  // Only available in PHP >= 5.2.11.
  // See http://symfony.com/blog/security-release-symfony-2-0-17-released for
  // details.
  // libxml_disable_entity_loader() is deprecated as of PHP 8.0 and calling it
  // triggers a deprecation notice, so it is only used on older versions.
  if (\PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader')) {
    $this->entityLoader = libxml_disable_entity_loader(TRUE);
  }
}
/**
 * {@inheritdoc}
 */
protected function stopErrorHandling() {
  parent::stopErrorHandling();

  // Discard collected errors and restore the previous libxml error mode.
  libxml_clear_errors();
  libxml_use_internal_errors($this->handleXmlErrors);

  // Restore the entity loader only when startErrorHandling() changed it,
  // which it never does on PHP 8.0 and later.
  if ($this->entityLoader !== NULL && \PHP_VERSION_ID < 80000 && function_exists('libxml_disable_entity_loader')) {
    libxml_disable_entity_loader($this->entityLoader);
  }
}
/**
* {@inheritdoc}
*/
protected function getErrors() {
  // Translation of libxml error levels to log severities. Levels that are
  // not listed here are reported as notices.
  $severities = [
    LIBXML_ERR_FATAL => RfcLogLevel::ERROR,
    LIBXML_ERR_ERROR => RfcLogLevel::WARNING,
  ];

  $errors = [];
  foreach (libxml_get_errors() as $error) {
    $severity = isset($severities[$error->level]) ? $severities[$error->level] : RfcLogLevel::NOTICE;

    $errors[] = [
      'message' => '%error on line %num. Error code: %code',
      'variables' => [
        '%error' => trim($error->message),
        '%num' => $error->line,
        '%code' => $error->code,
      ],
      'severity' => $severity,
    ];
  }

  return $errors;
}
/**
 * Returns the options for phptidy.
 *
 * The returned array is passed to tidy_repair_string() as its configuration
 * when the "use_tidy" setting is enabled.
 *
 * @return array
 *   The configuration array.
 *
 * @see http://php.net/manual/en/book.tidy.php
 * @see tidy_repair_string()
 */
protected function getTidyConfig() {
  $options = [];
  // XML-related options: XML input, XML output and an XML declaration.
  $options['input-xml'] = TRUE;
  $options['output-xml'] = TRUE;
  $options['add-xml-decl'] = TRUE;
  // Remaining output options: wrap margin and the tidy mark.
  $options['wrap'] = 0;
  $options['tidy-mark'] = FALSE;

  return $options;
}
}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
DependencySerializationTrait:: |
protected | property | An array of entity type IDs keyed by the property name of their storages. | |
DependencySerializationTrait:: |
protected | property | An array of service IDs keyed by property name used for serialization. | |
DependencySerializationTrait:: |
public | function | 1 | |
DependencySerializationTrait:: |
public | function | 2 | |
DependencyTrait:: |
protected | property | The object's dependencies. | |
DependencyTrait:: |
protected | function | Adds multiple dependencies. | |
DependencyTrait:: |
protected | function | Adds a dependency. | |
MessengerTrait:: |
protected | property | The messenger. | 29 |
MessengerTrait:: |
public | function | Gets the messenger. | 29 |
MessengerTrait:: |
public | function | Sets the messenger. | |
ParserBase:: |
protected | property | The encoder used to convert encodings. | |
ParserBase:: |
protected | property | The messenger, for compatibility with Drupal 8.5. | |
ParserBase:: |
protected static | property | The default list of HTML tags allowed by Xss::filter(). | |
ParserBase:: |
public | function | ||
ParserBase:: |
protected | function | Returns the description for single source. | 1 |
ParserBase:: |
protected | function | Renders our debug messages into a list. | |
ParserBase:: |
protected | function | Executes the source expressions. | |
ParserBase:: |
public | function | Returns the encoder. | |
ParserBase:: |
protected | function | Returns the configuration form table header. | |
ParserBase:: |
public | function |
Declare the possible mapping sources that this parser produces. Overrides ParserInterface:: |
|
ParserBase:: |
public | function | Gets the messenger. | |
ParserBase:: |
protected | function | Returns whether or not this parser uses a context query. | 2 |
ParserBase:: |
public | function | ||
ParserBase:: |
protected | function | Loads the necessary library. | 3 |
ParserBase:: |
public | function |
Alter mapping form. Overrides ParserBase:: |
|
ParserBase:: |
public | function |
Submit handler for the mapping form. Overrides ParserBase:: |
|
ParserBase:: |
public | function |
Validate handler for the mapping form. Overrides ParserBase:: |
|
ParserBase:: |
public | function |
Parses content returned by fetcher. Overrides ParserInterface:: |
|
ParserBase:: |
protected | function | Performs the actual parsing. | 2 |
ParserBase:: |
protected | function | Prepares the expressions for parsing. | |
ParserBase:: |
protected | function | Prepares the raw string for parsing. | |
ParserBase:: |
protected | function | Prepares the variable map used to substitution. | |
ParserBase:: |
protected | function | Prints errors to the screen. | |
ParserBase:: |
public | function | Sets the encoder. | |
ParserBase:: |
public | function | Sets the messenger. | |
ParserBase:: |
public | function | ||
ParserBase:: |
public | function | ||
ParserBase:: |
public | function | ||
ParserBase:: |
public | function |
Form submission handler. Overrides PluginFormInterface:: |
|
ParserBase:: |
public | function |
Form validation handler. Overrides PluginFormInterface:: |
|
ParserBase:: |
public | function | Builds configuration form for the parser settings. | |
PluginBase:: |
protected | property | Configuration information passed into the plugin. | 1 |
PluginBase:: |
protected | property | The importer this plugin is working for. | |
PluginBase:: |
protected | property | The link generator. | |
PluginBase:: |
protected | property | The plugin implementation definition. | 1 |
PluginBase:: |
protected | property | The plugin_id. | |
PluginBase:: |
protected | property | The url generator. | |
PluginBase:: |
public | function |
Calculates dependencies for the configured plugin. Overrides DependentPluginInterface:: |
2 |
PluginBase:: |
private | function | Returns the service container. | |
PluginBase:: |
public | function |
Returns default feed configuration. Overrides FeedsPluginInterface:: |
3 |
PluginBase:: |
constant | A string which is used to separate base plugin IDs from the derivative ID. | ||
PluginBase:: |
public | function |
Gets the base_plugin_id of the plugin instance. Overrides DerivativeInspectionInterface:: |
|
PluginBase:: |
public | function |
Gets this plugin's configuration. Overrides ConfigurableInterface:: |
|
PluginBase:: |
public | function |
Gets the derivative_id of the plugin instance. Overrides DerivativeInspectionInterface:: |
|
PluginBase:: |
public | function |
Gets the definition of the plugin implementation. Overrides PluginInspectionInterface:: |
3 |
PluginBase:: |
public | function |
Gets the plugin_id of the plugin instance. Overrides PluginInspectionInterface:: |
|
PluginBase:: |
public | function | Determines if the plugin is configurable. | |
PluginBase:: |
protected | function | Renders a link to a route given a route name and its parameters. | |
PluginBase:: |
protected | function | Returns the link generator service. | |
PluginBase:: |
public | function | A feed is being deleted. | 3 |
PluginBase:: |
public | function | A feed is being saved. | |
PluginBase:: |
public | function | The feed type is being deleted. | 1 |
PluginBase:: |
public | function | The feed type is being saved. | 1 |
PluginBase:: |
public | function |
Returns the type of plugin. Overrides FeedsPluginInterface:: |
|
PluginBase:: |
public | function |
Sets the configuration for this plugin instance. Overrides ConfigurableInterface:: |
1 |
PluginBase:: |
protected | function | Generates a URL or path for a specific route based on the given parameters. | |
PluginBase:: |
protected | function | Returns the URL generator service. | |
StringTranslationTrait:: |
protected | property | The string translation service. | 1 |
StringTranslationTrait:: |
protected | function | Formats a string containing a count of items. | |
StringTranslationTrait:: |
protected | function | Returns the number of plurals supported by a given language. | |
StringTranslationTrait:: |
protected | function | Gets the string translation service. | |
StringTranslationTrait:: |
public | function | Sets the string translation service to use. | 2 |
StringTranslationTrait:: |
protected | function | Translates a string to the current language or to a given language. | |
XmlParser:: |
protected | property |
The class used as the text encoder. Overrides ParserBase:: |
2 |
XmlParser:: |
protected | property | The previous value for the entity loader. | |
XmlParser:: |
protected | property | The previous value for XML error handling. | |
XmlParser:: |
protected | property | The XML helper class. | |
XmlParser:: |
protected | property | The XpathDomXpath object used for parsing. | |
XmlParser:: |
public | function |
Form constructor. Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Allows subclasses to cleanup after parsing. Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Returns a form element for a specific column. Overrides ParserBase:: |
1 |
XmlParser:: |
protected | function |
Returns the list of table headers. Overrides ParserBase:: |
1 |
XmlParser:: |
public | function |
Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Returns the label for single source. Overrides ParserBase:: |
|
XmlParser:: |
public static | function |
Creates an instance of the plugin. Overrides ContainerFactoryPluginInterface:: |
|
XmlParser:: |
public | function |
Gets default configuration for this plugin. Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Returns rows to be parsed. Overrides ParserBase:: |
1 |
XmlParser:: |
protected | function |
Executes a single source expression. Overrides ParserBase:: |
1 |
XmlParser:: |
protected | function |
Returns the errors after parsing. Overrides ParserBase:: |
|
XmlParser:: |
protected | function | Returns the inner XML of a DOM node. | |
XmlParser:: |
protected | function | Returns the raw XML of a DOM node. | 1 |
XmlParser:: |
protected | function | Returns the options for phptidy. | 2 |
XmlParser:: |
public | function |
Overrides ParserBase:: |
|
XmlParser:: |
protected | function | Prepares the DOM document. | 2 |
XmlParser:: |
protected | function |
Allows subclasses to prepare for parsing. Overrides ParserBase:: |
1 |
XmlParser:: |
protected | function |
Starts internal error handling. Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Stops internal error handling. Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Validates an expression. Overrides ParserBase:: |
1 |
XmlParser:: |
public | function |
Constructs a JsonParserBase object. Overrides ParserBase:: |
|
XmlParserTrait:: |
protected static | property | Matches the characters of an XML element. | |
XmlParserTrait:: |
protected static | property | The previous value of the entity loader. | |
XmlParserTrait:: |
protected static | property | The errors reported during parsing. | |
XmlParserTrait:: |
protected static | property | The previous value of libxml error reporting. | |
XmlParserTrait:: |
protected static | function | Returns a new DOMDocument. | |
XmlParserTrait:: |
protected static | function | Returns the errors reported during parsing. | |
XmlParserTrait:: |
protected static | function | Strips the default namespaces from an XML string. | |
XmlParserTrait:: |
protected static | function | Starts custom error handling. | |
XmlParserTrait:: |
protected static | function | Stops custom error handling. |