class QueryPathHtmlParser in Feeds extensible parsers 8
Defines an HTML parser using QueryPath.
@todo Make convertEncoding() into a helper function so that it isn't copied in 2 places.
Plugin annotation
@FeedsParser(
id = "querypathhtml",
title = @Translation("QueryPath HTML"),
description = @Translation("Parse HTML with QueryPath.")
)
Hierarchy
- class \Drupal\Component\Plugin\PluginBase implements DerivativeInspectionInterface, PluginInspectionInterface
- class \Drupal\Core\Plugin\PluginBase uses DependencySerializationTrait, MessengerTrait, StringTranslationTrait
- class \Drupal\feeds\Plugin\Type\PluginBase implements FeedsPluginInterface uses DependencyTrait
- class \Drupal\feeds\Feeds\Parser\ParserBase implements MappingPluginFormInterface, ParserInterface
- class \Drupal\feeds_ex\Feeds\Parser\ParserBase implements PluginFormInterface, MappingPluginFormInterface, ParserInterface
- class \Drupal\feeds_ex\Feeds\Parser\XmlParser implements ContainerFactoryPluginInterface uses XmlParserTrait
- class \Drupal\feeds_ex\Feeds\Parser\QueryPathXmlParser
- class \Drupal\feeds_ex\Feeds\Parser\QueryPathHtmlParser
- class \Drupal\feeds_ex\Feeds\Parser\QueryPathXmlParser
- class \Drupal\feeds_ex\Feeds\Parser\XmlParser implements ContainerFactoryPluginInterface uses XmlParserTrait
- class \Drupal\feeds_ex\Feeds\Parser\ParserBase implements PluginFormInterface, MappingPluginFormInterface, ParserInterface
- class \Drupal\feeds\Feeds\Parser\ParserBase implements MappingPluginFormInterface, ParserInterface
- class \Drupal\feeds\Plugin\Type\PluginBase implements FeedsPluginInterface uses DependencyTrait
- class \Drupal\Core\Plugin\PluginBase uses DependencySerializationTrait, MessengerTrait, StringTranslationTrait
Expanded class hierarchy of QueryPathHtmlParser
1 file declares its use of QueryPathHtmlParser
- QueryPathHtmlParserTest.php in tests/
src/ Unit/ Feeds/ Parser/ QueryPathHtmlParserTest.php
File
- src/
Feeds/ Parser/ QueryPathHtmlParser.php, line 22
Namespace
Drupal\feeds_ex\Feeds\Parser
View source
class QueryPathHtmlParser extends QueryPathXmlParser {

  /**
   * The class used as the text encoder.
   *
   * @var string
   */
  protected $encoderClass = '\\Drupal\\feeds_ex\\Encoder\\HtmlEncoder';

  /**
   * Prepares the parser before parsing starts.
   *
   * The only thing done here is switching the QueryPath options to the HTML
   * parser; none of the arguments are read by this implementation.
   *
   * @param \Drupal\feeds\FeedInterface $feed
   *   The feed being parsed (unused here).
   * @param \Drupal\feeds\Result\FetcherResultInterface $fetcher_result
   *   The fetcher result (unused here).
   * @param \Drupal\feeds\StateInterface $state
   *   The parser state (unused here).
   */
  protected function setUp(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
    // Tell QueryPath to treat the input as HTML.
    $this->queryPathOptions['use_parser'] = 'html';
  }

  /**
   * Returns the raw value of a node.
   *
   * @param \QueryPath\DOMQuery $node
   *   The QueryPath node to read from.
   *
   * @return mixed
   *   Whatever the node's html() method returns.
   */
  protected function getRawValue(DOMQuery $node) {
    $markup = $node->html();
    return $markup;
  }

  /**
   * Prepares the DOM document.
   *
   * The raw input is obtained through prepareRaw(). When the 'use_tidy'
   * configuration value is truthy and the tidy extension is loaded, the
   * string is repaired with tidy before the HTML document is created.
   *
   * @param \Drupal\feeds\FeedInterface $feed
   *   The feed being parsed (unused here).
   * @param \Drupal\feeds\Result\FetcherResultInterface $fetcher_result
   *   The fetcher result that supplies the raw input.
   *
   * @return mixed
   *   The result of the utility's createHtmlDocument() call.
   */
  protected function prepareDocument(FeedInterface $feed, FetcherResultInterface $fetcher_result) {
    $markup = $this->prepareRaw($fetcher_result);

    // Check the setting first so the extension lookup is skipped when tidy
    // was not requested.
    $tidy_requested = $this->configuration['use_tidy'];
    if ($tidy_requested && extension_loaded('tidy')) {
      $tidy_options = $this->getTidyConfig();
      $markup = tidy_repair_string($markup, $tidy_options, 'utf8');
    }

    $document = $this->utility->createHtmlDocument($markup);
    return $document;
  }

  /**
   * Returns the options for phptidy.
   *
   * @return array
   *   Tidy configuration values keyed by option name.
   */
  protected function getTidyConfig() {
    $options = [
      'merge-divs' => FALSE,
      'merge-spans' => FALSE,
      'join-styles' => FALSE,
      'drop-empty-paras' => FALSE,
      'wrap' => 0,
      'tidy-mark' => FALSE,
      'escape-cdata' => TRUE,
      'word-2000' => TRUE,
    ];

    return $options;
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
DependencySerializationTrait:: |
protected | property | An array of entity type IDs keyed by the property name of their storages. | |
DependencySerializationTrait:: |
protected | property | An array of service IDs keyed by property name used for serialization. | |
DependencySerializationTrait:: |
public | function | 1 | |
DependencySerializationTrait:: |
public | function | 2 | |
DependencyTrait:: |
protected | property | The object's dependencies. | |
DependencyTrait:: |
protected | function | Adds multiple dependencies. | |
DependencyTrait:: |
protected | function | Adds a dependency. | |
MessengerTrait:: |
protected | property | The messenger. | 29 |
MessengerTrait:: |
public | function | Gets the messenger. | 29 |
MessengerTrait:: |
public | function | Sets the messenger. | |
ParserBase:: |
protected | property | The encoder used to convert encodings. | |
ParserBase:: |
protected | property | The messenger, for compatibility with Drupal 8.5. | |
ParserBase:: |
protected static | property | The default list of HTML tags allowed by Xss::filter(). | |
ParserBase:: |
public | function | ||
ParserBase:: |
protected | function | Returns the description for single source. | 1 |
ParserBase:: |
protected | function | Renders our debug messages into a list. | |
ParserBase:: |
protected | function | Executes the source expressions. | |
ParserBase:: |
public | function | Returns the encoder. | |
ParserBase:: |
protected | function | Returns the configuration form table header. | |
ParserBase:: |
public | function |
Declare the possible mapping sources that this parser produces. Overrides ParserInterface:: |
|
ParserBase:: |
public | function | Gets the messenger. | |
ParserBase:: |
protected | function | Returns whether or not this parser uses a context query. | 2 |
ParserBase:: |
public | function | ||
ParserBase:: |
public | function |
Alter mapping form. Overrides ParserBase:: |
|
ParserBase:: |
public | function |
Submit handler for the mapping form. Overrides ParserBase:: |
|
ParserBase:: |
public | function |
Validate handler for the mapping form. Overrides ParserBase:: |
|
ParserBase:: |
public | function |
Parses content returned by fetcher. Overrides ParserInterface:: |
|
ParserBase:: |
protected | function | Performs the actual parsing. | 2 |
ParserBase:: |
protected | function | Prepares the expressions for parsing. | |
ParserBase:: |
protected | function | Prepares the raw string for parsing. | |
ParserBase:: |
protected | function | Prepares the variable map used to substitution. | |
ParserBase:: |
protected | function | Prints errors to the screen. | |
ParserBase:: |
public | function | Sets the encoder. | |
ParserBase:: |
public | function | Sets the messenger. | |
ParserBase:: |
public | function | ||
ParserBase:: |
public | function | ||
ParserBase:: |
public | function | ||
ParserBase:: |
public | function |
Form submission handler. Overrides PluginFormInterface:: |
|
ParserBase:: |
public | function |
Form validation handler. Overrides PluginFormInterface:: |
|
ParserBase:: |
public | function | Builds configuration form for the parser settings. | |
PluginBase:: |
protected | property | Configuration information passed into the plugin. | 1 |
PluginBase:: |
protected | property | The importer this plugin is working for. | |
PluginBase:: |
protected | property | The link generator. | |
PluginBase:: |
protected | property | The plugin implementation definition. | 1 |
PluginBase:: |
protected | property | The plugin_id. | |
PluginBase:: |
protected | property | The url generator. | |
PluginBase:: |
public | function |
Calculates dependencies for the configured plugin. Overrides DependentPluginInterface:: |
2 |
PluginBase:: |
private | function | Returns the service container. | |
PluginBase:: |
public | function |
Returns default feed configuration. Overrides FeedsPluginInterface:: |
3 |
PluginBase:: |
constant | A string which is used to separate base plugin IDs from the derivative ID. | ||
PluginBase:: |
public | function |
Gets the base_plugin_id of the plugin instance. Overrides DerivativeInspectionInterface:: |
|
PluginBase:: |
public | function |
Gets this plugin's configuration. Overrides ConfigurableInterface:: |
|
PluginBase:: |
public | function |
Gets the derivative_id of the plugin instance. Overrides DerivativeInspectionInterface:: |
|
PluginBase:: |
public | function |
Gets the definition of the plugin implementation. Overrides PluginInspectionInterface:: |
3 |
PluginBase:: |
public | function |
Gets the plugin_id of the plugin instance. Overrides PluginInspectionInterface:: |
|
PluginBase:: |
public | function | Determines if the plugin is configurable. | |
PluginBase:: |
protected | function | Renders a link to a route given a route name and its parameters. | |
PluginBase:: |
protected | function | Returns the link generator service. | |
PluginBase:: |
public | function | A feed is being deleted. | 3 |
PluginBase:: |
public | function | A feed is being saved. | |
PluginBase:: |
public | function | The feed type is being deleted. | 1 |
PluginBase:: |
public | function | The feed type is being saved. | 1 |
PluginBase:: |
public | function |
Returns the type of plugin. Overrides FeedsPluginInterface:: |
|
PluginBase:: |
public | function |
Sets the configuration for this plugin instance. Overrides ConfigurableInterface:: |
1 |
PluginBase:: |
protected | function | Generates a URL or path for a specific route based on the given parameters. | |
PluginBase:: |
protected | function | Returns the URL generator service. | |
QueryPathHtmlParser:: |
protected | property |
The class used as the text encoder. Overrides XmlParser:: |
|
QueryPathHtmlParser:: |
protected | function |
Returns the raw value. Overrides QueryPathXmlParser:: |
|
QueryPathHtmlParser:: |
protected | function |
Returns the options for phptidy. Overrides XmlParser:: |
|
QueryPathHtmlParser:: |
protected | function |
Prepares the DOM document. Overrides XmlParser:: |
|
QueryPathHtmlParser:: |
protected | function |
Allows subclasses to prepare for parsing. Overrides XmlParser:: |
|
QueryPathXmlParser:: |
protected | property | Options passed to QueryPath. | |
QueryPathXmlParser:: |
protected | function |
Returns a form element for a specific column. Overrides XmlParser:: |
|
QueryPathXmlParser:: |
protected | function |
Returns the list of table headers. Overrides XmlParser:: |
|
QueryPathXmlParser:: |
protected | function |
Returns rows to be parsed. Overrides XmlParser:: |
|
QueryPathXmlParser:: |
protected | function |
Executes a single source expression. Overrides XmlParser:: |
|
QueryPathXmlParser:: |
protected | function |
Loads the necessary library. Overrides ParserBase:: |
|
QueryPathXmlParser:: |
protected | function |
Validates an expression. Overrides XmlParser:: |
|
StringTranslationTrait:: |
protected | property | The string translation service. | 1 |
StringTranslationTrait:: |
protected | function | Formats a string containing a count of items. | |
StringTranslationTrait:: |
protected | function | Returns the number of plurals supported by a given language. | |
StringTranslationTrait:: |
protected | function | Gets the string translation service. | |
StringTranslationTrait:: |
public | function | Sets the string translation service to use. | 2 |
StringTranslationTrait:: |
protected | function | Translates a string to the current language or to a given language. | |
XmlParser:: |
protected | property | The previous value for the entity loader. | |
XmlParser:: |
protected | property | The previous value for XML error handling. | |
XmlParser:: |
protected | property | The XML helper class. | |
XmlParser:: |
protected | property | The XpathDomXpath object used for parsing. | |
XmlParser:: |
public | function |
Form constructor. Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Allows subclasses to cleanup after parsing. Overrides ParserBase:: |
|
XmlParser:: |
public | function |
Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Returns the label for single source. Overrides ParserBase:: |
|
XmlParser:: |
public static | function |
Creates an instance of the plugin. Overrides ContainerFactoryPluginInterface:: |
|
XmlParser:: |
public | function |
Gets default configuration for this plugin. Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Returns the errors after parsing. Overrides ParserBase:: |
|
XmlParser:: |
protected | function | Returns the inner XML of a DOM node. | |
XmlParser:: |
protected | function | Returns the raw XML of a DOM node. | 1 |
XmlParser:: |
public | function |
Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Starts internal error handling. Overrides ParserBase:: |
|
XmlParser:: |
protected | function |
Stops internal error handling. Overrides ParserBase:: |
|
XmlParser:: |
public | function |
Constructs an XmlParser object. Overrides ParserBase::
|
XmlParserTrait:: |
protected static | property | Matches the characters of an XML element. | |
XmlParserTrait:: |
protected static | property | The previous value of the entity loader. | |
XmlParserTrait:: |
protected static | property | The errors reported during parsing. | |
XmlParserTrait:: |
protected static | property | The previous value of libxml error reporting. | |
XmlParserTrait:: |
protected static | function | Returns a new DOMDocument. | |
XmlParserTrait:: |
protected static | function | Returns the errors reported during parsing. | |
XmlParserTrait:: |
protected static | function | Strips the default namespaces from an XML string. | |
XmlParserTrait:: |
protected static | function | Starts custom error handling. | |
XmlParserTrait:: |
protected static | function | Stops custom error handling. |