class FeedsXPathParserHTML in Feeds XPath Parser 7
Same name and namespace in other branches
- 6 FeedsXPathParserHTML.inc \FeedsXPathParserHTML
XPath parsing for HTML.
Hierarchy
- class \FeedsXPathParserBase extends \FeedsParser
- class \FeedsXPathParserHTML
Expanded class hierarchy of FeedsXPathParserHTML
2 string references to 'FeedsXPathParserHTML'
- FeedsXPathParseHTMLTestCase::test in tests/
feeds_xpathparser_parser_html.test - Run tests.
- feeds_xpathparser_feeds_plugins in ./
feeds_xpathparser.module - Implements hook_feeds_plugins().
File
- ./
FeedsXPathParserHTML.inc, line 11 - Contains FeedsXPathParserHTML.
View source
class FeedsXPathParserHTML extends FeedsXPathParserBase {

  /**
   * Whether this version of PHP has a useable saveHTML() method.
   *
   * Set by the constructor to TRUE on PHP 5.3.6 and later, the first version
   * in which DOMDocument::saveHTML() accepts a node argument.
   *
   * @var bool
   */
  protected $hasSaveHTML = FALSE;

  /**
   * {@inheritdoc}
   *
   * Additionally records whether DOMDocument::saveHTML() can serialize a
   * single node on the running PHP version.
   */
  public function __construct($id) {
    parent::__construct($id);

    // DOMDocument::saveHTML() cannot take $node as an argument prior to 5.3.6.
    $this->hasSaveHTML = version_compare(PHP_VERSION, '5.3.6', '>=');
  }

  /**
   * {@inheritdoc}
   *
   * Builds a DOMDocument from the fetched HTML, optionally repairing the
   * markup with the tidy extension first.
   *
   * @param array $source_config
   *   The source configuration. The following keys under 'exp' are read:
   *   'tidy' (whether to run tidy), 'tidy_encoding' (encoding handed to tidy,
   *   defaults to UTF8 when missing or empty) and 'errors' (passed through to
   *   errorStop()).
   * @param FeedsFetcherResult $fetcher_result
   *   The fetcher result whose raw content is parsed.
   *
   * @return DOMDocument
   *   The parsed document.
   *
   * @throws Exception
   *   If the document is empty or cannot be parsed as HTML.
   */
  protected function setup($source_config, FeedsFetcherResult $fetcher_result) {
    $exp = array();
    if (isset($source_config['exp']) && is_array($source_config['exp'])) {
      $exp = $source_config['exp'];
    }

    $raw = $fetcher_result->getRaw();

    if (!empty($exp['tidy']) && extension_loaded('tidy')) {
      $config = array(
        'merge-divs' => FALSE,
        'merge-spans' => FALSE,
        'join-styles' => FALSE,
        'drop-empty-paras' => FALSE,
        'wrap' => 0,
        'tidy-mark' => FALSE,
        'escape-cdata' => TRUE,
        'word-2000' => TRUE,
      );

      // Default tidy encoding is UTF8. Fall back to it explicitly so that a
      // missing setting neither raises a notice nor hands NULL to tidy.
      $encoding = !empty($exp['tidy_encoding']) ? $exp['tidy_encoding'] : 'UTF8';
      $raw = tidy_repair_string($raw, $config, $encoding);
    }

    // Some versions of PHP do not handle null bytes. This also turns a FALSE
    // returned by a failed tidy repair into an empty string.
    $raw = str_replace("\0", '', $raw);

    // DOMDocument::loadHTML() rejects empty input (a warning on older PHP
    // versions, a ValueError on PHP 8). Bail out before custom error handling
    // is started so that there is nothing left to restore.
    if ($raw === '') {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    $document = new DOMDocument();
    $document->strictErrorChecking = FALSE;
    $document->recover = TRUE;

    // Use our own error handling.
    $use = $this->errorStart();

    // The $options argument of loadHTML() is only available as of PHP 5.4.
    if (version_compare(PHP_VERSION, '5.4.0', '>=')) {
      $options = LIBXML_NONET;
      $options |= defined('LIBXML_COMPACT') ? LIBXML_COMPACT : 0;
      $options |= defined('LIBXML_PARSEHUGE') ? LIBXML_PARSEHUGE : 0;
      $success = $document->loadHTML($raw, $options);
    }
    else {
      $success = $document->loadHTML($raw);
    }

    // A missing 'errors' setting is passed on as NULL, without the notice an
    // unguarded array access would raise.
    $this->errorStop($use, isset($exp['errors']) ? $exp['errors'] : NULL);

    if (!$success) {
      throw new Exception(t('There was an error parsing the HTML document.'));
    }

    return $document;
  }

  /**
   * {@inheritdoc}
   *
   * Serializes a node as HTML when saveHTML() supports a node argument, and
   * falls back to XML serialization (without collapsing empty tags) otherwise.
   *
   * @param DOMNode $node
   *   The node to serialize.
   *
   * @return string|false
   *   The serialized markup as returned by saveHTML() or saveXML().
   */
  protected function getRaw(DOMNode $node) {
    if ($this->hasSaveHTML) {
      return $this->doc->saveHTML($node);
    }

    return $this->doc->saveXML($node, LIBXML_NOEMPTYTAG);
  }

}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
FeedsXPathParserBase:: |
protected | property | The DOMDocument used for parsing. | |
FeedsXPathParserBase:: |
protected | property | The return value of libxml_disable_entity_loader(). | |
FeedsXPathParserBase:: |
protected | property | The elements that should be displayed in raw XML. | |
FeedsXPathParserBase:: |
protected | property | The DOMXPath object used for parsing. | |
FeedsXPathParserBase:: |
public | function | Overrides parent::configDefaults(). | |
FeedsXPathParserBase:: |
public | function | Overrides parent::configForm(). | |
FeedsXPathParserBase:: |
public | function | Overrides parent::sourceFormValidate(). | |
FeedsXPathParserBase:: |
protected | function | Starts custom error handling. | |
FeedsXPathParserBase:: |
protected | function | Stops custom error handling. | |
FeedsXPathParserBase:: |
protected | function | Filters mappings, returning the ones that belong to us. | |
FeedsXPathParserBase:: |
public | function | Overrides parent::getMappingSources(). | |
FeedsXPathParserBase:: |
protected | function | Gets the mappings that are defined by this parser. | |
FeedsXPathParserBase:: |
protected | function | Gets the unique mappings targets that are used by this parser. | |
FeedsXPathParserBase:: |
public | function | Overrides parent::hasSourceConfig(). | |
FeedsXPathParserBase:: |
public | function | Implements FeedsParser::parse(). | |
FeedsXPathParserBase:: |
protected | function | Parses one item from the context array. | |
FeedsXPathParserBase:: |
public | function | Overrides parent::sourceDefaults(). | |
FeedsXPathParserBase:: |
public | function | Overrides parent::sourceForm(). | |
FeedsXPathParserBase:: |
public | function | Overrides parent::sourceFormValidate(). | |
FeedsXPathParserHTML:: |
protected | property | Whether this version of PHP has a useable saveHTML() method. | |
FeedsXPathParserHTML:: |
protected | function |
Helper callback to return the raw value. Overrides FeedsXPathParserBase:: |
|
FeedsXPathParserHTML:: |
protected | function |
Classes that use FeedsXPathParserBase must implement this. Overrides FeedsXPathParserBase:: |
|
FeedsXPathParserHTML:: |
public | function |