QueryPathHtmlParser.php in Feeds extensible parsers 8
File
src/Feeds/Parser/QueryPathHtmlParser.php
View source
<?php
namespace Drupal\feeds_ex\Feeds\Parser;
use Drupal\feeds\FeedInterface;
use Drupal\feeds\Result\FetcherResultInterface;
use Drupal\feeds\StateInterface;
use QueryPath\DOMQuery;
/**
 * QueryPath-based parser variant for HTML sources.
 *
 * Builds on QueryPathXmlParser and changes three things that are visible in
 * this class: QueryPath is told to use its HTML parser, raw values are taken
 * from DOMQuery::html(), and the source document is created through the
 * utility's HTML document factory (optionally after a Tidy repair pass).
 */
class QueryPathHtmlParser extends QueryPathXmlParser {

  /**
   * Fully-qualified class name of the encoder used for HTML sources.
   *
   * NOTE(review): presumably instantiated by a parent class; the consumer is
   * not visible in this file.
   *
   * @var string
   */
  protected $encoderClass = '\\Drupal\\feeds_ex\\Encoder\\HtmlEncoder';

  /**
   * Switches QueryPath to its HTML parser.
   *
   * None of the arguments are used by this implementation.
   *
   * NOTE(review): parent::setUp() is not invoked here; confirm that skipping
   * the parent's set-up is intentional.
   *
   * @param \Drupal\feeds\FeedInterface $feed
   *   The feed being parsed.
   * @param \Drupal\feeds\Result\FetcherResultInterface $fetcher_result
   *   The fetcher result being parsed.
   * @param \Drupal\feeds\StateInterface $state
   *   The parse state.
   */
  protected function setUp(FeedInterface $feed, FetcherResultInterface $fetcher_result, StateInterface $state) {
    $this->queryPathOptions['use_parser'] = 'html';
  }

  /**
   * Returns the raw value of a matched node as HTML.
   *
   * @param \QueryPath\DOMQuery $node
   *   The QueryPath object wrapping the matched node.
   *
   * @return mixed
   *   Whatever DOMQuery::html() returns for the given node.
   */
  protected function getRawValue(DOMQuery $node) {
    return $node->html();
  }

  /**
   * Builds the HTML document that will be queried.
   *
   * The raw source is obtained from prepareRaw(). When the 'use_tidy'
   * configuration value is truthy and the tidy extension is loaded, the
   * source is first run through tidy_repair_string() as UTF-8.
   *
   * @param \Drupal\feeds\FeedInterface $feed
   *   The feed being parsed. Not used by this implementation.
   * @param \Drupal\feeds\Result\FetcherResultInterface $fetcher_result
   *   The fetcher result that holds the raw source.
   *
   * @return mixed
   *   The return value of the utility's createHtmlDocument() call.
   */
  protected function prepareDocument(FeedInterface $feed, FetcherResultInterface $fetcher_result) {
    $raw = $this->prepareRaw($fetcher_result);

    if ($this->configuration['use_tidy'] && extension_loaded('tidy')) {
      $repaired = tidy_repair_string($raw, $this->getTidyConfig(), 'utf8');
      // tidy_repair_string() returns FALSE on failure. The repair pass is a
      // best-effort clean-up, so keep the unrepaired source in that case
      // rather than handing FALSE to the document factory.
      if ($repaired !== FALSE) {
        $raw = $repaired;
      }
    }

    return $this->utility->createHtmlDocument($raw);
  }

  /**
   * Returns the option set handed to tidy_repair_string().
   *
   * The merge-divs, merge-spans, join-styles, drop-empty-paras and tidy-mark
   * options are switched off, wrap is set to 0, and escape-cdata and
   * word-2000 are switched on. See the HTML Tidy option reference for the
   * exact effect of each option.
   *
   * @return array
   *   Tidy configuration options keyed by option name.
   */
  protected function getTidyConfig() {
    return [
      'merge-divs' => FALSE,
      'merge-spans' => FALSE,
      'join-styles' => FALSE,
      'drop-empty-paras' => FALSE,
      'wrap' => 0,
      'tidy-mark' => FALSE,
      'escape-cdata' => TRUE,
      'word-2000' => TRUE,
    ];
  }

}