You are here

protected function FeedsXPathParserHTML::setup in Feeds XPath Parser 6

Same name and namespace in other branches
  1. 7 FeedsXPathParserHTML.inc \FeedsXPathParserHTML::setup()

Implementation of FeedsXPathParserBase::setup().

Overrides FeedsXPathParserBase::setup

File

./FeedsXPathParserHTML.inc, line 16
Provides the FeedsXPathParserHTML class.

Class

FeedsXPathParserHTML
Parse HTML using XPath.

Code

/**
 * Implementation of FeedsXPathParserBase::setup().
 *
 * Optionally repairs the fetched markup with tidy, then loads it into a
 * DOMDocument as HTML.
 *
 * @param array $source_config
 *   Source configuration. The keys read here are $source_config['exp']['tidy'],
 *   $source_config['exp']['tidy_encoding'] and $source_config['exp']['errors'].
 * @param FeedsImportBatch $batch
 *   The batch whose raw document (getRaw()) is parsed.
 *
 * @return DOMDocument
 *   The parsed document.
 *
 * @throws Exception
 *   If DOMDocument::loadHTML() reports failure.
 */
protected function setup($source_config, FeedsImportBatch $batch) {
  $raw = $batch->getRaw();

  // Tidy is an optional PHP extension. Only attempt the repair when it was
  // requested AND the function is actually available; otherwise fall back to
  // the unmodified document instead of dying on an undefined function.
  if (!empty($source_config['exp']['tidy']) && function_exists('tidy_repair_string')) {
    $config = array(
      'merge-divs' => FALSE,
      'merge-spans' => FALSE,
      'join-styles' => FALSE,
      'drop-empty-paras' => FALSE,
      'wrap' => 0,
      'tidy-mark' => FALSE,
      'escape-cdata' => TRUE,
      'word-2000' => TRUE,
    );

    // Default tidy encoding is UTF8; apply it when none was configured.
    $encoding = !empty($source_config['exp']['tidy_encoding']) ? $source_config['exp']['tidy_encoding'] : 'UTF8';
    $raw = tidy_repair_string(trim($raw), $config, $encoding);
  }

  $doc = new DOMDocument();

  // Use our own error handling. NOTE(review): errorStart()/errorStop() are
  // defined outside this block; presumably they capture libxml errors around
  // the load and optionally display them - confirm against the base class.
  $use = $this->errorStart();
  $success = $doc->loadHTML($raw);
  // The raw string can be large and is no longer needed once parsed.
  unset($raw);

  // Pass the configured value through untouched, or NULL when it is not set,
  // without raising an undefined-index notice.
  $show_errors = isset($source_config['exp']['errors']) ? $source_config['exp']['errors'] : NULL;
  $this->errorStop($use, $show_errors);

  if (!$success) {
    throw new Exception(t('There was an error parsing the HTML document.'));
  }
  return $doc;
}