You are here

sheetnode_html.module in Sheetnode 7.2

Module file for the sheetnode_html module.

This extends sheetnodes to enable importing of HTML into sheetnodes.

File

modules/sheetnode_html/sheetnode_html.module
View source
<?php

/**
 * @file
 * Module file for the sheetnode_html module.
 *
 * This extends sheetnodes to enable importing of HTML into sheetnodes.
 */

/**
 * Implements hook_menu().
 *
 * Declares a single path, node/add/sheetnode_html, which renders
 * sheetnode_html_import_form through drupal_get_form(). The access arguments
 * name the 'create sheetnode' permission.
 *
 * @return array
 *   Menu item definitions keyed by path.
 */
function sheetnode_html_menu() {
  // The one and only route exposed by this module: the HTML import form.
  $import_page = array(
    'title' => 'Sheetnode import from HTML page',
    'access arguments' => array('create sheetnode'),
    'page callback' => 'drupal_get_form',
    'page arguments' => array('sheetnode_html_import_form'),
    'description' => 'Create a new sheetnode with table from HTML page.',
  );

  return array('node/add/sheetnode_html' => $import_page);
}

/**
 * Form builder for the HTML import form.
 *
 * Collects the URL of the page to parse and an optional QueryPath expression
 * selecting the table(s) to import.
 *
 * @return array
 *   A Form API array with 'url', 'querypath' and 'submit' elements.
 *
 * @see sheetnode_html_import_form_validate()
 * @see sheetnode_html_import_form_submit()
 */
function sheetnode_html_import_form() {
  $form = array();
  $form['url'] = array(
    '#type' => 'textfield',
    '#title' => t('URL'),
    '#required' => TRUE,
    // Textfields default to a 128 character limit, which rejects many
    // real-world URLs (long paths, query strings).
    '#maxlength' => 2048,
    '#description' => t('URL of the page to parse.'),
  );
  $form['querypath'] = array(
    '#type' => 'textfield',
    '#title' => t('QueryPath'),
    '#description' => t('<a href="@qp">QueryPath expression</a> to find one or more tables to import into a single spreadsheet. For example: <code>table#flat-rates-table</code>.', array(
      '@qp' => 'http://querypath.org/',
    )),
  );
  $form['submit'] = array(
    '#type' => 'submit',
    '#value' => t('Submit'),
  );
  return $form;
}

/**
 * Validation handler for the import form.
 *
 * Requires the submitted value to be an absolute URL. The submit handler hands
 * the value to sheetnode_html_import(), which opens it with
 * DOMDocument::loadHTMLFile(); without this check a local file path or a PHP
 * stream wrapper typed into the field would be opened as well.
 */
function sheetnode_html_import_form_validate($form, &$form_state) {
  $url = $form_state['values']['url'];
  if (!valid_url($url, TRUE)) {
    form_set_error('url', t('The URL %url is invalid. Enter a fully-qualified URL, such as http://www.example.com/page.html.', array(
      '%url' => $url,
    )));
  }
}

/**
 * Submit handler for import form.
 *
 * Imports the submitted URL through sheetnode_html_import(), wraps the result
 * in a new 'sheetnode' node, lets other modules alter it via
 * hook_sheetnode_import_alter(), saves it and redirects to the new node.
 *
 * The 'ignore_spans' import option is read from the
 * 'sheetnode_html_ignore_spans' variable (default FALSE).
 */
function sheetnode_html_import_form_submit($form, &$form_state) {
  // node_object_prepare() lives in node.pages.inc.
  module_load_include('inc', 'node', 'node.pages');
  global $user;
  $url = $form_state['values']['url'];
  $querypath = $form_state['values']['querypath'];
  $options = array(
    'ignore_spans' => variable_get('sheetnode_html_ignore_spans', FALSE),
  );
  list($title, $sc) = sheetnode_html_import($url, $querypath, $options);

  // The page title is used verbatim as node title, so guard against the two
  // ways it can be unusable: missing/blank (page without <title>, or a page
  // that failed to load) and longer than the 255 characters a node title can
  // hold.
  $title = trim((string) $title);
  if ($title === '') {
    $title = $url;
  }
  $title = truncate_utf8($title, 255);

  $node = new stdClass();
  $node->type = 'sheetnode';
  node_object_prepare($node);
  $node->title = $title;
  $node->name = $user->name;
  $node->language = LANGUAGE_NONE;
  $node->sheetnode['value'] = $sc;
  $node->sheetnode['template'] = NULL;

  // Let other modules alter the sheetnode or do other work.
  $context = array(
    'url' => $url,
    'querypath' => $querypath,
    'options' => $options,
  );
  $params = array();
  drupal_alter('sheetnode_import', $node, $params, $context);

  // Save the sheetnode.
  $node = node_submit($node);
  node_save($node);
  // Only redirect when the save actually produced a node ID.
  if (!empty($node->nid)) {
    $form_state['redirect'] = 'node/' . $node->nid;
  }
}

/**
 * API function to import a URL.
 *
 * Loads the HTML document at $url, reads its <title>, and feeds every element
 * matched by $querypath to sheetnode_html_import_table(), accumulating all of
 * them into one sheet.
 *
 * @param string $url
 *   Location of the HTML page, passed to DOMDocument::loadHTMLFile().
 * @param string $querypath
 *   QueryPath selector identifying the table(s) to import.
 * @param array $options
 *   Import options, forwarded unchanged to sheetnode_html_import_table().
 *
 * @return array
 *   A two-element array: the page title, and the result of socialcalc_save()
 *   for the imported sheet. When the document cannot be loaded the title is
 *   an empty string and the sheet is empty.
 */
function sheetnode_html_import($url, $querypath, $options = array()) {
  module_load_include('inc', 'sheetnode', 'socialcalc');
  $title = '';
  $sheet = array();

  $doc = new DOMDocument('1.0');
  // Real-world HTML is rarely well-formed and the parser reports every flaw as
  // a PHP warning, hence the suppression. The return value is still checked
  // so that a failed fetch is not mistaken for an empty page.
  $loaded = @$doc
    ->loadHTMLFile($url);

  if ($loaded && $doc->documentElement) {
    $qp = htmlqp($doc);
    $title = $qp
      ->top('title')
      ->text();
    foreach ($qp
      ->top($querypath) as $table) {
      sheetnode_html_import_table($table, $sheet, $options);
    }
  }
  else {
    watchdog('sheetnode_html', 'Could not load an HTML document from %url.', array(
      '%url' => $url,
    ), WATCHDOG_WARNING);
  }

  $socialcalc = array(
    'sheet' => $sheet,
    'edit' => socialcalc_default_edit($sheet),
    'audit' => socialcalc_default_audit($sheet),
  );
  return array(
    $title,
    socialcalc_save($socialcalc),
  );
}

/**
 * API function to import a single table.
 *
 * Walks the rows (tr) and cells (td, th) of $table and appends them to $sheet
 * below whatever rows the sheet already contains, so several tables can be
 * stacked into one sheet by calling this function repeatedly.
 *
 * @param object $table
 *   Object exposing find(); each found cell must expose text() and attr().
 *   Presumably a QueryPath object wrapping one table element.
 * @param array $sheet
 *   Sheet array, modified in place. Reads and updates 'cells' (keyed by
 *   coordinate) and 'attribs' ('lastcol', 'lastrow').
 * @param array $options
 *   Optional flags:
 *   - ignore_spans: do not record colspan/rowspan on the imported cells. The
 *     spanned positions are still left free.
 *   - skip_empty_rows: do not emit an output row for a table row in which no
 *     cell had a value.
 */
function sheetnode_html_import_table($table, &$sheet, $options = array()) {
  $cells = $spans = array();
  $ignore_spans = !empty($options['ignore_spans']);
  $skip_empty_rows = !empty($options['skip_empty_rows']);

  $lastcol = isset($sheet['attribs']['lastcol']) ? $sheet['attribs']['lastcol'] : 0;
  $lastrow = isset($sheet['attribs']['lastrow']) ? $sheet['attribs']['lastrow'] : 0;

  // Col, row. Output starts on the first row after the existing content.
  $pos = $maxpos = array(
    1,
    $lastrow + 1,
  );

  // Input row. Tracked separately from the output row ($pos[1]) because the
  // two diverge once skip_empty_rows drops a row; $spans is keyed by input
  // row.
  $rin = $pos[1];
  foreach ($table
    ->find('tr') as $row) {
    $pos[0] = 1;
    $found_value = FALSE;
    foreach ($row
      ->find('td,th') as $element) {
      // Step over positions reserved by a rowspan from an earlier row.
      while (isset($spans[socialcalc_cr_to_coord($pos[0], $rin)])) {
        $pos[0]++;
      }
      $value = _sheetnode_html_import_value($element
        ->text());
      $is_number = is_numeric($value);
      $cell = array();
      $cell['pos'] = $pos;
      $cell['datavalue'] = $value;
      $cell['datatype'] = $is_number ? 'v' : 't';
      $cell['valuetype'] = $is_number ? 'n' : 'th';

      // Missing, zero or non-numeric span attributes all count as 1.
      $colspan = max(1, (int) $element
        ->attr('colspan'));
      $rowspan = max(1, (int) $element
        ->attr('rowspan'));
      if (!$ignore_spans) {
        if ($colspan > 1) {
          $cell['colspan'] = $colspan;
        }
        if ($rowspan > 1) {
          $cell['rowspan'] = $rowspan;
        }
      }

      // Only blank cells are skipped. A plain empty() test would also discard
      // legitimate zero values (0, "0"), silently losing data.
      if ($value !== '' && $value !== NULL && $value !== FALSE) {
        $found_value = TRUE;
        $cells[socialcalc_cr_to_coord($pos[0], $pos[1])] = $cell;
      }

      // Reserve the whole rectangle below this cell, i.e. every spanned
      // column in every spanned row, not just the first column.
      for ($r = $rin + 1; $r < $rin + $rowspan; $r++) {
        for ($c = $pos[0]; $c < $pos[0] + $colspan; $c++) {
          $spans[socialcalc_cr_to_coord($c, $r)] = TRUE;
        }
      }
      $pos[0] += $colspan;
      $maxpos[0] = max($maxpos[0], $pos[0]);
    }

    // Advance to next row.
    $rin++;
    if ($found_value || !$skip_empty_rows) {
      $pos[1]++;
      $maxpos[1] = max($maxpos[1], $pos[1]);
    }
  }

  // Array union: cells already present in the sheet win on a key clash.
  $sheet['cells'] = isset($sheet['cells']) ? $sheet['cells'] + $cells : $cells;
  // $maxpos points one past the last used column/row.
  $sheet['attribs']['lastcol'] = max($lastcol, $maxpos[0] - 1);
  $sheet['attribs']['lastrow'] = max($lastrow, $maxpos[1] - 1);
}

/**
 * Utility function to import a cell value.
 *
 * Replaces non-breaking spaces with ordinary spaces, trims the text, and then
 * tries to read it as a formatted number.
 *
 * @param string $val
 *   Raw text content of a table cell.
 *
 * @return mixed
 *   Whatever parse_formatted_number() returned, unless that was FALSE, in
 *   which case the cleaned-up text is returned instead.
 */
function _sheetnode_html_import_value($val) {
  // Decode the entity to obtain the UTF-8 non-breaking space to search for.
  $nbsp = html_entity_decode('&nbsp;', ENT_COMPAT, 'UTF-8');
  $cleaned = trim(str_replace($nbsp, ' ', $val));

  $parsed = parse_formatted_number($cleaned);
  if ($parsed === FALSE) {
    // Not a number: keep the text.
    return $cleaned;
  }
  return $parsed;
}

Functions

Name (sort descending) | Description
sheetnode_html_import API function to import a URL.
sheetnode_html_import_form Callback for import form.
sheetnode_html_import_form_submit Submit handler for import form.
sheetnode_html_import_table API function to import a single table.
sheetnode_html_menu Implementation of hook_menu().
_sheetnode_html_import_value Utility function to import a cell value.