You are here

function biblio_advanced_import_pitfall_workarounds in Biblio Advanced Import 7

Same name and namespace in other branches
  1. 6 biblio_advanced_import.module \biblio_advanced_import_pitfall_workarounds()

This function implements some optional data cleanup / normalization that can be activated on the "advanced import" tab.

1 call to biblio_advanced_import_pitfall_workarounds()
biblio_advanced_import_node_presave in ./biblio_advanced_import.module
Implements hook_node_presave().

File

./biblio_advanced_import.module, line 366
Biblio add-on.

Code

/**
 * Applies the optional cleanup / normalization steps to a biblio node.
 *
 * Each step is switched on by its own variable and is skipped when that
 * variable is 'as is' (the default):
 * - biblio_advanced_import_fix_issn: 'normalize from isbn', 'normalize'
 * - biblio_advanced_import_fix_isbn: 'remove', 'convert 13'
 * - biblio_advanced_import_fix_doi: 'one valid'
 * - biblio_advanced_import_fix_title: 'mendeley bibtex'
 * - biblio_advanced_import_fix_url: 'one valid'
 *
 * A field whose value cannot be recognized / validated by an active step is
 * unset on the node.
 *
 * @param $node
 *   The node object to clean up. It is modified in place.
 */
function biblio_advanced_import_pitfall_workarounds(&$node) {
  switch (variable_get('biblio_advanced_import_fix_issn', 'as is')) {
    case 'as is':
      break;

    case 'normalize from isbn':
      // RIS format does not distinguish between ISBN and ISSN, so the ISSN may
      // arrive in the ISBN field. Only borrow it when there is no ISSN yet and
      // there actually is an ISBN value; an existing ISSN is never overwritten.
      if (empty($node->biblio_issn) && !empty($node->biblio_isbn)) {
        $node->biblio_issn = $node->biblio_isbn;
      }

    // no break
    case 'normalize':

      // @see http://en.wikipedia.org/wiki/International_Standard_Serial_Number
      if (!empty($node->biblio_issn)) {
        // Seven digits plus a check character; 'X' is only legal as the check
        // character, so it is not accepted in the other positions.
        if (preg_match("@\\b([0-9]{4})-?([0-9]{3}[0-9X])\\b@i", $node->biblio_issn, $matches)) {
          $issn = strtoupper($matches[1] . $matches[2]);
          $sum = 0;
          // Weighted sum of the first seven digits, weights 8 down to 2.
          for ($i = 0; $i < 7; $i++) {
            $sum += (int) $issn[$i] * (8 - $i);
          }
          // The trailing "% 11" maps a remainder of 0 to check digit 0
          // (instead of 11); a value of 10 is written as 'X'.
          $checksum = (11 - $sum % 11) % 11;
          $expected = (10 == $checksum) ? 'X' : (string) $checksum;
          if ($expected === $issn[7]) {
            $node->biblio_issn = $issn;
          }
          else {
            unset($node->biblio_issn);
          }
        }
        else {
          unset($node->biblio_issn);
        }
      }
      break;
  }

  switch (variable_get('biblio_advanced_import_fix_isbn', 'as is')) {
    case 'as is':
      break;

    case 'remove':

      // @see http://en.wikipedia.org/wiki/International_Standard_Book_Number
      if (!empty($node->biblio_isbn)) {
        // Extract the candidate from the ISBN field itself; $matches must not
        // be reused from the ISSN handling above. A trailing 'X' is allowed
        // because it is a legal ISBN-10 check character.
        // NOTE(review): assumes ISBNtest::set_isbn() accepts hyphens and an
        // upper case 'X' - confirm against lib/isbntest.class.php.
        if (preg_match("@[0-9\\-]{9,}[0-9X]@i", $node->biblio_isbn, $matches)) {
          module_load_include('class.php', 'biblio_advanced_import', 'lib/isbntest');
          $currISBN = new ISBNtest();
          $currISBN->set_isbn(strtoupper($matches[0]));
          if ($currISBN->valid_isbn10() || $currISBN->valid_isbn13() || $currISBN->valid_gtin14()) {
            $node->biblio_isbn = $currISBN->get_gtin14();
          }
          else {
            unset($node->biblio_isbn);
          }
        }
        else {
          unset($node->biblio_isbn);
        }
      }
      break;

    case 'convert 13':

      // @see http://en.wikipedia.org/wiki/International_Standard_Book_Number
      if (!empty($node->biblio_isbn)) {
        if (preg_match("@[0-9\\-]{10,}@", $node->biblio_isbn, $matches)) {
          module_load_include('class.php', 'biblio_advanced_import', 'lib/isbntest');
          $currISBN = new ISBNtest();
          $currISBN->set_isbn($matches[0]);
          if ($currISBN->valid_isbn13()) {
            $node->biblio_isbn = $currISBN->get_isbn13();
          }
          elseif ($currISBN->valid_gtin14()) {
            $node->biblio_isbn = $currISBN->get_gtin14();
          }
          else {
            unset($node->biblio_isbn);
          }
        }
        else {
          unset($node->biblio_isbn);
        }
      }
      break;
  }

  switch (variable_get('biblio_advanced_import_fix_doi', 'as is')) {
    case 'as is':
      break;

    case 'one valid':

      // @see http://en.wikipedia.org/wiki/Digital_object_identifier
      if (!empty($node->biblio_doi)) {
        // Keep only the first thing that looks like a DOI ("10.NNNN/suffix").
        if (preg_match("@10\\.\\d{4,}/[^\\s]+@i", $node->biblio_doi, $matches)) {
          $node->biblio_doi = $matches[0];
        }
        else {
          unset($node->biblio_doi);
        }
      }
      break;
  }

  switch (variable_get('biblio_advanced_import_fix_title', 'as is')) {
    case 'as is':
      break;

    case 'mendeley bibtex':
      if (!empty($node->title)) {

        // strip off enclosing curly braces, but only a matching pair
        $node->title = preg_replace('@^\\{(.*)\\}$@', '$1', $node->title);
      }
      break;
  }

  switch (variable_get('biblio_advanced_import_fix_url', 'as is')) {
    case 'as is':
      break;

    case 'one valid':
      if (!empty($node->biblio_url)) {
        if (preg_match("@(http|https)://[^\\s]+@i", $node->biblio_url, $matches)) {

          // ris import runs together lists of urls without a delimiter, so
          // split in front of every "http://" / "https://" (any letter case)
          // and keep the first url only.
          $urls = preg_split('@(?=https?://)@i', $matches[0], -1, PREG_SPLIT_NO_EMPTY);
          $first_url = $urls[0];

          // The match always starts with a scheme; store it in lower case.
          $scheme_length = strpos($first_url, '://');
          $node->biblio_url = strtolower(substr($first_url, 0, $scheme_length)) . substr($first_url, $scheme_length);
        }
        else {
          unset($node->biblio_url);
        }
      }
      break;
  }
}