You are here

class PARSEENTRIES in Bibliography Module 7

Same name and namespace in other branches
  1. 5 bibtexParse/PARSEENTRIES.php \PARSEENTRIES
  2. 6.2 modules/bibtexParse/PARSEENTRIES.php \PARSEENTRIES
  3. 6 bibtexParse/PARSEENTRIES.php \PARSEENTRIES
  4. 7.3 plugins/biblio_style/bibtex/PARSEENTRIES.php \PARSEENTRIES
  5. 7.2 modules/bibtexParse/PARSEENTRIES.php \PARSEENTRIES

// Parse a file $parse = NEW PARSEENTRIES(); $parse->expandMacro = TRUE; // $array = array("RMP" =>"Rev., Mod. Phys."); // $parse->loadStringMacro($array); // $parse->removeDelimit = FALSE; // $parse->fieldExtract = FALSE; $parse->openBib("bib.bib"); $parse->extractEntries(); $parse->closeBib(); list($preamble, $strings, $entries, $undefinedStrings) = $parse->returnArrays(); print_r($preamble); print "\n"; print_r($strings); print "\n"; print_r($entries); print "\n\n";.

// Parse a bibtex PHP string $bibtex_data = <<< END.

@STRING{three = "THREE"} @STRING{two = "TWO"} @string{JRNL23 = {NatLA 23 } # " " # two # " " # three}

@article{klitzing.1, author = "v. Klitzing and Dorda and Pepper", title = "New method for high mark@sirfragalot.com accuracy determination of fine structure constant based on quantized hall resistance", volume = "45", journal = {Journal of } # JRNL23, pages = "494", citeulike-article-id = {12222 } , ignoreMe = {blah}, }

@article { klitzing.2, author = "Klaus von Klitzing", title = "The Quantized Hall Effect", volume = "58", journal = two, pages = "519", }

END;

$parse = NEW PARSEENTRIES(); $parse->expandMacro = TRUE; // $parse->removeDelimit = FALSE; // $parse->fieldExtract = FALSE; $array = array("RMP" =>"Rev., Mod. Phys."); $parse->loadStringMacro($array); $parse->loadBibtexString($bibtex_data); $parse->extractEntries(); list($preamble, $strings, $entries, $undefinedStrings) = $parse->returnArrays(); print_r($preamble); print "\n"; print_r($strings); print "\n"; print_r($entries); print "\n\n";

Hierarchy

Expanded class hierarchy of PARSEENTRIES

2 string references to 'PARSEENTRIES'
biblio_bibtex_biblio_import in modules/bibtexParse/biblio_bibtex.module
_biblio_bibtex_export in modules/bibtexParse/biblio_bibtex.module
Export data in bibtex format.

File

modules/bibtexParse/PARSEENTRIES.php, line 132

View source
class PARSEENTRIES {

  /**
   * @return
   */
  public function __construct() {
    require_once drupal_get_path('module', 'biblio_bibtex') . '/transtab_latex_unicode.inc.php';
    $this->transtab_latex_unicode = get_transtab_latex_unicode();
    $this->preamble = $this->strings = $this->undefinedStrings = $this->entries = array();
    $this->count = 0;
    $this->fieldExtract = TRUE;
    $this->removeDelimit = TRUE;
    $this->expandMacro = FALSE;
    $this->parseFile = TRUE;
    $this->outsideEntry = TRUE;
    $this->translate_latex = TRUE;
  }

  /**
   * Open bib file.
   *
   * @param $file
   *
   * @return none
   */
  public function openBib($file) {
    if (!is_file($file)) {
      die;
    }
    ini_set('auto_detect_line_endings', TRUE);
    $this->fid = fopen($file, 'r');
    $this->parseFile = TRUE;
  }

  /**
   * Load a bibtex string to parse it.
   */
  public function loadBibtexString($bibtex_string) {
    if (is_string($bibtex_string)) {

      // $bibtex_string = $this->searchReplaceText($this->transtab_latex_unicode, $bibtex_string, FALSE);.
      $this->bibtexString = explode("\n", $bibtex_string);
    }
    else {
      $this->bibtexString = $bibtex_string;
    }
    $this->parseFile = FALSE;
    $this->currentLine = 0;
  }

  /**
   *
   */
  public function searchReplaceText($searchReplaceActionsArray, $sourceString, $includesSearchPatternDelimiters = FALSE) {
    $searchStrings = array_keys($searchReplaceActionsArray);
    if (!$includesSearchPatternDelimiters) {
      foreach ($searchStrings as $key => $value) {

        // Add search pattern delimiters.
        $searchStrings[$key] = "/" . $value . "/";
      }
    }
    $replaceStrings = array_values($searchReplaceActionsArray);

    // Apply the search & replace actions defined in '$searchReplaceActionsArray' to the text passed in '$sourceString':
    return preg_replace($searchStrings, $replaceStrings, $sourceString);
  }

  /**
   * Set strings macro.
   */
  public function loadStringMacro($macro_array) {
    $this->userStrings = $macro_array;
  }

  /**
   * Close bib file.
   */
  public function closeBib() {
    fclose($this->fid);
  }

  /**
   * Get a non-empty line from the bib file or from the bibtexString.
   */
  public function getLine() {
    if ($this->parseFile) {
      if (!feof($this->fid)) {
        do {
          $line = trim(fgets($this->fid));
        } while (!feof($this->fid) && !$line);
        return $line;
      }
      return FALSE;
    }
    else {
      do {
        $line = array_shift($this->bibtexString);
        $line = trim($line);
        $this->currentLine++;
      } while ($this->bibtexString && !$line);
      return $line;
    }
  }

  /**
   * Extract value part of @string field enclosed by double-quotes or braces.
   * The string may be expanded with previously-defined strings.
   */
  public function extractStringValue($string) {

    // $string contains a end delimiter, remove it.
    $string = trim(substr($string, 0, strlen($string) - 1));

    // Remove delimiters and expand.
    $string = $this
      ->removeDelimitersAndExpand($string);
    return $string;
  }

  /**
   * Extract a field.
   */
  public function fieldSplit($seg) {

    // Echo "**** ";print_r($seg);echo "<BR>";
    // handle fields like another-field = {}.
    $array = preg_split("/,\\s*([-_.:,a-zA-Z0-9]+)\\s*={1}\\s*/U", $seg, PREG_SPLIT_DELIM_CAPTURE);

    // Echo "**** ";print_r($array);echo "<BR>";
    // $array = preg_split("/,\s*(\w+)\s*={1}\s*/U", $seg, PREG_SPLIT_DELIM_CAPTURE);.
    if (!array_key_exists(1, $array)) {
      return array(
        $array[0],
        FALSE,
      );
    }
    return array(
      $array[0],
      $array[1],
    );
  }

  /**
   * Extract and format fields.
   */
  public function reduceFields($oldString) {

    // 03/05/2005 G. Gardey. Do not remove all occurences, juste one
    // * correctly parse an entry ended by: somefield = {aValue}}.
    $lg = strlen($oldString);
    if ($oldString[$lg - 1] == "}" || $oldString[$lg - 1] == ")" || $oldString[$lg - 1] == ",") {
      $oldString = substr($oldString, 0, $lg - 1);
    }

    // $oldString = rtrim($oldString, "}),");.
    $split = preg_split("/=/", $oldString, 2);
    $string = $split[1];
    while ($string) {
      list($entry, $string) = $this
        ->fieldSplit($string);
      $values[] = $entry;
    }
    foreach ($values as $value) {
      $pos = strpos($oldString, $value);
      $oldString = substr_replace($oldString, '', $pos, strlen($value));
    }
    $rev = strrev(trim($oldString));
    if ($rev[0] != ',') {
      $oldString .= ',';
    }
    $keys = preg_split("/=,/", $oldString);

    // 22/08/2004 - Mark Grimshaw
    // I have absolutely no idea why this array_pop is required but it is.  Seems to always be
    // an empty key at the end after the split which causes problems if not removed.
    array_pop($keys);
    foreach ($keys as $key) {
      $value = trim(array_shift($values));
      $rev = strrev($value);

      // Remove any dangling ',' left on final field of entry.
      if ($rev[0] == ',') {
        $value = rtrim($value, ",");
      }
      if (!$value) {
        continue;
      }

      // 21/08/2004 G.Gardey -> expand macro
      // Don't remove delimiters now needs to know if the value is a string macro
      // $this->entries[$this->count][strtolower(trim($key))] = trim($this->removeDelimiters(trim($value)));
      $key = strtolower(trim($key));
      $value = trim($value);

      // Handle curly braces; this needs to be done after the bibtex entry has
      // been split into parts so it cannot go into the big transtab replacement.
      $decode_arr = array(
        // Throw away any unescaped curly braces
        // (Zotero adds lots of them, for example)
        // leave them at the start and the end of the string though.
        '@(.)(?<!\\\\){@' => '$1',
        '@(?<!\\\\)}(?!$)@' => '',
        // Decode escaped curly braces.
        '@\\\\{@' => '{',
        '@\\\\}@' => '}',
      );
      $value = preg_replace(array_keys($decode_arr), array_values($decode_arr), $value);
      $this->entries[$this->count][$key] = $value;
    }

    // Echo "**** ";print_r($this->entries[$this->count]);echo "<BR>";.
  }

  /**
   * Start splitting a bibtex entry into component fields.
   * Store the entry type and citation.
   */
  public function fullSplit($entry) {
    $matches = preg_split("/@(.*)[{(](.*),/U", $entry, 2, PREG_SPLIT_DELIM_CAPTURE);
    $this->entries[$this->count]['bibtexEntryType'] = strtolower(trim($matches[1]));

    // Sometimes a bibtex entry will have no citation key
    // this is a field.
    if (preg_match("/=/", $matches[2])) {
      $matches = preg_split("/@(.*)\\s*[{(](.*)/U", $entry, 2, PREG_SPLIT_DELIM_CAPTURE);
    }

    // print_r($matches); print "<P>";.
    $this->entries[$this->count]['bibtexCitation'] = $matches[2];
    $this
      ->reduceFields($matches[3]);
  }

  /**
   * Grab a complete bibtex entry.
   */
  public function parseEntry($entry) {

    // Reset the script timer to avoid timeouts.
    set_time_limit(30);
    $entry = $this->translate_latex ? $this
      ->searchReplaceText($this->transtab_latex_unicode, $entry, FALSE) : $entry;
    $count = 0;
    $lastLine = FALSE;
    if (variable_get('biblio_remove_double_bibtex_braces', 0)) {
      $entry = str_replace('{{', '{', $entry);
      $entry = str_replace('}}', '}', $entry);
    }
    if (preg_match("/@(.*)([{(])/U", preg_quote($entry), $matches)) {
      if (!array_key_exists(1, $matches)) {
        return $lastLine;
      }
      if (preg_match("/string/i", trim($matches[1]))) {
        $this->strings[] = $entry;
      }
      elseif (preg_match("/preamble/i", trim($matches[1]))) {
        $this->preamble[] = $entry;
      }
      elseif (preg_match("/comment/i", $matches[1])) {

        // MG (31/Jan/2006) -- ignore @comment
      }
      else {
        if ($this->fieldExtract) {
          $this
            ->fullSplit($entry);
        }
        else {
          $this->entries[$this->count] = $entry;
        }
        $this->count++;
      }
      return $lastLine;
    }
  }

  /**
   * Remove delimiters from a string.
   */
  public function removeDelimiters($string) {
    if ($string && $string[0] == "\"") {
      $string = substr($string, 1);
      $string = substr($string, 0, -1);
    }
    elseif ($string && $string[0] == "{") {
      if (strlen($string) > 0 && $string[strlen($string) - 1] == "}") {
        $string = substr($string, 1);
        $string = substr($string, 0, -1);
      }
    }
    elseif (!is_numeric($string) && !array_key_exists($string, $this->strings) && array_search($string, $this->undefinedStrings) === FALSE) {

      // Undefined string that is not a year etc.
      $this->undefinedStrings[] = $string;
      return '';
    }
    return $string;
  }

  /**
   * This function works like explode('#',$val) but has to take into account whether
   * the character # is part of a string (i.e., is enclosed into "..." or {...} )
   * or defines a string concatenation as in @string{ "x # x" # ss # {xx{x}x} }.
   */
  public function explodeString($val) {
    $openquote = $bracelevel = $i = $j = 0;
    while ($i < strlen($val)) {
      if ($val[$i] == '"') {
        $openquote = !$openquote;
      }
      elseif ($val[$i] == '{') {
        $bracelevel++;
      }
      elseif ($val[$i] == '}') {
        $bracelevel--;
      }
      elseif ($val[$i] == '#' && !$openquote && !$bracelevel) {
        $strings[] = substr($val, $j, $i - $j);
        $j = $i + 1;
      }
      $i++;
    }
    $strings[] = substr($val, $j);
    return $strings;
  }

  /**
   * This function receives a string and a closing delimiter '}' or ')'
   * and looks for the position of the closing delimiter taking into
   * account the following Bibtex rules:
   *  * Inside the braces, there can arbitrarily nested pairs of braces,
   *    but braces must also be balanced inside quotes!
   *  * Inside quotes, to place the " character it is not sufficient
   *    to simply escape with \": Quotes must be placed inside braces.
   */
  public function closingDelimiter($val, $delimitEnd) {

    // Echo "####>$delimitEnd $val<BR>";.
    $openquote = $bracelevel = $i = $j = 0;
    while ($i < strlen($val)) {

      // A '"' found at brace level 0 defines a value such as "ss{\"o}ss".
      if ($val[$i] == '"' && !$bracelevel) {
        $openquote = !$openquote;
      }
      elseif ($val[$i] == '{') {
        $bracelevel++;
      }
      elseif ($val[$i] == '}') {
        $bracelevel--;
      }
      if ($val[$i] == $delimitEnd && !$openquote && !$bracelevel) {
        return $i;
      }
      $i++;
    }

    // Echo "--> $bracelevel, $openquote";.
    return 0;
  }

  /**
   * Remove enclosures around entry field values.  Additionally, expand macros if flag set.
   */
  public function removeDelimitersAndExpand($string, $inpreamble = FALSE) {

    // Only expand the macro if flag set, if strings defined and not in preamble.
    if (!$this->expandMacro || empty($this->strings) || $inpreamble) {
      $string = $this
        ->removeDelimiters($string);
    }
    else {
      $stringlist = $this
        ->explodeString($string);
      $string = "";
      foreach ($stringlist as $str) {

        // Trim the string since usually # is enclosed by spaces.
        $str = trim($str);

        // Replace the string if macro is already defined
        // strtolower is used since macros are case insensitive.
        if (isset($this->strings[strtolower($str)])) {
          $string .= $this->strings[strtolower($str)];
        }
        else {
          $string .= $this
            ->removeDelimiters(trim($str));
        }
      }
    }
    return $string;
  }

  /**
   * This function extract entries taking into account how comments are defined in BibTeX.
   * BibTeX splits the file in two areas: inside an entry and outside an entry, the delimitation
   * being indicated by the presence of a @ sign. When this character is met, BibTeX expects to
   * find an entry. Before that sign, and after an entry, everything is considered a comment!
   */
  public function extractEntries() {
    $inside = $possibleEntryStart = FALSE;
    $entry = "";
    while ($line = $this
      ->getLine()) {
      if ($possibleEntryStart) {
        $line = $possibleEntryStart . $line;
      }
      if (!$inside && strchr($line, "@")) {

        // Throw all characters before the '@'.
        $line = strstr($line, '@');
        if (!strchr($line, "{") && !strchr($line, "(")) {
          $possibleEntryStart = $line;
        }
        elseif (preg_match("/@.*([{(])/U", preg_quote($line), $matches)) {
          $inside = TRUE;
          if ($matches[1] == '{') {
            $delimitEnd = '}';
          }
          else {
            $delimitEnd = ')';
          }
          $possibleEntryStart = FALSE;
        }
      }
      if ($inside) {
        $entry .= " " . $line;
        if ($j = $this
          ->closingDelimiter($entry, $delimitEnd)) {

          // All characters after the delimiter are thrown but the remaining
          // characters must be kept since they may start the next entry !!!
          $lastLine = substr($entry, $j + 1);
          $entry = substr($entry, 0, $j + 1);

          // Strip excess whitespaces from the entry.
          $entry = preg_replace('/\\s\\s+/', ' ', $entry);
          $this
            ->parseEntry($entry);
          $entry = strchr($lastLine, "@");
          if ($entry) {
            $inside = TRUE;
          }
          else {
            $inside = FALSE;
          }
        }
      }
    }
  }

  /**
   * Return arrays of entries etc. to the calling process.
   */
  public function returnArrays() {

    // Global $transtab_latex_unicode; // defined in 'transtab_latex_unicode.inc.php'.
    foreach ($this->preamble as $value) {
      preg_match("/.*?[{(](.*)/", $value, $matches);
      $preamble = substr($matches[1], 0, -1);
      $preambles['bibtexPreamble'] = trim($this
        ->removeDelimitersAndExpand(trim($preamble), TRUE));
    }
    if (isset($preambles)) {
      $this->preamble = $preambles;
    }
    if ($this->fieldExtract) {

      // Next lines must take into account strings defined by previously-defined strings.
      $strings = $this->strings;

      // $this->strings is initialized with strings provided by user if they exists
      // it is supposed that there are no substitutions to be made in the user strings, i.e., no #.
      $this->strings = isset($this->userStrings) ? $this->userStrings : array();
      foreach ($strings as $value) {

        // Changed 21/08/2004 G. Gardey
        // 23/08/2004 Mark G. account for comments on same line as @string - count delimiters in string value.
        $value = trim($value);
        $matches = preg_split("/@\\s*string\\s*([{(])/i", $value, 2, PREG_SPLIT_DELIM_CAPTURE);
        $delimit = $matches[1];
        $matches = preg_split("/=/", $matches[2], 2, PREG_SPLIT_DELIM_CAPTURE);

        // Macros are case insensitive.
        $this->strings[strtolower(trim($matches[0]))] = $this
          ->extractStringValue($matches[1]);
      }
    }

    // Changed 21/08/2004 G. Gardey
    // 22/08/2004 Mark Grimshaw - stopped useless looping.
    // removeDelimit and expandMacro have NO effect if !$this->fieldExtract.
    if ($this->removeDelimit || $this->expandMacro && $this->fieldExtract) {
      for ($i = 0; $i < count($this->entries); $i++) {
        foreach ($this->entries[$i] as $key => $value) {

          // 02/05/2005 G. Gardey don't expand macro for bibtexCitation
          // and bibtexEntryType.
          if ($key != 'bibtexCitation' && $key != 'bibtexEntryType') {
            $this->entries[$i][$key] = trim($this
              ->removeDelimitersAndExpand($this->entries[$i][$key]));
          }
        }
      }
    }

    // EZ: Remove this to be able to use the same instance for parsing several files,
    // e.g., parsing a entry file with its associated abbreviation file
    //    if (empty($this->preamble))
    //      $this->preamble = FALSE;
    //    if (empty($this->strings))
    //      $this->strings = FALSE;
    //    if (empty($this->entries))
    //      $this->entries = FALSE;.
    return array(
      $this->preamble,
      $this->strings,
      $this->entries,
      $this->undefinedStrings,
    );
  }

  /**
   *
   */
  public function &getEntries() {
    if ($this->removeDelimit || $this->expandMacro && $this->fieldExtract) {
      for ($i = 0; $i < count($this->entries); $i++) {
        foreach ($this->entries[$i] as $key => $value) {

          // 02/05/2005 G. Gardey don't expand macro for bibtexCitation
          // and bibtexEntryType.
          if ($key != 'bibtexCitation' && $key != 'bibtexEntryType') {
            $this->entries[$i][$key] = trim($this
              ->removeDelimitersAndExpand($this->entries[$i][$key]));
          }
        }
      }
    }
    return $this->entries;
  }

}

Members

Namesort descending Modifiers Type Description Overrides
PARSEENTRIES::closeBib public function Close bib file.
PARSEENTRIES::closingDelimiter public function This function receives a string and a closing delimiter '}' or ')' and looks for the position of the closing delimiter taking into account the following Bibtex rules:
PARSEENTRIES::explodeString public function This function works like explode('#',$val) but has to take into account whether the character # is part of a string (i.e., is enclosed into "..." or {...} ) or defines a string concatenation as in @string{ "x # x" # ss #…
PARSEENTRIES::extractEntries public function This function extract entries taking into account how comments are defined in BibTeX. BibTeX splits the file in two areas: inside an entry and outside an entry, the delimitation being indicated by the presence of a @ sign. When this character is met,…
PARSEENTRIES::extractStringValue public function Extract value part of @string field enclosed by double-quotes or braces. The string may be expanded with previously-defined strings.
PARSEENTRIES::fieldSplit public function Extract a field.
PARSEENTRIES::fullSplit public function Start splitting a bibtex entry into component fields. Store the entry type and citation.
PARSEENTRIES::getEntries public function
PARSEENTRIES::getLine public function Get a non-empty line from the bib file or from the bibtexString.
PARSEENTRIES::loadBibtexString public function Load a bibtex string to parse it.
PARSEENTRIES::loadStringMacro public function Set strings macro.
PARSEENTRIES::openBib public function Open bib file.
PARSEENTRIES::parseEntry public function Grab a complete bibtex entry.
PARSEENTRIES::reduceFields public function Extract and format fields.
PARSEENTRIES::removeDelimiters public function Remove delimiters from a string.
PARSEENTRIES::removeDelimitersAndExpand public function Remove enclosures around entry field values. Additionally, expand macros if flag set.
PARSEENTRIES::returnArrays public function Return arrays of entries etc. to the calling process.
PARSEENTRIES::searchReplaceText public function
PARSEENTRIES::__construct public function