class PARSEENTRIES in Bibliography Module 7
Same name and namespace in other branches
- 5 bibtexParse/PARSEENTRIES.php \PARSEENTRIES
- 6.2 modules/bibtexParse/PARSEENTRIES.php \PARSEENTRIES
- 6 bibtexParse/PARSEENTRIES.php \PARSEENTRIES
- 7.3 plugins/biblio_style/bibtex/PARSEENTRIES.php \PARSEENTRIES
- 7.2 modules/bibtexParse/PARSEENTRIES.php \PARSEENTRIES
// Parse a file $parse = NEW PARSEENTRIES(); $parse->expandMacro = TRUE; // $array = array("RMP" =>"Rev., Mod. Phys."); // $parse->loadStringMacro($array); // $parse->removeDelimit = FALSE; // $parse->fieldExtract = FALSE; $parse->openBib("bib.bib"); $parse->extractEntries(); $parse->closeBib(); list($preamble, $strings, $entries, $undefinedStrings) = $parse->returnArrays(); print_r($preamble); print "\n"; print_r($strings); print "\n"; print_r($entries); print "\n\n";
// Parse a bibtex PHP string $bibtex_data = <<< END
@STRING{three = "THREE"} @STRING{two = "TWO"} @string{JRNL23 = {NatLA 23 } # " " # two # " " # three}
@article{klitzing.1, author = "v. Klitzing and Dorda and Pepper", title = "New method for high mark@sirfragalot.com accuracy determination of fine structure constant based on quantized hall resistance", volume = "45", journal = {Journal of } # JRNL23, pages = "494", citeulike-article-id = {12222 } , ignoreMe = {blah}, }
@article { klitzing.2, author = "Klaus von Klitzing", title = "The Quantized Hall Effect", volume = "58", journal = two, pages = "519", }
END;
$parse = NEW PARSEENTRIES(); $parse->expandMacro = TRUE; // $parse->removeDelimit = FALSE; // $parse->fieldExtract = FALSE; $array = array("RMP" =>"Rev., Mod. Phys."); $parse->loadStringMacro($array); $parse->loadBibtexString($bibtex_data); $parse->extractEntries(); list($preamble, $strings, $entries, $undefinedStrings) = $parse->returnArrays(); print_r($preamble); print "\n"; print_r($strings); print "\n"; print_r($entries); print "\n\n";
Hierarchy
- class \PARSEENTRIES
Expanded class hierarchy of PARSEENTRIES
2 string references to 'PARSEENTRIES'
- biblio_bibtex_biblio_import in modules/bibtexParse/biblio_bibtex.module
- _biblio_bibtex_export in modules/bibtexParse/biblio_bibtex.module - Export data in bibtex format.
File
- modules/bibtexParse/PARSEENTRIES.php, line 132
View source
class PARSEENTRIES {
/**
 * Prepares a parser with its default options.
 *
 * Loads the LaTeX-to-Unicode translation table from the include file shipped
 * with the biblio_bibtex module, empties every result container and resets
 * the option flags to their defaults.
 */
public function __construct() {
  // The translation table is provided by an include file of biblio_bibtex.
  $include = drupal_get_path('module', 'biblio_bibtex') . '/transtab_latex_unicode.inc.php';
  require_once $include;
  $this->transtab_latex_unicode = get_transtab_latex_unicode();

  // Result containers and the running entry counter.
  $this->entries = array();
  $this->undefinedStrings = array();
  $this->strings = array();
  $this->preamble = array();
  $this->count = 0;

  // Option flags; callers may change these before parsing.
  $this->fieldExtract = TRUE;
  $this->removeDelimit = TRUE;
  $this->expandMacro = FALSE;
  $this->translate_latex = TRUE;

  // Internal parsing state.
  $this->parseFile = TRUE;
  $this->outsideEntry = TRUE;
}
/**
 * Opens a BibTeX file and switches the parser to file mode.
 *
 * After this call getLine() reads from the opened handle.
 *
 * Execution is terminated with die (the failure mode this method has always
 * used) when $file is not a regular file or cannot be opened for reading.
 *
 * @param string $file
 *   Path of the BibTeX file to read.
 */
public function openBib($file) {
  if (!is_file($file)) {
    die;
  }
  ini_set('auto_detect_line_endings', TRUE);
  $handle = fopen($file, 'r');
  if ($handle === FALSE) {
    // is_file() does not guarantee readability. getLine() loops on feof(),
    // which never reports end-of-file for an invalid handle, so stop here
    // the same way the missing-file case does instead of storing FALSE.
    die;
  }
  $this->fid = $handle;
  $this->parseFile = TRUE;
}
/**
 * Loads BibTeX data held in memory and switches the parser to string mode.
 *
 * @param string|array $bibtex_string
 *   Either the raw BibTeX text, which is split into lines here, or a value
 *   that is stored unchanged (getLine() consumes it with array_shift(), so
 *   it is expected to be an array of lines).
 */
public function loadBibtexString($bibtex_string) {
  if (is_string($bibtex_string)) {
    // Split on CRLF, lone CR and LF alike. Splitting on "\n" only would turn
    // CR-terminated text into one single line, and extractEntries() needs a
    // following line before it looks at a second entry on the same line.
    // getLine() trims every line, so "\n" and "\r\n" input yields the same
    // lines as before.
    $this->bibtexString = preg_split('/\r\n|\r|\n/', $bibtex_string);
  }
  else {
    $this->bibtexString = $bibtex_string;
  }
  $this->parseFile = FALSE;
  $this->currentLine = 0;
}
/**
 * Applies a table of regular-expression replacements to a string.
 *
 * @param array $searchReplaceActionsArray
 *   Map of search pattern => replacement, applied in array order.
 * @param string $sourceString
 *   The text to transform.
 * @param bool $includesSearchPatternDelimiters
 *   TRUE when the keys already carry their regex delimiters; otherwise each
 *   key is wrapped in "/" ... "/" before use.
 *
 * @return string
 *   The result of preg_replace() over $sourceString.
 */
public function searchReplaceText($searchReplaceActionsArray, $sourceString, $includesSearchPatternDelimiters = FALSE) {
  $patterns = array();
  $replacements = array();
  foreach ($searchReplaceActionsArray as $pattern => $replacement) {
    // Add the search pattern delimiters unless the caller supplied them.
    $patterns[] = $includesSearchPatternDelimiters ? $pattern : "/" . $pattern . "/";
    $replacements[] = $replacement;
  }
  return preg_replace($patterns, $replacements, $sourceString);
}
/**
 * Registers caller-supplied @string macros.
 *
 * The array is stored unchanged in $this->userStrings. returnArrays() uses
 * it as the initial macro table before adding the @string definitions found
 * in the parsed input.
 *
 * @param array $macro_array
 *   Map of macro name => replacement text, e.g.
 *   array("RMP" => "Rev., Mod. Phys.").
 */
public function loadStringMacro($macro_array) {
$this->userStrings = $macro_array;
}
/**
 * Closes the file opened by openBib().
 *
 * Does nothing when no file handle is open, for example when the parser was
 * fed through loadBibtexString() or the file has already been closed.
 */
public function closeBib() {
  if (isset($this->fid) && is_resource($this->fid)) {
    fclose($this->fid);
  }
}
/**
 * Fetches the next non-empty line of input.
 *
 * In file mode lines are read from the handle opened by openBib(); in string
 * mode they are shifted off the line array set by loadBibtexString(). Lines
 * are trimmed, and lines that are empty after trimming (or otherwise falsy,
 * such as "0") are skipped.
 *
 * @return string|false
 *   The trimmed line. In file mode FALSE once end-of-file was already
 *   reached; otherwise the last line read, which may be empty when the input
 *   is exhausted.
 */
public function getLine() {
  if (!$this->parseFile) {
    // String mode: consume the line array, counting every line taken.
    while (TRUE) {
      $candidate = trim(array_shift($this->bibtexString));
      $this->currentLine++;
      if ($candidate || !$this->bibtexString) {
        return $candidate;
      }
    }
  }

  // File mode.
  if (feof($this->fid)) {
    return FALSE;
  }
  $candidate = trim(fgets($this->fid));
  while (!feof($this->fid) && !$candidate) {
    $candidate = trim(fgets($this->fid));
  }
  return $candidate;
}
/**
 * Extracts the value part of an @string definition.
 *
 * The argument still carries the closing delimiter of the @string entry as
 * its last character. That character is dropped, the rest is trimmed and
 * then handed to removeDelimitersAndExpand(), which strips the quotes or
 * braces and may expand previously defined macros.
 *
 * @param string $string
 *   Everything that follows the "=" of the @string definition.
 *
 * @return string
 *   The processed value.
 */
public function extractStringValue($string) {
  // Drop the end delimiter of the surrounding @string entry.
  $keep = strlen($string) - 1;
  $value = trim(substr($string, 0, $keep));
  return $this->removeDelimitersAndExpand($value);
}
/**
 * Splits off the first field value from a run of "value, key = value, ...".
 *
 * The text is cut once, at the first ", key =" boundary. The key name that
 * forms the boundary is not returned; reduceFields() recovers the keys
 * separately.
 *
 * @param string $seg
 *   Field text that starts with a value.
 *
 * @return array
 *   Two elements: the leading value, and the text following the boundary, or
 *   FALSE when $seg holds no further "key =" boundary.
 */
public function fieldSplit($seg) {
  // The third argument of preg_split() is the limit, not the flags. The
  // legacy code passed PREG_SPLIT_DELIM_CAPTURE there, which only worked
  // because that constant equals 2. The limit of two pieces is what the
  // caller's loop depends on, so it is spelled out here.
  // The character class also covers keys such as "another-field".
  $parts = preg_split("/,\\s*([-_.:,a-zA-Z0-9]+)\\s*={1}\\s*/U", $seg, 2);
  if (!array_key_exists(1, $parts)) {
    return array(
      $parts[0],
      FALSE,
    );
  }
  return array(
    $parts[0],
    $parts[1],
  );
}
/**
 * Splits the field part of an entry into keys and values and stores them.
 *
 * The values are peeled off one by one with fieldSplit(), then removed from
 * the text so that only "key =," fragments remain, from which the keys are
 * read. Each key/value pair is stored in $this->entries[$this->count]; keys
 * are lower-cased, values keep their outer delimiters (they are removed
 * later, once it is known whether a value is a string macro).
 *
 * Input without any "key = value" pair stores nothing.
 *
 * @param string $oldString
 *   The text of an entry after the citation key, including the entry's
 *   closing delimiter.
 */
public function reduceFields($oldString) {
  $oldString = (string) $oldString;
  // 03/05/2005 G. Gardey. Do not remove all occurrences, just one, so that
  // an entry ended by "somefield = {aValue}}" is parsed correctly.
  $lg = strlen($oldString);
  if ($lg > 0 && ($oldString[$lg - 1] == "}" || $oldString[$lg - 1] == ")" || $oldString[$lg - 1] == ",")) {
    $oldString = substr($oldString, 0, $lg - 1);
  }
  $split = preg_split("/=/", $oldString, 2);
  if (!isset($split[1])) {
    // No "=" at all, hence no field to record.
    return;
  }
  // Collect the values in order. The comparison is explicit so that a
  // remainder of "0" is still treated as a value.
  $values = array();
  $string = $split[1];
  while (is_string($string) && $string !== '') {
    list($entry, $string) = $this->fieldSplit($string);
    $values[] = $entry;
  }
  // Remove each value from the text, leaving only the keys behind.
  foreach ($values as $value) {
    if ($value === '') {
      // Nothing to remove, and strpos() rejects an empty needle on older
      // PHP versions.
      continue;
    }
    $pos = strpos($oldString, $value);
    $oldString = substr_replace($oldString, '', $pos, strlen($value));
  }
  if (substr(trim($oldString), -1) !== ',') {
    $oldString .= ',';
  }
  $keys = preg_split("/=,/", $oldString);
  // 22/08/2004 - Mark Grimshaw: there always seems to be an empty key at the
  // end after the split, which causes problems if not removed. (The text
  // ends in "," at this point, so the piece after the final "=," is what is
  // dropped here.)
  array_pop($keys);
  foreach ($keys as $key) {
    // Remove any dangling ',' left on the final field of the entry.
    $value = rtrim(trim((string) array_shift($values)), ",");
    if ($value === '') {
      continue;
    }
    // 21/08/2004 G.Gardey -> expand macro.
    // Delimiters are not removed now: later code needs to know whether the
    // value is a string macro.
    $key = strtolower(trim($key));
    $value = trim($value);
    // Handle curly braces; this needs to be done after the bibtex entry has
    // been split into parts so it cannot go into the big transtab replacement.
    $decode_arr = array(
      // Throw away any unescaped curly braces
      // (Zotero adds lots of them, for example)
      // leave them at the start and the end of the string though.
      '@(.)(?<!\\\\){@' => '$1',
      '@(?<!\\\\)}(?!$)@' => '',
      // Decode escaped curly braces.
      '@\\\\{@' => '{',
      '@\\\\}@' => '}',
    );
    $value = preg_replace(array_keys($decode_arr), array_values($decode_arr), $value);
    $this->entries[$this->count][$key] = $value;
  }
}
/**
 * Starts splitting a bibtex entry into its component fields.
 *
 * Stores the lower-cased entry type and the citation key in
 * $this->entries[$this->count], then passes the remaining text to
 * reduceFields().
 *
 * @param string $entry
 *   One complete entry, from "@" up to and including its closing delimiter.
 */
public function fullSplit($entry) {
  // With a limit of 2 and PREG_SPLIT_DELIM_CAPTURE a successful split yields
  // four elements: text before "@", entry type, citation key, field text.
  $matches = preg_split("/@(.*)[{(](.*),/U", $entry, 2, PREG_SPLIT_DELIM_CAPTURE);
  if (count($matches) < 4) {
    // No comma anywhere: an entry such as "@misc{key}" that has no fields.
    // Take the key up to the first closing delimiter instead.
    $matches = preg_split("/@(.*)[{(](.*)[})]/U", $entry, 2, PREG_SPLIT_DELIM_CAPTURE);
  }
  if (count($matches) < 4) {
    // Not recognisable as an entry: record an empty type and no citation.
    $matches = array('', '', NULL, '');
  }
  $this->entries[$this->count]['bibtexEntryType'] = strtolower(trim($matches[1]));
  // Sometimes a bibtex entry will have no citation key: what was taken for
  // the key is then really the first field, so split again with an empty key.
  if (preg_match("/=/", (string) $matches[2])) {
    $matches = preg_split("/@(.*)\\s*[{(](.*)/U", $entry, 2, PREG_SPLIT_DELIM_CAPTURE);
  }
  $this->entries[$this->count]['bibtexCitation'] = $matches[2];
  if ($matches[3] !== '') {
    $this->reduceFields($matches[3]);
  }
}
/**
 * Classifies one complete bibtex entry and stores it.
 *
 * Entries whose type contains "string" or "preamble" are appended raw to
 * $this->strings or $this->preamble, entries whose type contains "comment"
 * are ignored, and every other entry is stored in $this->entries: split
 * into fields by fullSplit() when $this->fieldExtract is set, raw otherwise.
 *
 * @param string $entry
 *   One complete entry, from "@" up to and including its closing delimiter.
 *
 * @return bool|null
 *   FALSE when the text was recognised as an entry, NULL otherwise.
 */
public function parseEntry($entry) {
  // Reset the script timer to avoid timeouts.
  set_time_limit(30);
  if ($this->translate_latex) {
    $entry = $this->searchReplaceText($this->transtab_latex_unicode, $entry, FALSE);
  }
  $lastLine = FALSE;
  if (variable_get('biblio_remove_double_bibtex_braces', 0)) {
    $entry = str_replace('{{', '{', $entry);
    $entry = str_replace('}}', '}', $entry);
  }
  // Match against the entry itself. preg_quote() is meant for text that goes
  // into a pattern; applied to the subject it only added stray backslashes
  // to the captured entry type.
  if (preg_match("/@(.*)([{(])/U", $entry, $matches)) {
    $type = trim($matches[1]);
    if (preg_match("/string/i", $type)) {
      $this->strings[] = $entry;
    }
    elseif (preg_match("/preamble/i", $type)) {
      $this->preamble[] = $entry;
    }
    elseif (preg_match("/comment/i", $matches[1])) {
      // MG (31/Jan/2006) -- ignore @comment.
    }
    else {
      if ($this->fieldExtract) {
        $this->fullSplit($entry);
      }
      else {
        $this->entries[$this->count] = $entry;
      }
      $this->count++;
    }
    return $lastLine;
  }
}
/**
 * Removes the enclosing delimiters from a field value.
 *
 * - A value starting with a double quote loses its first and last character.
 * - A value starting with "{" loses its first and last character only when
 *   it also ends with "}".
 * - Any other value is treated as a bare word: numbers, keys of
 *   $this->strings and names already listed in $this->undefinedStrings are
 *   returned unchanged; anything else is recorded in
 *   $this->undefinedStrings and replaced by an empty string.
 *
 * NOTE(review): as a consequence of the last rule, the first occurrence of
 * an undefined name yields '' while later occurrences yield the name itself;
 * confirm whether that is intended.
 *
 * @param string $string
 *   The raw field value.
 *
 * @return string
 *   The value without its delimiters, or '' for a newly seen undefined name.
 */
public function removeDelimiters($string) {
  $opener = $string ? $string[0] : '';

  if ($opener == "\"") {
    return substr(substr($string, 1), 0, -1);
  }

  if ($opener == "{") {
    if (substr($string, -1) == "}") {
      return substr(substr($string, 1), 0, -1);
    }
    return $string;
  }

  $known = is_numeric($string)
    || array_key_exists($string, $this->strings)
    || array_search($string, $this->undefinedStrings) !== FALSE;
  if ($known) {
    return $string;
  }

  // Undefined string that is not a year etc.
  $this->undefinedStrings[] = $string;
  return '';
}
/**
 * Splits a value at the "#" concatenation operators.
 *
 * Works like explode('#', $val), except that a "#" inside "..." or {...} is
 * part of the text and does not split, as in
 * @string{ "x # x" # ss # {xx{x}x} }.
 *
 * @param string $val
 *   The value to split.
 *
 * @return array
 *   The pieces in order, untrimmed; always at least one element.
 */
public function explodeString($val) {
  $pieces = array();
  $inQuotes = FALSE;
  $depth = 0;
  $start = 0;
  $length = strlen($val);
  for ($pos = 0; $pos < $length; $pos++) {
    switch ($val[$pos]) {
      case '"':
        $inQuotes = !$inQuotes;
        break;

      case '{':
        $depth++;
        break;

      case '}':
        $depth--;
        break;

      case '#':
        // Only a "#" outside quotes and braces is a concatenation operator.
        if (!$inQuotes && !$depth) {
          $pieces[] = substr($val, $start, $pos - $start);
          $start = $pos + 1;
        }
        break;
    }
  }
  $pieces[] = substr($val, $start);
  return $pieces;
}
/**
 * Finds the position of the delimiter that closes an entry.
 *
 * The scan follows these BibTeX rules:
 * - Inside the braces there can be arbitrarily nested pairs of braces, and
 *   braces must also be balanced inside quotes.
 * - A double quote only opens or closes a quoted value at brace level 0, so
 *   a quote character inside braces, as in "ss{\"o}ss", is ordinary text.
 *
 * @param string $val
 *   The text to scan.
 * @param string $delimitEnd
 *   The closing delimiter to look for, '}' or ')'.
 *
 * @return int
 *   Offset of the first $delimitEnd found outside quotes at brace level 0,
 *   or 0 when there is none.
 */
public function closingDelimiter($val, $delimitEnd) {
  $inQuotes = FALSE;
  $depth = 0;
  $length = strlen($val);
  for ($pos = 0; $pos < $length; $pos++) {
    $char = $val[$pos];
    if ($char == '{') {
      $depth++;
    }
    elseif ($char == '}') {
      $depth--;
    }
    elseif ($char == '"' && !$depth) {
      $inQuotes = !$inQuotes;
    }
    // Checked after the counters are updated, so the "}" that brings the
    // level back to 0 is itself a candidate.
    if ($char == $delimitEnd && !$inQuotes && !$depth) {
      return $pos;
    }
  }
  return 0;
}
/**
 * Removes the enclosures around a field value and optionally expands macros.
 *
 * Macros are expanded only when $this->expandMacro is set, at least one
 * string macro is defined and the value does not come from a preamble. In
 * that case the value is split at its "#" operators; each piece that names
 * a macro (case-insensitively) is replaced by the macro text, every other
 * piece has its delimiters removed, and the results are concatenated.
 *
 * @param string $string
 *   The raw field value.
 * @param bool $inpreamble
 *   TRUE when the value belongs to an @preamble entry.
 *
 * @return string
 *   The processed value.
 */
public function removeDelimitersAndExpand($string, $inpreamble = FALSE) {
  $expand = $this->expandMacro && !empty($this->strings) && !$inpreamble;
  if (!$expand) {
    return $this->removeDelimiters($string);
  }

  $expanded = "";
  foreach ($this->explodeString($string) as $piece) {
    // Trim the piece since "#" is usually surrounded by spaces.
    $piece = trim($piece);
    // Macro names are case insensitive, so look them up in lower case.
    $macro = strtolower($piece);
    if (isset($this->strings[$macro])) {
      $expanded .= $this->strings[$macro];
    }
    else {
      $expanded .= $this->removeDelimiters($piece);
    }
  }
  return $expanded;
}
/**
 * Reads the input line by line and hands every complete entry to parseEntry().
 *
 * BibTeX splits the input in two areas, inside an entry and outside an
 * entry, the start of an entry being indicated by an "@" sign. Everything
 * before that sign, and after an entry, is considered a comment and thrown
 * away.
 *
 * Lines are accumulated until closingDelimiter() finds the end of the
 * current entry. Text following the closing delimiter may start the next
 * entry, and is examined straight away so that several entries on one line,
 * including the very last one of the input, are all parsed.
 */
public function extractEntries() {
  $inside = $possibleEntryStart = FALSE;
  $entry = "";
  // Always defined, even for a follow-on entry whose opening delimiter has
  // not been read yet.
  $delimitEnd = '}';
  while ($line = $this->getLine()) {
    if ($possibleEntryStart) {
      // The previous line held "@type" without its opening delimiter.
      $line = $possibleEntryStart . $line;
    }
    if (!$inside && strchr($line, "@")) {
      // Throw all characters before the '@'.
      $line = strstr($line, '@');
      if (!strchr($line, "{") && !strchr($line, "(")) {
        $possibleEntryStart = $line;
      }
      elseif (preg_match("/@.*([{(])/U", $line, $matches)) {
        $inside = TRUE;
        $delimitEnd = ($matches[1] == '{') ? '}' : ')';
        $possibleEntryStart = FALSE;
      }
    }
    if ($inside) {
      $entry .= " " . $line;
      // Loop, because what follows a closing delimiter can itself hold one
      // or more complete entries. Each pass shortens $entry, so this ends.
      while ($inside && ($j = $this->closingDelimiter($entry, $delimitEnd))) {
        // All characters after the delimiter are thrown but the remaining
        // characters must be kept since they may start the next entry.
        $lastLine = substr($entry, $j + 1);
        $entry = substr($entry, 0, $j + 1);
        // Strip excess whitespaces from the entry.
        $entry = preg_replace('/\\s\\s+/', ' ', $entry);
        $this->parseEntry($entry);
        $entry = strchr($lastLine, "@");
        if ($entry) {
          $inside = TRUE;
          // The follow-on entry may use the other delimiter pair.
          if (preg_match("/@.*([{(])/U", $entry, $matches)) {
            $delimitEnd = ($matches[1] == '{') ? '}' : ')';
          }
        }
        else {
          $entry = "";
          $inside = FALSE;
        }
      }
    }
  }
}
/**
 * Post-processes the parsed data and returns it to the calling process.
 *
 * - The last @preamble entry is reduced to its value and stored under the
 *   'bibtexPreamble' key.
 * - When $this->fieldExtract is set, the raw @string entries are turned
 *   into a macro table keyed by lower-cased macro name, starting from the
 *   macros given to loadStringMacro().
 * - When $this->fieldExtract is set and either $this->removeDelimit or
 *   $this->expandMacro is set, every field value except 'bibtexCitation'
 *   and 'bibtexEntryType' is passed through removeDelimitersAndExpand().
 *
 * The processing is applied to the stored data in place.
 *
 * @return array
 *   array($preamble, $strings, $entries, $undefinedStrings).
 */
public function returnArrays() {
  foreach ($this->preamble as $value) {
    preg_match("/.*?[{(](.*)/", $value, $matches);
    // Drop the closing delimiter of the @preamble entry.
    $preamble = substr($matches[1], 0, -1);
    $preambles['bibtexPreamble'] = trim($this->removeDelimitersAndExpand(trim($preamble), TRUE));
  }
  if (isset($preambles)) {
    $this->preamble = $preambles;
  }
  if ($this->fieldExtract) {
    // Next lines must take into account strings defined by previously-defined strings.
    $strings = $this->strings;
    // $this->strings is initialized with strings provided by user if they exist;
    // it is supposed that there are no substitutions to be made in the user strings, i.e., no #.
    $this->strings = isset($this->userStrings) ? $this->userStrings : array();
    foreach ($strings as $value) {
      // Changed 21/08/2004 G. Gardey
      // 23/08/2004 Mark G. account for comments on same line as @string - count delimiters in string value.
      $value = trim($value);
      $matches = preg_split("/@\\s*string\\s*([{(])/i", $value, 2, PREG_SPLIT_DELIM_CAPTURE);
      $matches = preg_split("/=/", $matches[2], 2, PREG_SPLIT_DELIM_CAPTURE);
      // Macros are case insensitive.
      $this->strings[strtolower(trim($matches[0]))] = $this->extractStringValue($matches[1]);
    }
  }
  // Changed 21/08/2004 G. Gardey
  // 22/08/2004 Mark Grimshaw - stopped useless looping.
  // removeDelimit and expandMacro have NO effect if !$this->fieldExtract:
  // without field extraction every entry is a raw string, not an array of
  // fields. "&&" binds tighter than "||", so the parentheses are required.
  if (($this->removeDelimit || $this->expandMacro) && $this->fieldExtract) {
    for ($i = 0; $i < count($this->entries); $i++) {
      foreach ($this->entries[$i] as $key => $value) {
        // 02/05/2005 G. Gardey don't expand macro for bibtexCitation
        // and bibtexEntryType.
        if ($key != 'bibtexCitation' && $key != 'bibtexEntryType') {
          $this->entries[$i][$key] = trim($this->removeDelimitersAndExpand($this->entries[$i][$key]));
        }
      }
    }
  }
  // EZ: empty results are deliberately left as empty arrays (not FALSE) to
  // be able to use the same instance for parsing several files, e.g., an
  // entry file with its associated abbreviation file.
  return array(
    $this->preamble,
    $this->strings,
    $this->entries,
    $this->undefinedStrings,
  );
}
/**
 * Post-processes the parsed entries and returns them by reference.
 *
 * When $this->fieldExtract is set and either $this->removeDelimit or
 * $this->expandMacro is set, every field value except 'bibtexCitation' and
 * 'bibtexEntryType' is passed through removeDelimitersAndExpand(). The
 * processing is applied to the stored entries in place.
 *
 * @return array
 *   Reference to $this->entries.
 */
public function &getEntries() {
  // Field processing only makes sense on extracted fields: without field
  // extraction every entry is a raw string. "&&" binds tighter than "||",
  // so the parentheses are required.
  if (($this->removeDelimit || $this->expandMacro) && $this->fieldExtract) {
    for ($i = 0; $i < count($this->entries); $i++) {
      foreach ($this->entries[$i] as $key => $value) {
        // 02/05/2005 G. Gardey don't expand macro for bibtexCitation
        // and bibtexEntryType.
        if ($key != 'bibtexCitation' && $key != 'bibtexEntryType') {
          $this->entries[$i][$key] = trim($this->removeDelimitersAndExpand($this->entries[$i][$key]));
        }
      }
    }
  }
  return $this->entries;
}
}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
PARSEENTRIES::closeBib | public | function | Close bib file. | |
PARSEENTRIES::closingDelimiter | public | function | This function receives a string and a closing delimiter '}' or ')' and looks for the position of the closing delimiter taking into account the following Bibtex rules: | |
PARSEENTRIES::explodeString | public | function | This function works like explode('#',$val) but has to take into account whether the character # is part of a string (i.e., is enclosed into "..." or {...} ) or defines a string concatenation as in @string{ "x # x" # ss #… | |
PARSEENTRIES::extractEntries | public | function | This function extract entries taking into account how comments are defined in BibTeX. BibTeX splits the file in two areas: inside an entry and outside an entry, the delimitation being indicated by the presence of a @ sign. When this character is met,… | |
PARSEENTRIES::extractStringValue | public | function | Extract value part of @string field enclosed by double-quotes or braces. The string may be expanded with previously-defined strings. | |
PARSEENTRIES::fieldSplit | public | function | Extract a field. | |
PARSEENTRIES::fullSplit | public | function | Start splitting a bibtex entry into component fields. Store the entry type and citation. | |
PARSEENTRIES::getEntries | public | function | | |
PARSEENTRIES::getLine | public | function | Get a non-empty line from the bib file or from the bibtexString. | |
PARSEENTRIES::loadBibtexString | public | function | Load a bibtex string to parse it. | |
PARSEENTRIES::loadStringMacro | public | function | Set strings macro. | |
PARSEENTRIES::openBib | public | function | Open bib file. | |
PARSEENTRIES::parseEntry | public | function | Grab a complete bibtex entry. | |
PARSEENTRIES::reduceFields | public | function | Extract and format fields. | |
PARSEENTRIES::removeDelimiters | public | function | Remove delimiters from a string. | |
PARSEENTRIES::removeDelimitersAndExpand | public | function | Remove enclosures around entry field values. Additionally, expand macros if flag set. | |
PARSEENTRIES::returnArrays | public | function | Return arrays of entries etc. to the calling process. | |
PARSEENTRIES::searchReplaceText | public | function | | |
PARSEENTRIES::__construct | public | function | | |