class PARSEENTRIES in Bibliography Module 5
Same name and namespace in other branches
- 6.2 modules/bibtexParse/PARSEENTRIES.php \PARSEENTRIES
- 6 bibtexParse/PARSEENTRIES.php \PARSEENTRIES
- 7.3 plugins/biblio_style/bibtex/PARSEENTRIES.php \PARSEENTRIES
- 7 modules/bibtexParse/PARSEENTRIES.php \PARSEENTRIES
- 7.2 modules/bibtexParse/PARSEENTRIES.php \PARSEENTRIES
Hierarchy
- class \PARSEENTRIES
Expanded class hierarchy of PARSEENTRIES
File
- bibtexParse/
PARSEENTRIES.php, line 134
View source
class PARSEENTRIES {
// Constructor: pulls in the LaTeX-to-Unicode translation file and resets all
// parser state to its defaults.
//
// Declared as __construct() because a method named after its class is no
// longer treated as a constructor from PHP 8.0 on; without this, "new
// PARSEENTRIES" would leave every property below unset.
//
// NOTE(review): nothing in this method assigns $this->transtab_latex_unicode,
// which parseEntry() reads. Presumably the required file populates it -- confirm.
// Because the file is loaded with require_once, it is only evaluated for the
// first instance created in a request.
function __construct() {
  require_once drupal_get_path('module', 'biblio') . '/bibtexParse/transtab_latex_unicode.inc.php';
  // Collected results.
  $this->preamble = array();
  $this->strings = array();
  $this->undefinedStrings = array();
  $this->entries = array();
  $this->count = 0;
  // Behaviour flags.
  $this->fieldExtract = TRUE;
  $this->removeDelimit = TRUE;
  $this->expandMacro = FALSE;
  $this->parseFile = TRUE;
  $this->outsideEntry = TRUE;
  $this->translate_latex = TRUE;
}
// Old-style (PHP 4) constructor name, kept so that PHP 4 and any code calling
// PARSEENTRIES() explicitly keep working. It is declared after __construct()
// so that __construct() is the one registered as the constructor.
function PARSEENTRIES() {
  $this->__construct();
}
// Open a BibTeX file for reading and switch the parser to file mode.
// Terminates the script (die) when $file is not a regular file.
// The result of fopen() is stored in $this->fid without being checked.
function openBib($file) {
  if (is_file($file)) {
    $this->fid = fopen($file, 'r');
    $this->parseFile = TRUE;
    return;
  }
  die;
}
// Load BibTeX source from memory instead of from a file.
// A string is split into lines on "\n"; any other value is stored unchanged
// (getLine() indexes it by line number, so it is presumably an array of lines).
// Switches the parser to string mode and rewinds the line cursor.
function loadBibtexString($bibtex_string) {
  $this->currentLine = 0;
  $this->parseFile = FALSE;
  $this->bibtexString = is_string($bibtex_string) ? explode("\n", $bibtex_string) : $bibtex_string;
}
// Apply a table of regular-expression replacements to a string.
//
// $searchReplaceActionsArray        map of search pattern => replacement, applied in order
// $sourceString                     the text to transform
// $includesSearchPatternDelimiters  TRUE when the patterns already carry their own
//                                   delimiters; otherwise each one is wrapped in "/.../"
//
// Returns the transformed text. A pattern that fails (preg_replace() returns
// NULL) is skipped and the text is left as it was, rather than being replaced
// by NULL. A missing or non-array table returns the text unchanged.
function searchReplaceText($searchReplaceActionsArray, $sourceString, $includesSearchPatternDelimiters = FALSE) {
  // Reset the script timer to avoid timeouts on large inputs.
  set_time_limit(30);
  if (!is_array($searchReplaceActionsArray)) {
    return $sourceString;
  }
  foreach ($searchReplaceActionsArray as $searchString => $replaceString) {
    if (!$includesSearchPatternDelimiters) {
      // Add search pattern delimiters.
      $searchString = "/" . $searchString . "/";
    }
    // preg_replace() already leaves the subject untouched when nothing
    // matches, so no separate preg_match() pass is needed.
    $replaced = preg_replace($searchString, $replaceString, $sourceString);
    if ($replaced !== NULL) {
      $sourceString = $replaced;
    }
  }
  return $sourceString;
}
// Register caller-supplied @string macros.
// The array is stored as-is in $this->userStrings; returnArrays() uses it as
// the initial macro table before adding the @string definitions it parsed.
function loadStringMacro($macro_array) {
$this->userStrings = $macro_array;
}
// Close the file handle opened by openBib().
// Does nothing when no handle is open (openBib() was never called, fopen()
// failed, or the handle was already closed), instead of passing a
// non-resource to fclose().
function closeBib() {
  if (isset($this->fid) && is_resource($this->fid)) {
    fclose($this->fid);
  }
}
// Get the next non-empty line, trimmed, from the bib file (file mode) or from
// the loaded lines (string mode).
//
// Returns the line, or a falsy value once the input is exhausted. String mode
// now returns FALSE at the end, like file mode, and no longer reads past the
// last element of $this->bibtexString.
//
// Lines are tested for truthiness, so a line consisting only of "0" is
// skipped like a blank line. This is kept on purpose: extractEntries() loops
// with "while ($line = getLine())", which would stop early on a returned "0".
function getLine() {
  if ($this->parseFile) {
    if (feof($this->fid)) {
      return FALSE;
    }
    do {
      // fgets() returns FALSE at end of file; trim() turns that into ''.
      $line = trim(fgets($this->fid));
    } while (!feof($this->fid) && !$line);
    return $line;
  }
  $total = is_array($this->bibtexString) ? count($this->bibtexString) : 0;
  while ($this->currentLine < $total) {
    $index = $this->currentLine;
    $this->currentLine++;
    $line = isset($this->bibtexString[$index]) ? trim($this->bibtexString[$index]) : '';
    if ($line) {
      return $line;
    }
  }
  return FALSE;
}
// Extract the value part of an @string definition.
// $string is the text after the "=" and still ends with the entry's closing
// delimiter: that last character is dropped, the rest is trimmed, and the
// enclosing quotes/braces are removed (with macro expansion when enabled)
// by removeDelimitersAndExpand().
function extractStringValue($string) {
  $withoutEndDelimiter = substr($string, 0, strlen($string) - 1);
  $value = trim($withoutEndDelimiter);
  return $this->removeDelimitersAndExpand($value);
}
// Split off the first field value from the remainder of an entry.
//
// $seg is the text following a field's "=". It is cut at the next
// ", name =" separator (this also handles fields like "another-field = {}").
//
// Returns array(value, rest):
//   value - everything before the separator
//   rest  - everything after the separator's "=", or FALSE when no further
//           field follows
//
// The split is limited to two pieces and the separator is NOT captured;
// reduceFields() relies on exactly that shape. The original passed
// PREG_SPLIT_DELIM_CAPTURE in the limit position, which only worked because
// that constant happens to equal 2 -- the limit is now spelled out.
function fieldSplit($seg) {
  $parts = preg_split("/,\\s*([-_.:,a-zA-Z0-9]+)\\s*={1}\\s*/U", $seg, 2);
  if (!is_array($parts)) {
    // preg_split() failed; treat the whole segment as a single value.
    return array(
      $seg,
      FALSE,
    );
  }
  $rest = array_key_exists(1, $parts) ? $parts[1] : FALSE;
  return array(
    $parts[0],
    $rest,
  );
}
// Extract the "key = value" fields of one entry and store them, still
// delimited, in $this->entries[$this->count] under lower-cased keys.
//
// $oldString is the entry text after the citation key, e.g.
//   ' author = {A}, year = 2000}'
//
// Method: collect the values with fieldSplit(), cut them out of the string so
// that only "key =, key =," remains, split that on "=," to obtain the keys,
// then pair keys and values in order.
//
// Delimiters are not removed here, because returnArrays() needs to know
// whether a value is a string macro (21/08/2004 G. Gardey).
//
// Input without any "=" stores nothing. A value of "0" is kept; only an
// empty value is skipped.
function reduceFields($oldString) {
  $oldString = (string) $oldString;
  // 03/05/2005 G. Gardey: remove just one trailing delimiter, not all of
  // them, so that an entry ended by "somefield = {aValue}}" parses correctly.
  $lg = strlen($oldString);
  if ($lg > 0 && ($oldString[$lg - 1] == "}" || $oldString[$lg - 1] == ")" || $oldString[$lg - 1] == ",")) {
    $oldString = substr($oldString, 0, $lg - 1);
  }
  $split = preg_split("/=/", $oldString, 2);
  if (!is_array($split) || !isset($split[1])) {
    // No field assignment at all.
    return;
  }
  // Collect the values one by one; fieldSplit() hands back the remainder.
  $values = array();
  $string = $split[1];
  while ($string !== FALSE && $string !== '') {
    list($entry, $string) = $this
      ->fieldSplit($string);
    $values[] = $entry;
  }
  // Cut each value out of the string. Values appear in order, so searching
  // from the position of the previous cut (initially just after the first
  // "=") prevents a short value from matching inside an earlier key.
  $offset = strlen($split[0]) + 1;
  foreach ($values as $value) {
    if ($value === '') {
      continue;
    }
    $pos = strpos($oldString, $value, $offset);
    if ($pos === FALSE) {
      continue;
    }
    $oldString = substr_replace($oldString, '', $pos, strlen($value));
    $offset = $pos;
  }
  // Make sure the key list ends with "=," so the split below is uniform.
  $trimmed = trim($oldString);
  if ($trimmed === '' || $trimmed[strlen($trimmed) - 1] != ',') {
    $oldString .= ',';
  }
  $keys = preg_split("/=,/", $oldString);
  // 22/08/2004 Mark Grimshaw: the split always leaves an empty key at the
  // end (the text after the final "=,"), which must be dropped.
  array_pop($keys);
  foreach ($keys as $key) {
    $value = trim((string) array_shift($values));
    // Remove any dangling ',' left on the final field of the entry.
    $value = rtrim($value, ",");
    if ($value === '') {
      continue;
    }
    $key = strtolower(trim($key));
    $this->entries[$this->count][$key] = trim($value);
  }
}
// Start splitting a BibTeX entry into its component fields.
// Stores the lower-cased entry type in 'bibtexEntryType' and the citation key
// in 'bibtexCitation' of $this->entries[$this->count], then passes the field
// list to reduceFields().
//
// Three shapes are handled:
//   @type{key, field = value, ...}   regular entry
//   @type{field = value, ...}        no citation key: 'bibtexCitation' is ''
//   @type{key}                       no comma at all: the key is still
//                                    recorded and no fields are parsed
// reduceFields() is only called when the field text contains a "=".
function fullSplit($entry) {
  $type = '';
  $citation = '';
  $fields = '';
  $matches = preg_split("/@(.*)[{(](.*),/U", $entry, 2, PREG_SPLIT_DELIM_CAPTURE);
  if (is_array($matches) && count($matches) >= 4) {
    $type = $matches[1];
    // Sometimes a BibTeX entry has no citation key: what was captured as the
    // key is really the first field, so split again without the comma.
    if (preg_match("/=/", $matches[2])) {
      $matches = preg_split("/@(.*)\\s*[{(](.*)/U", $entry, 2, PREG_SPLIT_DELIM_CAPTURE);
    }
    $citation = $matches[2];
    $fields = $matches[3];
  }
  else {
    // No comma after the opening delimiter.
    $matches = preg_split("/@(.*)\\s*[{(](.*)/U", $entry, 2, PREG_SPLIT_DELIM_CAPTURE);
    if (is_array($matches) && count($matches) >= 4) {
      $type = $matches[1];
      $rest = trim($matches[3]);
      if (strpos($rest, '=') !== FALSE) {
        // A single field and no citation key.
        $fields = $matches[3];
      }
      else {
        // Only a citation key: strip the entry's closing delimiter.
        $last = $rest === '' ? '' : $rest[strlen($rest) - 1];
        if ($last == '}' || $last == ')') {
          $rest = substr($rest, 0, strlen($rest) - 1);
        }
        $citation = trim((string) $rest);
      }
    }
  }
  $this->entries[$this->count]['bibtexEntryType'] = strtolower(trim($type));
  $this->entries[$this->count]['bibtexCitation'] = $citation;
  if (strpos((string) $fields, '=') !== FALSE) {
    $this
      ->reduceFields($fields);
  }
}
// Classify one complete BibTeX entry and store it.
//
//   @string    appended raw to $this->strings   (parsed later by returnArrays())
//   @preamble  appended raw to $this->preamble
//   @comment   ignored
//   other      split into fields by fullSplit() when $this->fieldExtract is
//              set, otherwise stored raw; $this->count is then advanced
//
// LaTeX sequences are first translated with $this->transtab_latex_unicode
// when $this->translate_latex is set and that table is a non-empty array.
//
// The entry type is the text between "@" and the opening delimiter, compared
// case-insensitively and exactly, so only a type that IS "string", "preamble"
// or "comment" is treated as such, not one that merely contains the word.
//
// Always returns FALSE (the return value carries no information).
function parseEntry($entry) {
  $lastLine = FALSE;
  if ($this->translate_latex && !empty($this->transtab_latex_unicode) && is_array($this->transtab_latex_unicode)) {
    $entry = $this
      ->searchReplaceText($this->transtab_latex_unicode, $entry, FALSE);
  }
  // Match against the entry itself: preg_quote() is meant for building
  // patterns, not for preparing subjects.
  if (!preg_match("/@(.*)([{(])/U", $entry, $matches)) {
    return $lastLine;
  }
  $type = strtolower(trim($matches[1]));
  if ($type === 'string') {
    $this->strings[] = $entry;
  }
  elseif ($type === 'preamble') {
    $this->preamble[] = $entry;
  }
  elseif ($type === 'comment') {
    // Comments are discarded.
  }
  else {
    if ($this->fieldExtract) {
      $this
        ->fullSplit($entry);
    }
    else {
      $this->entries[$this->count] = $entry;
    }
    $this->count++;
  }
  return $lastLine;
}
// Remove the enclosing delimiters from a field value.
//
//   "..."        first and last character are removed
//   {...}        the braces are removed, but only when the value also ends
//                with "}"; otherwise it is returned unchanged
//   numeric      returned unchanged (a year etc.)
//   known macro  returned unchanged (looked up in $this->strings as written
//                and lower-cased, since macro names are stored lower-cased)
//   anything else is an undefined macro: it is recorded once in
//                $this->undefinedStrings and '' is returned
//
// An undefined macro yields '' every time, not only on its first occurrence.
// An empty value yields '' and is not recorded as an undefined macro.
function removeDelimiters($string) {
  if ($string && $string[0] == "\"") {
    $string = substr($string, 1);
    return substr($string, 0, -1);
  }
  if ($string && $string[0] == "{") {
    if ($string[strlen($string) - 1] == "}") {
      $string = substr($string, 1);
      $string = substr($string, 0, -1);
    }
    return $string;
  }
  if (is_numeric($string)) {
    return $string;
  }
  $name = (string) $string;
  if ($name === '') {
    return '';
  }
  if (array_key_exists($name, $this->strings) || array_key_exists(strtolower($name), $this->strings)) {
    return $string;
  }
  // Undefined string that is not a year etc.
  if (!in_array($name, $this->undefinedStrings, TRUE)) {
    $this->undefinedStrings[] = $name;
  }
  return '';
}
// Split a value on the BibTeX concatenation operator '#'.
// Works like explode('#', $val), except that a '#' inside "..." or inside
// {...} belongs to the string and does not split, as in
//   @string{ "x # x" # ss # {xx{x}x} }
// Returns the pieces untrimmed, in order; the last piece is whatever follows
// the final top-level '#'.
function explodeString($val) {
  $inQuotes = 0;
  $depth = 0;
  $start = 0;
  $length = strlen($val);
  for ($pos = 0; $pos < $length; $pos++) {
    switch ($val[$pos]) {
      case '"':
        $inQuotes = !$inQuotes;
        break;

      case '{':
        $depth++;
        break;

      case '}':
        $depth--;
        break;

      case '#':
        // Only a '#' outside quotes and braces is a concatenation.
        if (!$inQuotes && !$depth) {
          $pieces[] = substr($val, $start, $pos - $start);
          $start = $pos + 1;
        }
        break;
    }
  }
  $pieces[] = substr($val, $start);
  return $pieces;
}
// Find the position of an entry's closing delimiter.
// $val is the text collected so far and $delimitEnd is '}' or ')'. The scan
// follows these BibTeX rules:
//  * Inside the braces there can be arbitrarily nested pairs of braces, and
//    braces must also be balanced inside quotes.
//  * Inside quotes, a " character cannot simply be escaped with \": it has to
//    be placed inside braces. Hence only a '"' met at brace level 0 toggles
//    the quote state, which covers values such as "ss{\"o}ss".
// Returns the index of the first $delimitEnd found outside quotes with all
// braces closed, or 0 when there is none yet.
function closingDelimiter($val, $delimitEnd) {
  $quoted = 0;
  $depth = 0;
  $length = strlen($val);
  for ($pos = 0; $pos < $length; $pos++) {
    $char = $val[$pos];
    if ($char == '"' && !$depth) {
      $quoted = !$quoted;
    }
    elseif ($char == '{') {
      $depth++;
    }
    elseif ($char == '}') {
      $depth--;
    }
    // Checked after the counters are updated, so the '}' that brings the
    // brace level back to 0 is itself the closing delimiter.
    if ($char == $delimitEnd && !$quoted && !$depth) {
      return $pos;
    }
  }
  return 0;
}
// Remove the enclosures around a field value and, when enabled, expand macros.
// Macros are only expanded when $this->expandMacro is set, at least one
// string is defined, and the value does not come from a preamble; otherwise
// the value goes straight through removeDelimiters().
// When expanding, the value is split on top-level '#' and each trimmed piece
// is replaced by its macro definition (names are matched lower-cased, since
// macros are case insensitive) or, failing that, stripped of its delimiters.
// The pieces are concatenated without separators.
function removeDelimitersAndExpand($string, $inpreamble = FALSE) {
  $expand = $this->expandMacro && !empty($this->strings) && !$inpreamble;
  if (!$expand) {
    return $this->removeDelimiters($string);
  }
  $expanded = "";
  foreach ($this->explodeString($string) as $piece) {
    // '#' is usually surrounded by spaces.
    $piece = trim($piece);
    $macro = strtolower($piece);
    $expanded .= isset($this->strings[$macro]) ? $this->strings[$macro] : $this->removeDelimiters($piece);
  }
  return $expanded;
}
// Extract all entries from the input, taking into account how comments are
// defined in BibTeX. BibTeX splits the file in two areas: inside an entry and
// outside an entry, the boundary being marked by an @ sign. When this
// character is met, BibTeX expects to find an entry. Before that sign, and
// after an entry, everything is considered a comment.
//
// Lines are read with getLine() and accumulated in $entry until
// closingDelimiter() finds the end of the entry, which is then handed to
// parseEntry(). Returns nothing; results are stored by parseEntry().
//
// NOTE(review): the loop ends on the first falsy value from getLine().
function extractEntries() {
// $inside: currently collecting the lines of an entry.
// $possibleEntryStart: an "@..." seen without its opening delimiter yet; it is
// prepended to the following line.
$inside = $possibleEntryStart = FALSE;
$entry = "";
while ($line = $this
->getLine()) {
if ($possibleEntryStart) {
$line = $possibleEntryStart . $line;
}
if (!$inside && strchr($line, "@")) {
// throw away all characters before the '@'
$line = strstr($line, '@');
if (!strchr($line, "{") && !strchr($line, "(")) {
// No opening delimiter on this line yet: wait for the next line.
$possibleEntryStart = $line;
}
// NOTE(review): preg_quote() is applied to the subject, not to a pattern. It
// only inserts backslashes, so the captured delimiter is still '{' or '(' --
// confirm it can be dropped.
elseif (preg_match("/@.*([{(])/U", preg_quote($line), $matches)) {
$inside = TRUE;
// The closing delimiter must pair with the opening one.
if ($matches[1] == '{') {
$delimitEnd = '}';
}
else {
$delimitEnd = ')';
}
$possibleEntryStart = FALSE;
}
}
if ($inside) {
$entry .= " " . $line;
if ($j = $this
->closingDelimiter($entry, $delimitEnd)) {
// all characters after the delimiter are thrown away, but the remaining
// characters must be kept since they may start the next entry !!!
$lastLine = substr($entry, $j + 1);
$entry = substr($entry, 0, $j + 1);
// Strip excess whitespace from the entry
$entry = preg_replace('/\\s\\s+/', ' ', $entry);
$this
->parseEntry($entry);
// Anything from the next '@' on becomes the start of the following entry.
// NOTE(review): $delimitEnd is not recomputed here, so a following entry that
// starts on this same line is closed with the previous entry's delimiter.
// NOTE(review): the leftover text is only checked for its own closing
// delimiter after another line has been read -- confirm that an entry
// completed on the last line of input is not lost.
$entry = strchr($lastLine, "@");
if ($entry) {
$inside = TRUE;
}
else {
$inside = FALSE;
}
}
}
}
}
// Finish parsing and return the results to the calling process.
//
// Returns array(preamble, strings, entries, undefinedStrings):
//   preamble          array('bibtexPreamble' => text) when a preamble was
//                     found (the last one wins), otherwise an empty array
//   strings           macro name (lower-cased) => value; only rebuilt when
//                     $this->fieldExtract is set
//   entries           one element per entry (field arrays, or raw entry text
//                     when $this->fieldExtract is off)
//   undefinedStrings  names of macros that were used but never defined
//
// The raw @preamble/@string entries collected by parseEntry() are replaced by
// their processed form. The properties are deliberately not reset to FALSE
// when empty, so that one instance can parse several files, e.g. an entry
// file together with its abbreviation file (EZ).
// NOTE(review): calling this method a second time re-processes values that
// have already been processed -- confirm callers call it once per parse.
function returnArrays() {
  foreach ($this->preamble as $value) {
    if (!preg_match("/.*?[{(](.*)/", $value, $matches)) {
      // Not a raw "@preamble{...}" entry.
      continue;
    }
    // Drop the entry's closing delimiter.
    $preamble = substr($matches[1], 0, -1);
    $preambles['bibtexPreamble'] = trim($this
      ->removeDelimitersAndExpand(trim($preamble), TRUE));
  }
  if (isset($preambles)) {
    $this->preamble = $preambles;
  }
  if ($this->fieldExtract) {
    // Strings may be defined in terms of previously-defined strings, so they
    // are processed in order against the table built so far.
    $strings = $this->strings;
    // The table starts from the strings provided by the user, if any. It is
    // assumed that these need no substitution, i.e. contain no '#'.
    $this->strings = isset($this->userStrings) ? $this->userStrings : array();
    foreach ($strings as $value) {
      // 23/08/2004 Mark G.: account for comments on the same line as @string.
      $value = trim($value);
      $matches = preg_split("/@\\s*string\\s*([{(])/i", $value, 2, PREG_SPLIT_DELIM_CAPTURE);
      if (!is_array($matches) || !isset($matches[2])) {
        // Not a raw "@string{...}" entry.
        continue;
      }
      $matches = preg_split("/=/", $matches[2], 2, PREG_SPLIT_DELIM_CAPTURE);
      if (!is_array($matches) || !isset($matches[1])) {
        // "@string{name}" without a value.
        continue;
      }
      // Macros are case insensitive.
      $this->strings[strtolower(trim($matches[0]))] = $this
        ->extractStringValue($matches[1]);
    }
  }
  // removeDelimit and expandMacro have NO effect if !$this->fieldExtract.
  // The parentheses matter: && binds tighter than ||, so without them
  // removeDelimit alone would trigger the loop on raw, unsplit entries.
  if (($this->removeDelimit || $this->expandMacro) && $this->fieldExtract) {
    foreach ($this->entries as $i => $fields) {
      if (!is_array($fields)) {
        continue;
      }
      foreach ($fields as $key => $value) {
        // 02/05/2005 G. Gardey: don't expand macros for bibtexCitation and
        // bibtexEntryType.
        if ($key != 'bibtexCitation' && $key != 'bibtexEntryType') {
          $this->entries[$i][$key] = trim($this
            ->removeDelimitersAndExpand($value));
        }
      }
    }
  }
  return array(
    $this->preamble,
    $this->strings,
    $this->entries,
    $this->undefinedStrings,
  );
}
// Convert the parsed entries into node arrays.
//
// $node_array  (by reference) list that receives one new element per entry
// $node        template copied for every entry before its fields are filled in
//
// For each entry a copy of $node is appended to $node_array, 'biblio_type' is
// set from the BibTeX entry type (left untouched for unknown types), and every
// non-empty BibTeX field is copied to its node field. Where several BibTeX
// fields feed the same node field, the one listed last wins (e.g. 'series'
// over 'booktitle' over 'journal'). Authors separated by " and " or " & " are
// joined with "; "; no further name parsing is done.
function bib2node(&$node_array, $node) {
  // BibTeX entry type => biblio_type number. The keys are string literals: as
  // bare words they are undefined constants, which is an Error in PHP 8.
  $type_map = array(
    'article' => 102,
    'book' => 100,
    'booklet' => 101,
    'inbook' => 101,
    'conference' => 103,
    'incollection' => 100,
    'inproceedings' => 103,
    'manual' => 129,
    'mastersthesis' => 108,
    'misc' => 129,
    'phdthesis' => 108,
    'proceedings' => 104,
    'techreport' => 109,
    'unpublished' => 124,
  );
  // BibTeX field => node field, in the order in which they are applied.
  $field_map = array(
    'author' => 'biblio_authors',
    'bibtexCitation' => 'biblio_citekey',
    'editor' => 'biblio_secondary_authors',
    'journal' => 'biblio_secondary_title',
    'booktitle' => 'biblio_secondary_title',
    'series' => 'biblio_secondary_title',
    'volume' => 'biblio_volume',
    'number' => 'biblio_number',
    'year' => 'biblio_year',
    'note' => 'biblio_notes',
    'month' => 'biblio_date',
    'pages' => 'biblio_pages',
    'publisher' => 'biblio_publisher',
    'organization' => 'biblio_publisher',
    'school' => 'biblio_publisher',
    'institution' => 'biblio_publisher',
    'title' => 'title',
    'type' => 'biblio_type_of_work',
    'edition' => 'biblio_edition',
    'chapter' => 'biblio_section',
    'address' => 'biblio_place_published',
    'abstract' => 'biblio_abst_e',
    'keywords' => 'biblio_keywords',
    'isbn' => 'biblio_isbn',
    'url' => 'biblio_url',
  );
  $parsed = $this
    ->returnArrays();
  $entries = $parsed[2];
  foreach ($entries as $entry) {
    // array_push() returns the new element count; the new element's index is
    // one less.
    $node_id = array_push($node_array, $node) - 1;
    $type = isset($entry['bibtexEntryType']) ? $entry['bibtexEntryType'] : NULL;
    if (is_string($type) && isset($type_map[$type])) {
      $node_array[$node_id]['biblio_type'] = $type_map[$type];
    }
    foreach ($field_map as $bibtex_field => $node_field) {
      if (empty($entry[$bibtex_field])) {
        continue;
      }
      $value = $entry[$bibtex_field];
      if ($bibtex_field == 'author') {
        $value = preg_replace("/\\s(and|&)\\s/i", "; ", $value);
      }
      $node_array[$node_id][$node_field] = $value;
    }
  }
}
}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
PARSEENTRIES::bib2node | | function | | |
PARSEENTRIES::closeBib | | function | | |
PARSEENTRIES::closingDelimiter | | function | | |
PARSEENTRIES::explodeString | | function | | |
PARSEENTRIES::extractEntries | | function | | |
PARSEENTRIES::extractStringValue | | function | | |
PARSEENTRIES::fieldSplit | | function | | |
PARSEENTRIES::fullSplit | | function | | |
PARSEENTRIES::getLine | | function | | |
PARSEENTRIES::loadBibtexString | | function | | |
PARSEENTRIES::loadStringMacro | | function | | |
PARSEENTRIES::openBib | | function | | |
PARSEENTRIES::parseEntry | | function | | |
PARSEENTRIES::PARSEENTRIES | | function | | |
PARSEENTRIES::reduceFields | | function | | |
PARSEENTRIES::removeDelimiters | | function | | |
PARSEENTRIES::removeDelimitersAndExpand | | function | | |
PARSEENTRIES::returnArrays | | function | | |
PARSEENTRIES::searchReplaceText | | function | | |