class File in Bibliography Module 7
Same name and namespace in other branches
Class File Class to read MARC records from file(s)
Hierarchy
- class \File
Expanded class hierarchy of File
File
- modules/
marcParse/ php-marc.php, line 81 - @package PHP-MARC
View source
class File {
/**
* ========== VARIABLE DECLARATIONS ==========.
*/
/**
* Array containing raw records.
*
* @var array
*/
public $raw;
/**
* Array of warnings.
*
* @var array
*/
public $warn;
/**
* Current position in the array of records.
*
* @var int
*/
public $pointer;
/**
* ========== ERROR FUNCTIONS ==========.
*/
/**
* Croaking function.
*
* Similar to Perl's croak function, which ends parsing and raises an
* user error with a descriptive message.
*
* @param string The message to display
*/
public function _croak($msg) {
trigger_error($msg, E_USER_ERROR);
}
/**
* Fuction to issue warnings.
*
* Warnings will not be displayed unless explicitly accessed, but all
* warnings issued during parse will be stored.
*
* @param string Warning
*
* @return string Last added warning
*/
public function _warn($msg) {
$this->warn[] = $msg;
return $msg;
}
/**
* Get warning(s)
*
* Get either all warnings or a specific warning ID.
*
* @param integer ID of the warning
*
* @return array|string Return either Array of all warnings or specific warning
*/
public function warnings($id = "") {
if (!$id) {
return $this->warn;
}
else {
if (array_key_exists($id, $this->warn)) {
return $this->warn[$id];
}
else {
return "Invalid warning ID: {$id}";
}
}
}
/**
* ========== PROCESSING FUNCTIONS ==========.
*/
/**
* Return the next raw MARC record.
*
* Returns th nexts raw MARC record from the read file, unless all
* records already have been read.
*
* @return string|FALSE Either a raw record or False
*/
public function _next() {
/**
* Exit if we are at the end of the file
*/
if ($this->pointer >= count($this->raw)) {
return FALSE;
}
/**
* Read next line
*/
$usmarc = $this->raw[$this->pointer++];
// Remove illegal stuff that sometimes occurs between records
// preg_replace does not know what to do with \x00, thus omitted.
$usmarc = preg_replace("/^[\n\r]+/", "", $usmarc);
/**
* Record validation
*/
if (strlen($usmarc) < 5) {
$this
->_warn("Couldn't find record length");
}
$reclen = substr($usmarc, 0, 5);
if (preg_match("/^\\d{5}\$/", $reclen) || $reclen != strlen($usmarc)) {
$this
->_warn("Invalid record length \"{$reclen}\"");
}
return $usmarc;
}
/**
* Read in MARC record file.
*
* This function will read in MARC record files that either
* contain a single MARC record, or numerous records.
*
* @param string Name of the file
*
* @return string Returns warning if issued during read
*/
public function __construct($in) {
if (file_exists($in)) {
$input = file($in);
$recs = explode(END_OF_RECORD, join("", $input));
// Append END_OF_RECORD as we lost it when splitting
// Last is not record, as it is empty because every record ends
// with END_OF_RECORD.
for ($i = 0; $i < count($recs) - 1; $i++) {
$this->raw[] = $recs[$i] . END_OF_RECORD;
}
$this->pointer = 0;
}
else {
return $this
->_warn("Invalid input file: {$i}");
}
}
/**
* Return next Record-object.
*
* Decode the next raw MARC record and return.
*
* @return Record A Record object
*/
public function next() {
if ($raw = $this
->_next()) {
return $this
->decode($raw);
}
else {
return FALSE;
}
}
/**
* Decode a given raw MARC record.
*
* "Port" of Andy Lesters MARC::File::USMARC->decode() function into PHP. Ideas and
* "rules" have been used from USMARC::decode().
*
* @param string Raw MARC record
*
* @return Record Decoded MARC Record object
*/
public function decode($text) {
if (!preg_match("/^\\d{5}/", $text, $matches)) {
$this
->_croak('Record length "' . substr($text, 0, 5) . '" is not numeric');
}
$marc = new Record();
// Store record length.
$reclen = $matches[0];
if ($reclen != strlen($text)) {
$this
->_croak("Invalid record length: Leader says {$reclen} bytes, but it's actually " . strlen($text));
}
if (substr($text, -1, 1) != END_OF_RECORD) {
$this
->_croak("Invalid record terminator");
}
// Store leader.
$marc
->leader(substr($text, 0, LEADER_LEN));
// Bytes 12 - 16 of leader give offset to the body of the record.
$data_start = 0 + substr($text, 12, 5);
// Immediately after the leader comes the directory (no separator)
// -1 to allow for \x1e at end of directory.
$dir = substr($text, LEADER_LEN, $data_start - LEADER_LEN - 1);
// Character after the directory must be \x1e.
if (substr($text, $data_start - 1, 1) != END_OF_FIELD) {
$this
->_croak("No directory found");
}
// All directory entries 12 bytes long, so length % 12 must be 0.
if (strlen($dir) % DIRECTORY_ENTRY_LEN != 0) {
$this
->_croak("Invalid directory length");
}
// Go through all the fields.
$nfields = strlen($dir) / DIRECTORY_ENTRY_LEN;
for ($n = 0; $n < $nfields; $n++) {
// As pack returns to key 1, leave place 0 in list empty.
list(, $tagno) = unpack("A3", substr($dir, $n * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN));
list(, $len) = unpack("A3/A4", substr($dir, $n * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN));
list(, $offset) = unpack("A3/A4/A5", substr($dir, $n * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN));
// Check directory validity.
if (!preg_match("/^[0-9A-Za-z]{3}\$/", $tagno)) {
$this
->_croak("Invalid tag in directory: \"{$tagno}\"");
}
if (!preg_match("/^\\d{4}\$/", $len)) {
$this
->_croak("Invalid length in directory, tag {$tagno}: \"{$len}\"");
}
if (!preg_match("/^\\d{5}\$/", $offset)) {
$this
->_croak("Invalid offset in directory, tag {$tagno}: \"{$offset}\"");
}
if ($offset + $len > $reclen) {
$this
->_croak("Directory entry runs off the end of the record tag {$tagno}");
}
$tagdata = substr($text, $data_start + $offset, $len);
if (substr($tagdata, -1, 1) == END_OF_FIELD) {
// Get rid of the end-of-tag character.
$tagdata = substr($tagdata, 0, -1);
--$len;
}
else {
$this
->_croak("field does not end in end of field character in tag {$tagno}");
}
if (preg_match("/^\\d+\$/", $tagno) && $tagno < 10) {
$marc
->append_fields(new Field($tagno, $tagdata));
}
else {
$subfields = explode(SUBFIELD_INDICATOR, $tagdata);
$indicators = array_shift($subfields);
if (strlen($indicators) > 2 || strlen($indicators) == 0) {
$this
->_warn("Invalid indicators \"{$indicators}\" forced to blanks for tag {$tagno}\n");
list($ind1, $ind2) = array(
" ",
" ",
);
}
else {
$ind1 = substr($indicators, 0, 1);
$ind2 = substr($indicators, 1, 1);
}
// Split the subfield data into subfield name and data pairs.
$subfield_data = array();
foreach ($subfields as $subfield) {
if (strlen($subfield) > 0) {
$subfield_data[substr($subfield, 0, 1)][] = substr($subfield, 1);
}
else {
$this
->_warn("Entirely empty subfield found in tag {$tagno}");
}
}
if (!isset($subfield_data)) {
$this
->_warn("No subfield data found {$location} for tag {$tagno}");
}
$marc
->append_fields(new Field($tagno, $ind1, $ind2, $subfield_data));
}
}
return $marc;
}
/**
* Get the number of records available in this Record.
*
* @return int The number of records
*/
public function num_records() {
return count($this->raw);
}
}
Members
Name | Modifiers | Type | Description | Overrides |
---|---|---|---|---|
File:: |
public | property | Current position in the array of records. | |
File:: |
public | property | Array containing raw records. | |
File:: |
public | property | Array of warnings. | |
File:: |
public | function | Decode a given raw MARC record. | |
File:: |
public | function | Return next Record-object. | |
File:: |
public | function | Get the number of records available in this Record. | |
File:: |
public | function | Get warning(s) | |
File:: |
public | function | Croaking function. | |
File:: |
public | function | Return the next raw MARC record. | |
File:: |
public | function | Fuction to issue warnings. | |
File:: |
public | function | Read in MARC record file. | 1 |