public function File::decode in Bibliography Module 7
Same name and namespace in other branches
- 6.2 modules/marcParse/php-marc.php \File::decode()
- 6 marcParse/php-marc.php \File::decode()
- 7.2 modules/marcParse/php-marc.php \File::decode()
Decode a given raw MARC record.
"Port" of Andy Lester's MARC::File::USMARC->decode() function into PHP. Ideas and "rules" have been used from USMARC::decode().
Parameters
string Raw MARC record:
Return value
Record Decoded MARC Record object
1 call to File::decode()
- File::next in modules/marcParse/php-marc.php - Return next Record-object.
File
- modules/marcParse/php-marc.php, line 256 - @package PHP-MARC
Class
Code
/**
 * Decode a given raw MARC record.
 *
 * PHP port of the decode() routine from Andy Lester's MARC::File::USMARC.
 * The leader, the directory and every field the directory points at are
 * validated. Structural problems are reported through $this->_croak(),
 * recoverable ones through $this->_warn().
 *
 * NOTE(review): the validation below is written as if _croak() does not
 * return; _croak() is not defined in this block - confirm.
 *
 * @param string $text
 *   Raw MARC record, including the trailing end-of-record character.
 *
 * @return Record
 *   Decoded MARC Record object.
 */
public function decode($text) {
  if (!preg_match("/^\\d{5}/", $text, $matches)) {
    $this->_croak('Record length "' . substr($text, 0, 5) . '" is not numeric');
  }
  $marc = new Record();

  // Store record length. It is kept as the original 5-digit string so the
  // error message below shows exactly what the leader contained; the loose
  // comparison against strlen() is therefore a numeric one.
  $reclen = $matches[0];
  if ($reclen != strlen($text)) {
    $this->_croak("Invalid record length: Leader says {$reclen} bytes, but it's actually " . strlen($text));
  }
  if (substr($text, -1, 1) != END_OF_RECORD) {
    $this->_croak("Invalid record terminator");
  }

  // Store leader.
  $marc->leader(substr($text, 0, LEADER_LEN));

  // Bytes 12 - 16 of leader give offset to the body of the record.
  // Cast explicitly instead of relying on "0 + string": on PHP 8 that
  // arithmetic throws a TypeError when the bytes are missing or non-numeric.
  $data_start = (int) substr($text, 12, 5);

  // Immediately after the leader comes the directory (no separator)
  // -1 to allow for \x1e at end of directory.
  $dir = substr($text, LEADER_LEN, $data_start - LEADER_LEN - 1);

  // Character after the directory must be \x1e.
  if (substr($text, $data_start - 1, 1) != END_OF_FIELD) {
    $this->_croak("No directory found");
  }

  // All directory entries 12 bytes long, so length % 12 must be 0.
  if (strlen($dir) % DIRECTORY_ENTRY_LEN != 0) {
    $this->_croak("Invalid directory length");
  }

  // Go through all the fields.
  $nfields = strlen($dir) / DIRECTORY_ENTRY_LEN;
  for ($n = 0; $n < $nfields; $n++) {
    // A directory entry is tag (3 bytes), field length (4) and offset (5).
    // Named elements let a single unpack() call return all three parts.
    $entry = unpack('A3tagno/A4len/A5offset', substr($dir, $n * DIRECTORY_ENTRY_LEN, DIRECTORY_ENTRY_LEN));
    $tagno = $entry['tagno'];
    $len = $entry['len'];
    $offset = $entry['offset'];

    // Check directory validity.
    if (!preg_match("/^[0-9A-Za-z]{3}\$/", $tagno)) {
      $this->_croak("Invalid tag in directory: \"{$tagno}\"");
    }
    if (!preg_match("/^\\d{4}\$/", $len)) {
      $this->_croak("Invalid length in directory, tag {$tagno}: \"{$len}\"");
    }
    if (!preg_match("/^\\d{5}\$/", $offset)) {
      $this->_croak("Invalid offset in directory, tag {$tagno}: \"{$offset}\"");
    }
    // Offsets in the directory are relative to the start of the data area,
    // so the base address has to be part of the bounds check.
    if ($data_start + $offset + $len > $reclen) {
      $this->_croak("Directory entry runs off the end of the record tag {$tagno}");
    }

    $tagdata = substr($text, $data_start + $offset, $len);
    if (substr($tagdata, -1, 1) == END_OF_FIELD) {
      // Get rid of the end-of-tag character.
      $tagdata = substr($tagdata, 0, -1);
    }
    else {
      $this->_croak("field does not end in end of field character in tag {$tagno}");
    }

    if (preg_match("/^\\d+\$/", $tagno) && $tagno < 10) {
      // Numeric tags below 010 are built from the raw data only, without
      // indicators or subfields.
      $marc->append_fields(new Field($tagno, $tagdata));
    }
    else {
      // Everything before the first subfield delimiter is the indicators.
      $subfields = explode(SUBFIELD_INDICATOR, $tagdata);
      $indicators = array_shift($subfields);
      if (strlen($indicators) > 2 || strlen($indicators) == 0) {
        $this->_warn("Invalid indicators \"{$indicators}\" forced to blanks for tag {$tagno}\n");
        $ind1 = " ";
        $ind2 = " ";
      }
      else {
        $ind1 = substr($indicators, 0, 1);
        $ind2 = substr($indicators, 1, 1);
      }

      // Split the subfield data into subfield name and data pairs.
      // Repeated subfield codes are collected under the same key.
      $subfield_data = array();
      foreach ($subfields as $subfield) {
        if (strlen($subfield) > 0) {
          $subfield_data[substr($subfield, 0, 1)][] = substr($subfield, 1);
        }
        else {
          $this->_warn("Entirely empty subfield found in tag {$tagno}");
        }
      }

      // $subfield_data is always set at this point, so test for emptiness
      // rather than isset(); otherwise this warning could never fire.
      if (empty($subfield_data)) {
        $this->_warn("No subfield data found for tag {$tagno}");
      }

      // The field is still appended when it has no subfields, as before.
      $marc->append_fields(new Field($tagno, $ind1, $ind2, $subfield_data));
    }
  }

  return $marc;
}