You are here

private function MarcEncoder::decodeFile in Bibliography & Citation 8

Same name and namespace in other branches
  1. 2.0.x modules/bibcite_marc/src/Encoder/MarcEncoder.php \Drupal\bibcite_marc\Encoder\MarcEncoder::decodeFile()

Decode a given raw MARC record.

"Port" of Andy Lester's MARC::File::USMARC->decode() function into PHP. Ideas and "rules" have been taken from USMARC::decode().

Parameters

string $text: MARC record.

Return value

Record: Decoded MARC Record object.

1 call to MarcEncoder::decodeFile()
MarcEncoder::decode in modules/bibcite_marc/src/Encoder/MarcEncoder.php

File

modules/bibcite_marc/src/Encoder/MarcEncoder.php, line 86

Class

MarcEncoder
Marc format encoder.

Namespace

Drupal\bibcite_marc\Encoder

Code

/**
 * Decode a given raw MARC record.
 *
 * Walks the record in three steps: validates the leader (record length,
 * record terminator, base address of data), slices the directory that
 * follows the leader into fixed-size entries, and turns every entry into a
 * Field appended to the returned Record.
 *
 * Every validation failure is reported through $this->croak(). The code
 * below assumes croak() aborts decoding (e.g. by throwing) -- TODO confirm
 * against its implementation, which is not visible here.
 *
 * @param string $text
 *   MARC record.
 *
 * @return Record
 *   Decoded MARC Record object.
 */
private function decodeFile($text) {
  // The first five bytes of the leader hold the total record length.
  if (!preg_match("/^\\d{5}/", $text, $matches)) {
    $this->croak('Record length "' . substr($text, 0, 5) . '" is not numeric');
  }
  $marc = new Record();

  // Keep the raw leader value for the error message, compare as integers.
  $reclen_raw = $matches[0];
  $reclen = (int) $reclen_raw;
  $actual_len = strlen($text);
  if ($reclen !== $actual_len) {
    $this->croak("Invalid record length: Leader says {$reclen_raw} bytes, but it's actually " . $actual_len);
  }
  if (substr($text, -1, 1) !== File::END_OF_RECORD) {
    $this->croak("Invalid record terminator");
  }

  // Store leader.
  $marc->leader(substr($text, 0, File::LEADER_LEN));

  // Bytes 12 - 16 of leader give offset to the body of the record. Validate
  // before converting: arithmetic on a non-numeric string is a TypeError on
  // PHP 8, which would bypass the regular error reporting.
  $base_address = substr($text, 12, 5);
  if (!preg_match("/^\\d{5}\$/", $base_address)) {
    $this->croak("Invalid base address of data in leader: \"{$base_address}\"");
  }
  $data_start = (int) $base_address;

  // The directory sits between the leader and the data, and is closed by an
  // end-of-field byte right before the data starts. A base address inside
  // the leader cannot describe a directory at all (and would hand substr()
  // a negative length below).
  if ($data_start <= File::LEADER_LEN || substr($text, $data_start - 1, 1) !== File::END_OF_FIELD) {
    $this->croak("No directory found");
  }

  // Immediately after the leader comes the directory (no separator)
  // -1 to allow for the end-of-field byte at the end of the directory.
  $dir = substr($text, File::LEADER_LEN, $data_start - File::LEADER_LEN - 1);

  // All directory entries have the same fixed size, so the directory length
  // must be a multiple of it.
  if (strlen($dir) % File::DIRECTORY_ENTRY_LEN != 0) {
    $this->croak("Invalid directory length");
  }

  // Go through all the fields.
  $nfields = strlen($dir) / File::DIRECTORY_ENTRY_LEN;
  for ($n = 0; $n < $nfields; $n++) {
    // One entry = 3 bytes tag, 4 bytes field length, 5 bytes field offset.
    // Named elements let a single unpack() return all three parts.
    $entry = substr($dir, $n * File::DIRECTORY_ENTRY_LEN, File::DIRECTORY_ENTRY_LEN);
    $parts = unpack("A3tag/A4len/A5offset", $entry);
    $tagno = $parts['tag'];
    $len = $parts['len'];
    $offset = $parts['offset'];

    // Check directory validity.
    if (!preg_match("/^[0-9A-Za-z]{3}\$/", $tagno)) {
      $this->croak("Invalid tag in directory: \"{$tagno}\"");
    }
    if (!preg_match("/^\\d{4}\$/", $len)) {
      $this->croak("Invalid length in directory, tag {$tagno}: \"{$len}\"");
    }
    if (!preg_match("/^\\d{5}\$/", $offset)) {
      $this->croak("Invalid offset in directory, tag {$tagno}: \"{$offset}\"");
    }
    $len = (int) $len;
    $offset = (int) $offset;

    // The offset is relative to the start of the data area, so the data
    // start has to be part of the bounds check.
    if ($data_start + $offset + $len > $reclen) {
      $this->croak("Directory entry runs off the end of the record tag {$tagno}");
    }

    $tagdata = substr($text, $data_start + $offset, $len);
    if (substr($tagdata, -1, 1) !== File::END_OF_FIELD) {
      $this->croak("field does not end in end of field character in tag {$tagno}");
    }

    // Get rid of the end-of-tag character.
    $tagdata = substr($tagdata, 0, -1);

    // All-numeric tags below 010 are stored as tag + raw data only, without
    // indicators or subfields.
    if (preg_match("/^\\d+\$/", $tagno) && (int) $tagno < 10) {
      $marc->append_fields(new Field($tagno, $tagdata));
      continue;
    }

    // Everything before the first subfield indicator is the indicator pair,
    // every following chunk is one subfield (code byte + data).
    $subfields = preg_split('/' . File::SUBFIELD_INDICATOR . '/', $tagdata);
    if ($subfields === FALSE) {
      $this->croak("Unable to split subfields in tag {$tagno}");
    }
    $indicators = array_shift($subfields);
    $indicator_len = strlen($indicators);
    if ($indicator_len > 2 || $indicator_len == 0) {
      // Invalid indicators are silently forced to blanks.
      $ind1 = " ";
      $ind2 = " ";
    }
    else {
      $ind1 = substr($indicators, 0, 1);
      $ind2 = substr($indicators, 1, 1);
    }

    // Split the subfield data into subfield name and data pairs. Entirely
    // empty subfields are skipped silently.
    // NOTE(review): the array is keyed by subfield code, so when a code is
    // repeated within one field only its last occurrence is kept. Changing
    // that depends on what Field accepts -- verify against Field.
    $subfield_data = [];
    foreach ($subfields as $subfield) {
      if (strlen($subfield) > 0) {
        $subfield_data[substr($subfield, 0, 1)] = substr($subfield, 1);
      }
    }
    $marc->append_fields(new Field($tagno, $ind1, $ind2, $subfield_data));
  }
  return $marc;
}