BEGIN; INSERT INTO config.upgrade_log (version) VALUES ('0528'); -- dbs CREATE OR REPLACE FUNCTION maintain_control_numbers() RETURNS TRIGGER AS $func$ use strict; use MARC::Record; use MARC::File::XML (BinaryEncoding => 'UTF-8'); use MARC::Charset; use Encode; use Unicode::Normalize; MARC::Charset->assume_unicode(1); my $record = MARC::Record->new_from_xml($_TD->{new}{marc}); my $schema = $_TD->{table_schema}; my $rec_id = $_TD->{new}{id}; # Short-circuit if maintaining control numbers per MARC21 spec is not enabled my $enable = spi_exec_query("SELECT enabled FROM config.global_flag WHERE name = 'cat.maintain_control_numbers'"); if (!($enable->{processed}) or $enable->{rows}[0]->{enabled} eq 'f') { return; } # Get the control number identifier from an OU setting based on $_TD->{new}{owner} my $ou_cni = 'EVRGRN'; my $owner; if ($schema eq 'serial') { $owner = $_TD->{new}{owning_lib}; } else { # are.owner and bre.owner can be null, so fall back to the consortial setting $owner = $_TD->{new}{owner} || 1; } my $ous_rv = spi_exec_query("SELECT value FROM actor.org_unit_ancestor_setting('cat.marc_control_number_identifier', $owner)"); if ($ous_rv->{processed}) { $ou_cni = $ous_rv->{rows}[0]->{value}; $ou_cni =~ s/"//g; # Stupid VIM syntax highlighting" } else { # Fall back to the shortname of the OU if there was no OU setting $ous_rv = spi_exec_query("SELECT shortname FROM actor.org_unit WHERE id = $owner"); if ($ous_rv->{processed}) { $ou_cni = $ous_rv->{rows}[0]->{shortname}; } } my ($create, $munge) = (0, 0); my @scns = $record->field('035'); foreach my $id_field ('001', '003') { my $spec_value; my @controls = $record->field($id_field); if ($id_field eq '001') { $spec_value = $rec_id; } else { $spec_value = $ou_cni; } # Create the 001/003 if none exist if (scalar(@controls) == 1) { # Only one field; check to see if we need to munge it unless (grep $_->data() eq $spec_value, @controls) { $munge = 1; } } else { # Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match foreach my $control (@controls) { unless ($control->data() eq $spec_value) { $record->delete_field($control); } } $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value)); $create = 1; } } # Now, if we need to munge the 001, we will first push the existing 001/003 # into the 035; but if the record did not have one (and one only) 001 and 003 # to begin with, skip this process if ($munge and not $create) { my $scn = "(" . $record->field('003')->data() . ")" . $record->field('001')->data(); # Do not create duplicate 035 fields unless (grep $_->subfield('a') eq $scn, @scns) { $record->insert_fields_ordered(MARC::Field->new('035', '', '', 'a' => $scn)); } } # Set the 001/003 and update the MARC if ($create or $munge) { $record->field('001')->data($rec_id); $record->field('003')->data($ou_cni); my $xml = $record->as_xml_record(); $xml =~ s/\n//sgo; $xml =~ s/^<\?xml.+\?\s*>//go; $xml =~ s/>\s+entityize() # to avoid having to set PERL5LIB for PostgreSQL as well # If we are going to convert non-ASCII characters to XML entities, # we had better be dealing with a UTF8 string to begin with $xml = decode_utf8($xml); $xml = NFC($xml); # Convert raw ampersands to entities $xml =~ s/&(?!\S+;)/&/gso; # Convert Unicode characters to entities $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe; $xml =~ s/[\x00-\x1f]//go; $_TD->{new}{marc} = $xml; return "MODIFY"; } return; $func$ LANGUAGE PLPERLU; CREATE OR REPLACE FUNCTION authority.generate_overlay_template ( TEXT, BIGINT ) RETURNS TEXT AS $func$ use MARC::Record; use MARC::File::XML (BinaryEncoding => 'UTF-8'); use MARC::Charset; MARC::Charset->assume_unicode(1); my $xml = shift; my $r = MARC::Record->new_from_xml( $xml ); return undef unless ($r); my $id = shift() || $r->subfield( '901' => 'c' ); $id =~ s/^\s*(?:\([^)]+\))?\s*(.+)\s*?$/$1/; return undef unless ($id); # We need an ID! my $tmpl = MARC::Record->new(); $tmpl->encoding( 'UTF-8' ); my @rule_fields; for my $field ( $r->field( '1..' ) ) { # Get main entry fields from the authority record my $tag = $field->tag; my $i1 = $field->indicator(1); my $i2 = $field->indicator(2); my $sf = join '', map { $_->[0] } $field->subfields; my @data = map { @$_ } $field->subfields; my @replace_them; # Map the authority field to bib fields it can control. if ($tag >= 100 and $tag <= 111) { # names @replace_them = map { $tag + $_ } (0, 300, 500, 600, 700); } elsif ($tag eq '130') { # uniform title @replace_them = qw/130 240 440 730 830/; } elsif ($tag >= 150 and $tag <= 155) { # subjects @replace_them = ($tag + 500); } elsif ($tag >= 180 and $tag <= 185) { # floating subdivisions @replace_them = qw/100 400 600 700 800 110 410 610 710 810 111 411 611 711 811 130 240 440 730 830 650 651 655/; } else { next; } # Dummy up the bib-side data $tmpl->append_fields( map { MARC::Field->new( $_, $i1, $i2, @data ) } @replace_them ); # Construct some 'replace' rules push @rule_fields, map { $_ . $sf . '[0~\)' .$id . '$]' } @replace_them; } # Insert the replace rules into the template $tmpl->append_fields( MARC::Field->new( '905' => ' ' => ' ' => 'r' => join(',', @rule_fields ) ) ); $xml = $tmpl->as_xml_record; $xml =~ s/^<\?.+?\?>$//mo; $xml =~ s/\n//sgo; $xml =~ s/>\s+ 'UTF-8'); use MARC::Charset; use strict; MARC::Charset->assume_unicode(1); my $target_xml = shift; my $source_xml = shift; my $field_spec = shift; my $force_add = shift || 0; my $target_r = MARC::Record->new_from_xml( $target_xml ); my $source_r = MARC::Record->new_from_xml( $source_xml ); return $target_xml unless ($target_r && $source_r); my @field_list = split(',', $field_spec); my %fields; for my $f (@field_list) { $f =~ s/^\s*//; $f =~ s/\s*$//; if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { my $field = $1; $field =~ s/\s+//; my $sf = $2; $sf =~ s/\s+//; my $match = $3; $match =~ s/^\s*//; $match =~ s/\s*$//; $fields{$field} = { sf => [ split('', $sf) ] }; if ($match) { my ($msf,$mre) = split('~', $match); if (length($msf) > 0 and length($mre) > 0) { $msf =~ s/^\s*//; $msf =~ s/\s*$//; $mre =~ s/^\s*//; $mre =~ s/\s*$//; $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; } } } } for my $f ( keys %fields) { if ( @{$fields{$f}{sf}} ) { for my $from_field ($source_r->field( $f )) { my @tos = $target_r->field( $f ); if (!@tos) { next if (exists($fields{$f}{match}) and !$force_add); my @new_fields = map { $_->clone } $source_r->field( $f ); $target_r->insert_fields_ordered( @new_fields ); } else { for my $to_field (@tos) { if (exists($fields{$f}{match})) { next unless (grep { $_ =~ $fields{$f}{match}{re} } $to_field->subfield($fields{$f}{match}{sf})); } my @new_sf = map { ($_ => $from_field->subfield($_)) } @{$fields{$f}{sf}}; $to_field->add_subfields( @new_sf ); } } } } else { my @new_fields = map { $_->clone } $source_r->field( $f ); $target_r->insert_fields_ordered( @new_fields ); } } $target_xml = $target_r->as_xml_record; $target_xml =~ s/^<\?.+?\?>$//mo; $target_xml =~ s/\n//sgo; $target_xml =~ s/>\s+ 'UTF8'); use MARC::Charset; use UUID::Tiny ':std'; MARC::Charset->assume_unicode(1); my $xml = shift() or return undef; my $r; # Prevent errors in XML parsing from blowing out ungracefully eval { $r = MARC::Record->new_from_xml( $xml ); 1; } or do { return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml); }; if (!$r) { return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml); } # From http://www.loc.gov/standards/sourcelist/subject.html my $thes_code_map = { a => 'lcsh', b => 'lcshac', c => 'mesh', d => 'nal', k => 'cash', n => 'notapplicable', r => 'aat', s => 'sears', v => 'rvm', }; # Default to "No attempt to code" if the leader is horribly broken my $fixed_field = $r->field('008'); my $thes_char = '|'; if ($fixed_field) { $thes_char = substr($fixed_field->data(), 11, 1) || '|'; } my $thes_code = 'UNDEFINED'; if ($thes_char eq 'z') { # Grab the 040 $f per http://www.loc.gov/marc/authority/ad040.html $thes_code = $r->subfield('040', 'f') || 'UNDEFINED'; } elsif ($thes_code_map->{$thes_char}) { $thes_code = $thes_code_map->{$thes_char}; } my $auth_txt = ''; my $head = $r->field('1..'); if ($head) { # Concatenate all of these subfields together, prefixed by their code # to prevent collisions along the lines of "Fiction, North Carolina" foreach my $sf ($head->subfields()) { $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1]; } } if ($auth_txt) { my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT'); my $result = spi_exec_prepared($stmt, $auth_txt); my $norm_txt = $result->{rows}[0]->{norm_text}; spi_freeplan($stmt); undef($stmt); return $head->tag() . "_" . $thes_code . " " . $norm_txt; } return 'NOHEADING_' . $thes_code . ' ' . create_uuid_as_string(UUID_MD5, $xml); $func$ LANGUAGE 'plperlu' IMMUTABLE; CREATE OR REPLACE FUNCTION vandelay.strip_field ( xml TEXT, field TEXT ) RETURNS TEXT AS $_$ use MARC::Record; use MARC::File::XML (BinaryEncoding => 'UTF-8'); use MARC::Charset; use strict; MARC::Charset->assume_unicode(1); my $xml = shift; my $r = MARC::Record->new_from_xml( $xml ); return $xml unless ($r); my $field_spec = shift; my @field_list = split(',', $field_spec); my %fields; for my $f (@field_list) { $f =~ s/^\s*//; $f =~ s/\s*$//; if ($f =~ /^(.{3})(\w*)(?:\[([^]]*)\])?$/) { my $field = $1; $field =~ s/\s+//; my $sf = $2; $sf =~ s/\s+//; my $match = $3; $match =~ s/^\s*//; $match =~ s/\s*$//; $fields{$field} = { sf => [ split('', $sf) ] }; if ($match) { my ($msf,$mre) = split('~', $match); if (length($msf) > 0 and length($mre) > 0) { $msf =~ s/^\s*//; $msf =~ s/\s*$//; $mre =~ s/^\s*//; $mre =~ s/\s*$//; $fields{$field}{match} = { sf => $msf, re => qr/$mre/ }; } } } } for my $f ( keys %fields) { for my $to_field ($r->field( $f )) { if (exists($fields{$f}{match})) { next unless (grep { $_ =~ $fields{$f}{match}{re} } $to_field->subfield($fields{$f}{match}{sf})); } if ( @{$fields{$f}{sf}} ) { $to_field->delete_subfield(code => $fields{$f}{sf}); } else { $r->delete_field( $to_field ); } } } $xml = $r->as_xml_record; $xml =~ s/^<\?.+?\?>$//mo; $xml =~ s/\n//sgo; $xml =~ s/>\s+ 'UTF-8'); use MARC::Charset; MARC::Charset->assume_unicode(1); my $xml = shift; my $r = MARC::Record->new_from_xml( $xml ); return_next( { tag => 'LDR', value => $r->leader } ); for my $f ( $r->fields ) { if ($f->is_control_field) { return_next({ tag => $f->tag, value => $f->data }); } else { for my $s ($f->subfields) { return_next({ tag => $f->tag, ind1 => $f->indicator(1), ind2 => $f->indicator(2), subfield => $s->[0], value => $s->[1] }); if ( $f->tag eq '245' and $s->[0] eq 'a' ) { my $trim = $f->indicator(2) || 0; return_next({ tag => 'tnf', ind1 => $f->indicator(1), ind2 => $f->indicator(2), subfield => 'a', value => substr( $s->[1], $trim ) }); } } } } return undef; $func$ LANGUAGE PLPERLU; CREATE OR REPLACE FUNCTION authority.flatten_marc ( TEXT ) RETURNS SETOF authority.full_rec AS $func$ use MARC::Record; use MARC::File::XML (BinaryEncoding => 'UTF-8'); use MARC::Charset; MARC::Charset->assume_unicode(1); my $xml = shift; my $r = MARC::Record->new_from_xml( $xml ); return_next( { tag => 'LDR', value => $r->leader } ); for my $f ( $r->fields ) { if ($f->is_control_field) { return_next({ tag => $f->tag, value => $f->data }); } else { for my $s ($f->subfields) { return_next({ tag => $f->tag, ind1 => $f->indicator(1), ind2 => $f->indicator(2), subfield => $s->[0], value => $s->[1] }); } } } return undef; $func$ LANGUAGE PLPERLU; COMMIT;