BEGIN;

-- Make the authority heading normalization routine more defensive.
-- Also drop back to a plain index for 2.0; we will get more restrictive over time.

INSERT INTO config.upgrade_log (version) VALUES ('0402'); -- dbs

-- authority.normalize_heading( marcxml TEXT ) RETURNS TEXT
--
-- Builds a normalized key for an authority record from its MARCXML.
--
-- Return value:
--   NULL                                      when the argument is NULL or otherwise false in Perl
--   'BAD_MARCXML_<uuid>'                      when the XML cannot be parsed into a MARC record
--   '<1xx tag>_<thesaurus> <normalized text>' when a 1xx heading field with subfields is present
--   'NOHEADING_<thesaurus> <uuid>'            when no usable 1xx heading is present
--
-- <uuid> is an MD5-based UUID derived from the input XML, so the same input
-- always produces the same placeholder value.
CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$
    use strict;
    use warnings;
    use utf8;

    use MARC::Record;
    use MARC::File::XML (BinaryEncoding => 'UTF8');
    use UUID::Tiny ':std';

    # An undefined return value becomes an SQL NULL
    my $xml = shift() or return undef;

    # Prevent errors in XML parsing from blowing out ungracefully: a parse
    # that dies and a parse that returns nothing are treated the same way
    my $r = eval { MARC::Record->new_from_xml( $xml ) };
    if (!$r) {
        return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
    }

    # From http://www.loc.gov/standards/sourcelist/subject.html
    my $thes_code_map = {
        a => 'lcsh',
        b => 'lcshac',
        c => 'mesh',
        d => 'nal',
        k => 'cash',
        n => 'notapplicable',
        r => 'aat',
        s => 'sears',
        v => 'rvm',
    };

    # Default to "No attempt to code" if the 008 field is missing or too
    # short to contain character position 11
    my $fixed_field = $r->field('008');
    my $thes_char = '|';
    if ($fixed_field) {
        my $fixed_data = $fixed_field->data();
        if (defined $fixed_data && length($fixed_data) > 11) {
            $thes_char = substr($fixed_data, 11, 1) || '|';
        }
    }

    my $thes_code = 'UNDEFINED';

    if ($thes_char eq 'z') {
        # Grab the 040 $f per http://www.loc.gov/marc/authority/ad040.html
        $thes_code = $r->subfield('040', 'f') || 'UNDEFINED';
    } elsif ($thes_code_map->{$thes_char}) {
        $thes_code = $thes_code_map->{$thes_char};
    }

    my $auth_txt = '';
    my $head = $r->field('1..');
    if ($head) {
        # Concatenate all of these subfields together, prefixed by their code
        # to prevent collisions along the lines of "Fiction, North Carolina"
        foreach my $sf ($head->subfields()) {
            $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1];
        }
    }

    # Apostrophes were historically stripped to keep the hand-built SQL
    # literal valid; keep stripping them so the text handed to
    # naco_normalize is the same as before
    $auth_txt =~ s/'//g;

    if ($auth_txt) {
        # Pass the heading as a bound parameter instead of interpolating it
        # into the SQL text, so that no character in the heading (backslashes,
        # for instance) can alter or break the statement
        my $plan = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT');
        my $result = spi_exec_prepared($plan, $auth_txt);
        spi_freeplan($plan);

        my $norm_txt = $result->{rows}[0]->{norm_text};
        return $head->tag() . "_" . $thes_code . " " . $norm_txt;
    }

    return 'NOHEADING_' . $thes_code . ' ' . create_uuid_as_string(UUID_MD5, $xml);
$func$ LANGUAGE 'plperlu' IMMUTABLE;

-- Replace the unique index with a plain (non-unique) partial index
DROP INDEX authority.unique_by_heading_and_thesaurus;

CREATE INDEX by_heading_and_thesaurus
    ON authority.record_entry (authority.normalize_heading(marc))
    WHERE deleted IS FALSE or deleted = FALSE
;

COMMIT;