1 -- Use spi_prepare/spi_exec_prepared to delegate escaping issues to the database
2 -- (where they belong) and avoid ugly MARC corner cases
-- Log version '0476' in config.upgrade_log.
5 INSERT INTO config.upgrade_log (version) VALUES ('0476'); -- dbs
7 CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$
# authority.normalize_heading(TEXT) RETURNS TEXT -- PL/Perl (plperlu) body.
#
# Builds a text key for the heading of a MARCXML authority record.
#
# Argument:
#   the first (only) argument is the MARCXML text; a false value (for
#   example NULL or an empty string) makes the function return undef.
#
# Return values visible in this body:
#   'BAD_MARCXML_' . <UUID_MD5 UUID of the input>
#       on the parse-failure paths
#   <heading tag> . '_' . <thesaurus code> . ' ' . <normalized heading text>
#       when heading text was normalized through public.naco_normalize()
#   'NOHEADING_' . <thesaurus code> . ' ' . <UUID_MD5 UUID of the input>
#       the final fallback
#
# NOTE(review): this listing does not show every line of the body; several
# declarations, guards and closing braces fall between the visible
# statements. The comments below describe only what is visible -- confirm
# the surrounding control flow against the complete script.
13 use MARC::File::XML (BinaryEncoding => 'UTF8');
14 use UUID::Tiny ':std';
# Take the single argument; bail out with undef when it is false.
16 my $xml = shift() or return undef;
20 # Prevent errors in XML parsing from blowing out ungracefully
# Parse the MARCXML text into a MARC::Record object.
22 $r = MARC::Record->new_from_xml( $xml );
# NOTE(review): the two identical returns below presumably sit in separate
# failure guards (parse exception / empty record) that are not visible here.
# Both return the 'BAD_MARCXML_' prefix plus a UUID computed from the input.
25 return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
29 return 'BAD_MARCXML_' . create_uuid_as_string(UUID_MD5, $xml);
# NOTE(review): the $thes_code_map definition that this URL documents is not
# visible in this listing.
32 # From http://www.loc.gov/standards/sourcelist/subject.html
45 # Default to "No attempt to code" ('|') if position 11 of the 008 field is empty
46 my $fixed_field = $r->field('008');
# Take the single character at offset 11 (0-based) of the 008 data. The ||
# fallback applies to any false result, so a literal '0' also becomes '|'.
49 $thes_char = substr($fixed_field->data(), 11, 1) || '|';
# Thesaurus code used in both the normal and the NOHEADING_ return values;
# stays 'UNDEFINED' unless one of the branches below replaces it.
52 my $thes_code = 'UNDEFINED';
54 if ($thes_char eq 'z') {
55 # Grab the 040 $f per http://www.loc.gov/marc/authority/ad040.html
56 $thes_code = $r->subfield('040', 'f') || 'UNDEFINED';
# Otherwise translate the character through $thes_code_map when it has a
# true entry for it.
57 } elsif ($thes_code_map->{$thes_char}) {
58 $thes_code = $thes_code_map->{$thes_char};
# Fetch the heading field: a field whose tag matches the pattern '1..'.
62 my $head = $r->field('1..');
64 # Concatenate all of these subfields together, prefixed by their code
65 # to prevent collisions along the lines of "Fiction, North Carolina"
# Each $sf is a [code, value] pair; every pair is appended to $auth_txt as
# the double-dagger delimiter, the code, a space, then the value.
66 foreach my $sf ($head->subfields()) {
67 $auth_txt .= '‡' . $sf->[0] . ' ' . $sf->[1];
# Normalize the collected text in the database. Passing $auth_txt as a
# parameter of a prepared statement leaves quoting/escaping to PostgreSQL.
72 my $stmt = spi_prepare('SELECT public.naco_normalize($1) AS norm_text', 'TEXT');
73 my $result = spi_exec_prepared($stmt, $auth_txt);
# Only the first row's norm_text column is used.
74 my $norm_txt = $result->{rows}[0]->{norm_text};
# Normal result: heading tag, underscore, thesaurus code, space, normalized text.
77 return $head->tag() . "_" . $thes_code . " " . $norm_txt;
# Fallback result: no heading key was returned above, so emit a NOHEADING_
# key built from the thesaurus code and a UUID computed from the input.
80 return 'NOHEADING_' . $thes_code . ' ' . create_uuid_as_string(UUID_MD5, $xml);
81 $func$ LANGUAGE 'plperlu' IMMUTABLE;