3 INSERT INTO config.upgrade_log (version) VALUES ('0400'); -- dbs
5 CREATE OR REPLACE FUNCTION authority.normalize_heading( TEXT ) RETURNS TEXT AS $func$
9 use MARC::File::XML (BinaryEncoding => 'UTF8');
12 my $r = MARC::Record->new_from_xml( $xml );
13 return undef unless ($r);
15 # From http://www.loc.gov/standards/sourcelist/subject.html
28 # Default to "No attempt to code" if the leader is horribly broken
29 my $thes_char = substr($r->field('008')->data(), 11, 1) || '|';
31 my $thes_code = 'UNDEFINED';
33 if ($thes_char eq 'z') {
34 # Grab the 040 $f per http://www.loc.gov/marc/authority/ad040.html
35 $thes_code = $r->subfield('040', 'f') || 'UNDEFINED';
36 } elsif ($thes_code_map->{$thes_char}) {
37 $thes_code = $thes_code_map->{$thes_char};
40 my $head = $r->field('1..');
42 foreach my $sf ($head->subfields()) {
43 $auth_txt .= $sf->[1];
47 # Perhaps better to parameterize the spi and pass as a parameter
49 my $result = spi_exec_query("SELECT public.naco_normalize('$auth_txt') AS norm_text");
50 my $norm_txt = $result->{rows}[0]->{norm_text};
52 return $head->tag() . "_" . $thes_code . " " . $norm_txt;
53 $func$ LANGUAGE 'plperlu' IMMUTABLE;
55 COMMENT ON FUNCTION authority.normalize_heading( TEXT ) IS $$
57 * Extract the authority heading, thesaurus, and NACO-normalized values
58 * from an authority record. The primary purpose is to build a unique
59 * index to defend against duplicated authority records from the same
66 -- Do this outside of a transaction to avoid failure if duplicate
67 -- authority heading / thesaurus / heading text entries already
68 -- exist in the database:
69 CREATE UNIQUE INDEX unique_by_heading_and_thesaurus
70 ON authority.record_entry (authority.normalize_heading(marc))
71 WHERE deleted IS FALSE or deleted = FALSE
74 -- If the unique index fails, uncomment the following to create
75 -- a regular index that will help find the duplicates in a hurry:
76 --CREATE INDEX by_heading_and_thesaurus
77 -- ON authority.record_entry (authority.normalize_heading(marc))
78 -- WHERE deleted IS FALSE or deleted = FALSE
81 -- Then find the duplicates like so to get an idea of how much
82 -- pain you're looking at to clean things up:
83 --SELECT id, authority.normalize_heading(marc)
84 -- FROM authority.record_entry
85 -- WHERE authority.normalize_heading(marc) IN (
86 -- SELECT authority.normalize_heading(marc)
87 -- FROM authority.record_entry
88 -- GROUP BY authority.normalize_heading(marc)
89 -- HAVING COUNT(*) > 1
93 -- Once you have removed the duplicates and the CREATE UNIQUE INDEX
94 -- statement succeeds, drop the temporary index to avoid unnecessary
96 -- DROP INDEX authority.by_heading_and_thesaurus;