]> git.evergreen-ils.org Git - working/Evergreen.git/blob - Open-ILS/src/sql/Pg/upgrade/0138.schema.in-db-encoding-fixes.sql
LP#1643709: Stamping upgrade scripts
[working/Evergreen.git] / Open-ILS / src / sql / Pg / upgrade / 0138.schema.in-db-encoding-fixes.sql
1
2 BEGIN; -- Open a transaction so the statements below, up to COMMIT, apply atomically
3
4 INSERT INTO config.upgrade_log (version) VALUES ('0138'); -- dbs: stamp this script's version ('0138') into config.upgrade_log
-- public.naco_normalize( text, subfield_code ) RETURNS TEXT
--
-- Produces a lowercased, diacritic-free, punctuation-stripped form of the
-- first argument. The second argument is a subfield code: when it starts
-- with 'a', the first comma in the text is kept (unless it is the final
-- character); otherwise every comma becomes a space.
-- Declared STRICT, so a NULL in either argument yields NULL without running
-- the body.
CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $func$
        use Unicode::Normalize;
        use Encode;

        # When working with Unicode data, the first step is to decode it from
        # UTF-8 bytes into a character string; after that, lowercasing is safe
        my $txt = lc(decode_utf8(shift));
        my $sf = shift;

        # Decompose so that combining marks become separate characters,
        # then drop them
        $txt = NFD($txt);
        $txt =~ s/\pM+//g;      # Remove diacritics

        # The text is already lowercase at this point, so every replacement
        # below is lowercase as well; this keeps e.g. "aether" and the
        # ae-digraph spelling normalizing to the same value
        $txt =~ s/\xE6/ae/g;    # Convert ae digraph
        $txt =~ s/\x{153}/oe/g; # Convert oe digraph
        $txt =~ s/\xFE/th/g;    # Convert Icelandic thorn

        # Superscript 1, 2 and 3 live in Latin-1 (U+00B9, U+00B2, U+00B3),
        # not in the U+2070 block with the other superscript digits
        $txt =~ tr/\x{2070}\xB9\xB2\xB3\x{2074}\x{2075}\x{2076}\x{2077}\x{2078}\x{2079}\x{207A}\x{207B}/0123456789+-/;# Convert superscript numbers
        $txt =~ tr/\x{2080}\x{2081}\x{2082}\x{2083}\x{2084}\x{2085}\x{2086}\x{2087}\x{2088}\x{2089}\x{208A}\x{208B}/0123456789+-/;# Convert subscript numbers

        $txt =~ tr/\x{0251}\x{03B1}\x{03B2}\x{0262}\x{03B3}/aabgg/;             # Convert Latin and Greek
        # tr pads a short replacement list with its last character, so the
        # first two characters map to 'l' and 'd' and all the rest to a space
        $txt =~ tr/\x{2113}\xF0\!\"\(\)\-\{\}\<\>\;\:\.\?\xA1\xBF\/\\\@\*\%\=\xB1\+\xAE\xA9\x{2117}\$\xA3\x{FFE1}\xB0\^\_\~\`/ld /;     # Convert Misc
        $txt =~ tr/\'\[\]\|//d;                                                 # Remove Misc

        if ($sf && $sf =~ /^a/) {
                # Subfield 'a': keep the first comma unless it is the very
                # last character; all later commas turn into spaces
                my $commapos = index($txt, ',');
                if ($commapos > -1) {
                        if ($commapos != length($txt) - 1) {
                                my ($first, @rest) = split /,/, $txt;
                                $txt = $first . ',' . join(' ', @rest);
                        } else {
                                $txt =~ s/,/ /g;
                        }
                }
        } else {
                $txt =~ s/,/ /g;
        }

        $txt =~ s/\s+/ /g;      # Compress multiple spaces
        $txt =~ s/^\s+//;       # Remove leading space
        $txt =~ s/\s+$//;       # Remove trailing space

        # Encode the outgoing string back to UTF-8: characters that are not
        # touched above (for example non-Latin letters) are still present
        return encode_utf8($txt);
$func$ LANGUAGE 'plperlu' STRICT IMMUTABLE;
51
-- biblio.flatten_marc( marcxml ) RETURNS SETOF metabib.full_rec
--
-- Parses the MARCXML document passed as the only argument and returns one
-- row per leader, control field and data-field subfield. For every 245 $a
-- it also returns a 'tnf' row whose value is the subfield text with the
-- leading characters counted by the second indicator removed.
CREATE OR REPLACE FUNCTION biblio.flatten_marc ( TEXT ) RETURNS SETOF metabib.full_rec AS $func$

use MARC::Record;
use MARC::File::XML (BinaryEncoding => 'UTF-8');

my $xml = shift;
my $r = MARC::Record->new_from_xml( $xml );

# The leader is reported under the pseudo-tag 'LDR'
return_next( { tag => 'LDR', value => $r->leader } );

for my $f ( $r->fields ) {
        my $tag = $f->tag;

        # Control fields have no indicators or subfields, only data
        if ($f->is_control_field) {
                return_next({ tag => $tag, value => $f->data });
                next;
        }

        # Indicators are per-field, so fetch them once rather than per subfield
        my $ind1 = $f->indicator(1);
        my $ind2 = $f->indicator(2);

        for my $s ($f->subfields) {
                my ($code, $value) = @$s;

                return_next({
                        tag      => $tag,
                        ind1     => $ind1,
                        ind2     => $ind2,
                        subfield => $code,
                        value    => $value
                });

                next unless ($tag eq '245' && $code eq 'a');

                # Only a single digit is a usable character count; a blank
                # or any other indicator value means nothing is skipped
                my $skip  = (defined $ind2 && $ind2 =~ /^[0-9]$/) ? $ind2 : 0;
                my $title = defined $value ? $value : '';

                # substr() returns undef when the offset lies beyond the end
                # of the string; clamp so a too-large indicator produces an
                # empty string instead
                $skip = length($title) if $skip > length($title);

                return_next({
                        tag      => 'tnf',
                        ind1     => $ind1,
                        ind2     => $ind2,
                        subfield => 'a',
                        value    => substr( $title, $skip )
                });
        }
}

# All rows were already emitted through return_next; PL/Perl set-returning
# functions written this way finish with "return undef"
return undef;

$func$ LANGUAGE PLPERLU;
92
93
94 COMMIT; -- Close the transaction opened by BEGIN at the top of this script
95