From 17642a0bcf644256febb1a155716deee2cb7398a Mon Sep 17 00:00:00 2001 From: miker Date: Wed, 5 Sep 2007 15:32:51 +0000 Subject: [PATCH] stored procedures providing NACO normalization for controlled headings, esp authority records git-svn-id: svn://svn.open-ils.org/ILS/trunk@7761 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- Open-ILS/src/sql/Pg/020.schema.functions.sql | 43 ++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/Open-ILS/src/sql/Pg/020.schema.functions.sql b/Open-ILS/src/sql/Pg/020.schema.functions.sql index 9a17029a92..c2a85bd4f5 100644 --- a/Open-ILS/src/sql/Pg/020.schema.functions.sql +++ b/Open-ILS/src/sql/Pg/020.schema.functions.sql @@ -16,6 +16,49 @@ CREATE OR REPLACE FUNCTION public.non_filing_normalize ( TEXT, "char" ) RETURNS ); $$ LANGUAGE SQL STRICT IMMUTABLE; +CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT, TEXT ) RETURNS TEXT AS $func$ + my $txt = lc(shift); + my $sf = shift; + + $txt =~ s/\pM+//go; # Remove diacritics + + $txt =~ s/\xE6/AE/go; # Convert ae digraph + $txt =~ s/\x{153}/OE/go;# Convert oe digraph + $txt =~ s/\xFE/TH/go; # Convert Icelandic thorn + + $txt =~ tr/\x{2070}\x{2071}\x{2072}\x{2073}\x{2074}\x{2075}\x{2076}\x{2077}\x{2078}\x{2079}\x{207A}\x{207B}/0123456789+-/;# Convert superscript numbers + $txt =~ tr/\x{2080}\x{2081}\x{2082}\x{2083}\x{2084}\x{2085}\x{2086}\x{2087}\x{2088}\x{2089}\x{208A}\x{208B}/0123456889+-/;# Convert subscript numbers + + $txt =~ tr/\x{0251}\x{03B1}\x{03B2}\x{0262}\x{03B3}/AABGG/; # Convert Latin and Greek + $txt =~ tr/\x{2113}\xF0\!\"\(\)\-\{\}\<\>\;\:\.\?\xA1\xBF\/\\\@\*\%\=\xB1\+\xAE\xA9\x{2117}\$\xA3\x{FFE1}\xB0\^\_\~\`/LD /; # Convert Misc + $txt =~ tr/\'\[\]\|//d; # Remove Misc + + if ($sf =~ /^a/o) { + my $commapos = index($txt,','); + if ($commapos > -1) { + if ($commapos != length($txt) - 1) { + my @list = split /,/, $txt; + my $first = shift @list; + $txt = $first . ',' . join(' ', @list); + } else { + $txt =~ s/,/ /go; + } + } + } else { + $txt =~ s/,/ /go; + } + + $txt =~ s/\s+/ /go; # Compress multiple spaces + $txt =~ s/^\s+//o; # Remove leading space + $txt =~ s/\s+$//o; # Remove trailing space + + return $txt; +$func$ LANGUAGE 'plperlu' STRICT IMMUTABLE; + +CREATE OR REPLACE FUNCTION public.naco_normalize( TEXT ) RETURNS TEXT AS $func$ + SELECT public.naco_normalize($1,''); +$func$ LANGUAGE 'sql' STRICT IMMUTABLE; + CREATE OR REPLACE FUNCTION public.call_number_dewey( TEXT ) RETURNS TEXT AS $$ my $txt = shift; $txt =~ s/^\s+//o; -- 2.43.2