From 43ada1c6db2d290c5e8fa3adac5b9cfe747bd47e Mon Sep 17 00:00:00 2001 From: miker Date: Mon, 29 Aug 2005 16:24:52 +0000 Subject: [PATCH] using Unicode::Normalize to get all incoming text into Unicode normal form D git-svn-id: svn://svn.open-ils.org/ILS/trunk@1758 dcc99617-32d9-48b4-a31d-7c20da2025e4 --- .../Application/Storage/Driver/Pg/fts.pm | 5 ++-- .../OpenILS/Application/Storage/WORM.pm | 27 +++++++++++-------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm b/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm index c6c64dfba3..c508346e27 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Storage/Driver/Pg/fts.pm @@ -3,11 +3,12 @@ #------------------------------------------------------------------------------- package OpenILS::Application::Storage::FTS; use OpenSRF::Utils::Logger qw/:level/; + use Unicode::Normalize; my $log = 'OpenSRF::Utils::Logger'; sub compile { my $self = shift; - my $term = shift; + my $term = NFD(shift()); $log->debug("Raw term: $term",DEBUG); @@ -17,7 +18,7 @@ $self = ref($self) || $self; $self = bless {} => $self; - $term =~ s/\pM//gos; + $term =~ s/(\pM+)//gos; $self->decompose($term); my $newterm = join('&', $self->words); diff --git a/Open-ILS/src/perlmods/OpenILS/Application/Storage/WORM.pm b/Open-ILS/src/perlmods/OpenILS/Application/Storage/WORM.pm index 2f6eb428b6..54d44b3bc5 100644 --- a/Open-ILS/src/perlmods/OpenILS/Application/Storage/WORM.pm +++ b/Open-ILS/src/perlmods/OpenILS/Application/Storage/WORM.pm @@ -2,6 +2,7 @@ package OpenILS::Application::Storage::WORM; use base qw/OpenILS::Application::Storage/; use strict; use warnings; +use Unicode::Normalize; use OpenSRF::EX qw/:try/; use OpenSRF::Utils::SettingsClient; @@ -49,13 +50,13 @@ my @fp_mods_xpath = ( ], fixup => ' do { - $text = lc($text); + $text = lc(NFD($text)); + $text =~ s/\pM+//gso; $text =~ s/\s+/ /sgo; $text =~ s/^\s*(.+)\s*$/$1/sgo; $text =~ s/\b(?:the|an?)\b//sgo; $text =~ s/\[.[^\]]+\]//sgo; $text =~ s/\s*[;\/\.]*$//sgo; - $text =~ s/\pM+//gso; }; ', }, @@ -66,7 +67,8 @@ my @fp_mods_xpath = ( ], fixup => ' do { - $text = lc($text); + $text = lc(NFD($text)); + $text =~ s/\pM+//gso; $text =~ s/\s+/ /sgo; $text =~ s/^\s*(.+)\s*$/$1/sgo; $text =~ s/,?\s+.*$//sgo; @@ -90,7 +92,8 @@ my @fp_mods_xpath = ( ], fixup => ' do { - $text = lc($text); + $text = lc(NFD($text)); + $text =~ s/\pM+//gso; $text =~ s/\s+/ /sgo; $text =~ s/^\s*(.+)\s*$/$1/sgo; $text =~ s/\b(?:the|an?)\b//sgo; @@ -109,7 +112,8 @@ my @fp_mods_xpath = ( ], fixup => ' do { - $text = lc($text); + $text = lc(NFD($text)); + $text =~ s/\pM+//gso; $text =~ s/\s+/ /sgo; $text =~ s/^\s*(.+)\s*$/$1/sgo; $text =~ s/,?\s+.*$//sgo; @@ -479,8 +483,8 @@ sub _marcxml_to_full_rows { my $ns = new Fieldmapper::metabib::full_rec; $ns->tag( 'LDR' ); - my $val = $tagline->textContent; - $val =~ s/(\pM)//gso; + my $val = NFD($tagline->textContent); + $val =~ s/(\pM+)//gso; $ns->value( $val ); push @ns_list, $ns; @@ -492,8 +496,8 @@ sub _marcxml_to_full_rows { my $ns = new Fieldmapper::metabib::full_rec; $ns->tag( $tagline->getAttribute( "tag" ) ); - my $val = $tagline->textContent; - $val =~ s/(\pM)//gso; + my $val = NFD($tagline->textContent); + $val =~ s/(\pM+)//gso; $ns->value( $val ); push @ns_list, $ns; @@ -515,8 +519,8 @@ sub _marcxml_to_full_rows { $ns->ind1( $ind1 ); $ns->ind2( $ind2 ); $ns->subfield( $data->getAttribute( "code" ) ); - my $val = $data->textContent; - $val =~ s/(\pM)//gso; + my $val = NFD($data->textContent); + $val =~ s/(\pM+)//gso; $ns->value( lc($val) ); push @ns_list, $ns; @@ -548,6 +552,7 @@ sub _get_field_value { $string .= $value->textContent . " "; } } + $string = NFD($string); $string =~ s/(\pM)//gso; return lc($string); } -- 2.43.2