From d939d7d09f231319a59f7bc309b7e40c451f273e Mon Sep 17 00:00:00 2001 From: Jason Stephenson Date: Wed, 9 Nov 2011 15:34:27 -0500 Subject: [PATCH] Add clean_marc function to OpenILS::Utils::Normalize. Add a library function to clean up MARC records for how we like to store them in the biblio.record_entry table. Having this in a library will reduce code duplication. Also, replace nearly identical code in OpenILS::Application::Vandelay and OpenILS::Application::Acq::Order with calls to this new function. Signed-off-by: Jason Stephenson Signed-off-by: Dan Scott --- .../lib/OpenILS/Application/Acq/Order.pm | 9 ++----- .../lib/OpenILS/Application/Vandelay.pm | 8 ++---- .../perlmods/lib/OpenILS/Utils/Normalize.pm | 26 ++++++++++++++++++- 3 files changed, 29 insertions(+), 14 deletions(-) diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Acq/Order.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Acq/Order.pm index 1b1aff30e9..9eef738b64 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Acq/Order.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Acq/Order.pm @@ -181,6 +181,7 @@ use OpenSRF::Utils::JSON; use OpenSRF::AppSession; use OpenILS::Utils::Fieldmapper; use OpenILS::Utils::CStoreEditor q/:funcs/; +use OpenILS::Utils::Normalize qw/clean_marc/; use OpenILS::Const qw/:const/; use OpenSRF::EX q/:try/; use OpenILS::Application::AppUtils; @@ -1258,13 +1259,7 @@ sub upload_records { last unless $r; try { - ($xml = $r->as_xml_record()) =~ s/\n//sog; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+</></go; - $xml =~ s/\p{Cc}//go; - $xml = $U->entityize($xml); - $xml =~ s/[\x00-\x1f]//go; - + $xml = clean_marc($r); } catch Error with { $err = shift; $logger->warn("Proccessing XML of record $count in set $key failed with error $err. 
Skipping this record"); diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm index b30d6527c9..c4d4332525 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Vandelay.pm @@ -9,6 +9,7 @@ use OpenSRF::Utils::SettingsClient; use OpenSRF::Utils::Cache; use OpenILS::Utils::Fieldmapper; use OpenILS::Utils::CStoreEditor qw/:funcs/; +use OpenILS::Utils::Normalize qw/clean_marc/; use MARC::Batch; use MARC::Record; use MARC::File::XML ( BinaryEncoding => 'UTF-8' ); @@ -285,12 +286,7 @@ sub process_spool { $logger->info("processing record $count"); try { - (my $xml = $r->as_xml_record()) =~ s/\n//sog; - $xml =~ s/^<\?xml.+\?\s*>//go; - $xml =~ s/>\s+</></go; - $xml =~ s/\p{Cc}//go; - $xml = $U->entityize($xml); - $xml =~ s/[\x00-\x1f]//go; + my $xml = clean_marc($r); my $qrec; # Check the leader to ensure we've got something resembling the expected diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm index e3e699f813..9ddca6e1df 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Utils/Normalize.pm @@ -3,9 +3,13 @@ use strict; use warnings; use Unicode::Normalize; use Encode; +use UNIVERSAL qw/isa/; +use MARC::Record; +use MARC::File::XML ( BinaryEncoding => 'UTF-8' ); +use OpenILS::Application::AppUtils; use Exporter 'import'; -our @EXPORT_OK = qw( naco_normalize search_normalize ); +our @EXPORT_OK = qw( clean_marc naco_normalize search_normalize ); sub naco_normalize { my $str = decode_utf8(shift); @@ -97,4 +101,24 @@ sub _normalize_codes { return lc $str; } +# Cleans up a MARC::Record or MARCXML string for storage in the +# Open-ILS database. +# +# Takes either a MARC::Record or a string of MARCXML. +# +# Returns a string of MARCXML as Open-ILS likes to store it. +# +# Assumes input is already in UTF-8. 
+sub clean_marc { + my $input = shift; + my $xml = (isa $input, 'MARC::Record') ? $input->as_xml_record() : $input; + $xml =~ s/\n//sog; + $xml =~ s/^<\?xml.+\?\s*>//go; + $xml =~ s/>\s+</></go; + $xml =~ s/\p{Cc}//go; + $xml = OpenILS::Application::AppUtils->entityize($xml); + $xml =~ s/[\x00-\x1f]//go; + return $xml; +} + 1; -- 2.43.2