# Provenance (header of the commit that introduced this module):
#   From 5bf63453e851810ba9179d45ff193bdfbc8dbff9 Mon Sep 17 00:00:00 2001
#   From: miker
#   Date: Fri, 4 Nov 2005 21:31:56 +0000
#   Subject: [PATCH] WoRM Jr
#   git-svn-id: svn://svn.open-ils.org/ILS/trunk@1959 dcc99617-32d9-48b4-a31d-7c20da2025e4
#   File: Open-ILS/src/perlmods/OpenILS/Application/WoRM.pm
#         (new file mode 100644, index 0000000000..f2803c860d, 1232 insertions)

# --------------------------------------------------------------------------------
# OpenILS::Application::WoRM
#
# Base package: holds the state shared by every WoRM sub-package in this file
# (XML parser, XSLT engine, compiled MODS stylesheet, cached storage session and
# the XPath table loaded from config.metabib_field) plus transaction helpers.
# --------------------------------------------------------------------------------
package OpenILS::Application::WoRM;
use base qw/OpenSRF::Application/;
use open qw/:utf8/;

use Unicode::Normalize;
use OpenSRF::EX qw/:try/;

use OpenSRF::Utils::SettingsClient;
use OpenSRF::Utils::Logger qw/:level/;

use OpenILS::Utils::FlatXML;
use OpenILS::Utils::Fieldmapper;
use JSON;

use OpenILS::Utils::Fieldmapper;

use XML::LibXML;
use XML::LibXSLT;
use Time::HiRes qw(time);


# These are declared with "our" at file scope, so the short names stay visible
# in every package below while living in OpenILS::Application::WoRM.
our $log = 'OpenSRF::Utils::Logger';
our $xml_util = OpenILS::Utils::FlatXML->new();

our $parser = XML::LibXML->new();
our $xslt = XML::LibXSLT->new();
our $mods_sheet;    # compiled MARC21slim2MODS stylesheet, loaded by post_init
our $mads_sheet;

# Cached storage session. Acts as a combined getter/setter: a true argument
# replaces the cached session, and the current session is always returned.
our $st_sess;
sub st_sess {
    my $self = shift;
    my $sess = shift;
    $st_sess = $sess if ($sess);
    return $st_sess;
}

# $xpathset->{field_class}->{field_name} = { xpath => ..., id => ... }
our $xpathset = {};

sub initialize {}
sub child_init {}

# Lazily builds the shared state: the MODS stylesheet, the storage session and
# the XPath table. Each piece is only built when it is still missing, so the
# sub is safe to call at the top of every API method.
sub post_init {
    $log->debug("Running post_init", DEBUG);

    unless ($mods_sheet) {
        $log->debug("Loading MODS XSLT", DEBUG);
        my $xslt_doc = $parser->parse_file(
            OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
        $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
    }

    if (!__PACKAGE__->st_sess()) {
        $log->debug("Creating cached storage server session", DEBUG);
        __PACKAGE__->st_sess( OpenSRF::AppSession->create('open-ils.storage') );
    }

    unless (keys %$xpathset) {
        my $req = __PACKAGE__->st_sess()->request('open-ils.storage.direct.config.metabib_field.retrieve.all');
        while (my $resp = $req->recv) {
            my $f = $resp->content;
            $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
            $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
            $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
        }
    }
}



# Starts a storage transaction on the cached session unless one is already
# open. A failure to BEGIN is logged, not propagated: the PANIC thrown inside
# the try block is caught by the handler below. Returns whatever
# 'open-ils.storage.transaction.current' reports afterwards.
sub ensure_transaction {
    my $self = shift;
    my $client = shift;

    my $outer_xact = __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.current' )->gather(1);

    try {
        if (!$outer_xact) {
            $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
            __PACKAGE__->st_sess->connect;
            my $r = __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.begin' )->gather(1);
            unless (defined $r and $r) {
                __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.rollback' )->gather(1);
                __PACKAGE__->st_sess->disconnect;
                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
            }
        }
    } catch Error with {
        $log->debug("WoRM Couldn't BEGIN transaction!", ERROR)
    };

    return __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.current' )->gather(1);
}

# Commits the transaction on the cached session when the session is connected
# and a transaction is open; otherwise only logs. A failed COMMIT is rolled
# back and surfaces to the caller as an OpenSRF::EX::PANIC.
sub commit_transaction {
    my $self = shift;
    my $client = shift;

    my $outer_xact = __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.current' )->gather(1);

    try {
        if (__PACKAGE__->st_sess->connected && $outer_xact) {
            my $r = __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.commit' )->gather(1);
            unless (defined $r and $r) {
                __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.rollback' )->gather(1);
                throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
            }
            __PACKAGE__->st_sess->disconnect;
        } else {
            $log->debug("WoRM isn't inside a transaction.", INFO);
        }
    } catch Error with {
        throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
    };
}


# --------------------------------------------------------------------------------
# MARC index extraction

package OpenILS::Application::WoRM::XPATH;
use base qw/OpenILS::Application::WoRM/;

# "use" imports into the package that is current at that point, and plain
# function calls do not follow @ISA. NFD() and the log-level constants
# therefore have to be imported again in every package that uses them;
# without this, DEBUG/INFO/INTERNAL are bare strings here and NFD() is an
# undefined subroutine.
use Unicode::Normalize;
use OpenSRF::Utils::Logger qw/:level/;

# Give this a MODS documentElement and an XPath expression.
#
# Arguments (positional):
#   $xml        element to search
#   $xpath      XPath expression selecting the nodes to index
#   $ns_uri     optional namespace URI to declare on $xml ...
#   $ns_prefix  ... under this prefix (both must be true to take effect)
#   $unique     when true, skip child text already present in the buffer
#
# Returns the matched text joined with single spaces (one trailing space per
# piece), NFD-decomposed and with combining marks stripped.
sub _xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # grab the set of matching nodes
    my @nodes = $xml->findnodes( $xpath );
    for my $value (@nodes) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            # add the child's content to the growing buffer
            my $content = $child->textContent;

            # uniquify the values; index() is a literal substring test and,
            # unlike an interpolated regex, behaves predictably for empty text
            next if ($unique && index($string, $content) >= 0);
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }

    # NFD returns the decomposed copy; it does not modify its argument
    $string = NFD($string);
    $string =~ s/(\pM)//gso;
    return $string;
}

# Streams one { field_name => index_string } hash per field configured for
# $class. $xml may be a MARCXML string or an already parsed document.
sub class_all_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;
    my $class = shift;
    my $type = shift;

    OpenILS::Application::WoRM->post_init();
    $xml = $parser->parse_string($xml) unless (ref $xml);

    # the MARC -> MODS transform does not depend on the field, so run it once
    my $mods_doc = $mods_sheet->transform($xml);
    my $mods = $mods_doc->documentElement;

    for my $field ( keys %{ $xpathset->{$class} } ) {
        $client->respond(
            { $field => _xpath_to_string(
                $mods,
                $xpathset->{$class}->{$field}->{xpath},
                "http://www.loc.gov/mods/",
                "mods", 1 ) }
        );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.xpath.class.xml",
    method    => "class_all_index_string_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);

# Same as class_all_index_string_xml, but takes a bib record id and fetches
# the MARC from storage first.
sub class_all_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;
    my $class = shift;

    OpenILS::Application::WoRM->post_init();
    my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);

    # the class-level method only needs the class; there is no field type here
    $client->respond($_) for ($self->method_lookup("open-ils.worm.xpath.class.xml")->run($r->marc, $class));
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.xpath.class.record",
    method    => "class_all_index_string_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);


# Returns the index string for a single field ($class / $type) of the given
# MARCXML (string or parsed document).
sub class_index_string_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;
    my $class = shift;
    my $type = shift;

    OpenILS::Application::WoRM->post_init();
    $xml = $parser->parse_string($xml) unless (ref $xml);
    return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.xpath.xml",
    method    => "class_index_string_xml",
    api_level => 1,
    argc      => 1,
);

# Same as class_index_string_xml, but takes a bib record id.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;
    my $class = shift;
    my $type = shift;

    OpenILS::Application::WoRM->post_init();
    my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);

    my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $class => $type);
    $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.xpath.record",
    method    => "class_index_string_record",
    api_level => 1,
    argc      => 1,
);


# --------------------------------------------------------------------------------
# MARC Descriptor

package OpenILS::Application::WoRM::Biblio::Leader;
use base qw/OpenILS::Application::WoRM/;
use OpenSRF::Utils::Logger qw/:level/;    # level constants are per-package imports

# Package variables read by the extractor closures below. They are given
# values with "local" inside _extract_descriptors for the duration of a call.
our ($ldr, $oo8);

# descriptor field name => closure cutting its value out of the MARC leader
# ($ldr) or the 008 control field ($oo8)
our %descriptor_code = (
    item_type     => sub { substr($ldr,6,1); },
    item_form     => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/o) ? substr($oo8,29,1) : substr($oo8,23,1); },
    bib_level     => sub { substr($ldr,7,1); },
    control_type  => sub { substr($ldr,8,1); },
    char_encoding => sub { substr($ldr,9,1); },
    enc_level     => sub { substr($ldr,17,1); },
    cat_form      => sub { substr($ldr,18,1); },
    pub_status    => sub { substr($ldr,5,1); },
    item_lang     => sub { substr($oo8,35,3); },
    #lit_form     => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"; },
    audience      => sub { substr($oo8,22,1); },
);

# Builds a Fieldmapper::metabib::record_descriptor from a parsed MARCXML
# document by running every extractor in %descriptor_code.
sub _extract_descriptors {
    my $xml = shift;

    local $ldr = $xml->findvalue('//*[local-name()="leader"]');
    local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');

    my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
    for my $rd_field ( keys %descriptor_code ) {
        $rd_obj->$rd_field( $descriptor_code{$rd_field}->() );
    }

    return $rd_obj;
}

# API wrapper: accepts MARCXML as a string or a parsed document.
sub extract_desc_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    $xml = $parser->parse_string($xml) unless (ref $xml);

    return _extract_descriptors( $xml );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.biblio_leader.xml",
    method    => "extract_desc_xml",
    api_level => 1,
    argc      => 1,
);

# API wrapper: accepts a bib record id and fetches the MARC from storage.
sub extract_desc_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::WoRM->post_init();
    my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);

    my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
    $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
    return $d;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.biblio_leader.record",
    method    => "extract_desc_record",
    api_level => 1,
    argc      => 1,
);

# --------------------------------------------------------------------------------
# Flat MARC

package OpenILS::Application::WoRM::FlatMARC;
use base qw/OpenILS::Application::WoRM/;
use Unicode::Normalize;                   # NFD() must be imported per package
use OpenSRF::Utils::Logger qw/:level/;    # so must the level constants


# NFD-decomposes $_[0] and removes all combining marks from the result.
sub _strip_marks {
    my $val = NFD(shift);
    $val =~ s/(\pM+)//gso;
    return $val;
}

# Flattens a parsed MARCXML document into a list of full_rec Fieldmapper
# objects: one for the leader (tag 'LDR'), one per control field and one per
# subfield of every data field. Subfield values are lower-cased; leader and
# control field values keep their case.
#
#   $marcxml  parsed MARCXML document
#   $xmltype  Fieldmapper namespace to build rows in, defaults to 'metabib'
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $xmltype = shift || 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    my @ns_list;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( 'LDR' );
        $ns->value( _strip_marks($tagline->textContent) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $tagline;

        my $ns = $type->new;

        $ns->tag( $tagline->getAttribute( "tag" ) );
        $ns->value( _strip_marks($tagline->textContent) );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $tagline;

        my $tag = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        for my $data ( $tagline->childNodes ) {
            # childNodes also returns text and comment nodes (for example the
            # indentation of pretty-printed MARCXML); those have no
            # getAttribute method, so only element children are subfields
            next unless ($data && $data->isa('XML::LibXML::Element'));

            my $ns = $type->new;

            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );
            $ns->value( lc( _strip_marks($data->textContent) ) );

            push @ns_list, $ns;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
    return @ns_list;
}

# Streams the flattened rows for MARCXML given as a string or parsed document.
# Rows are built in the 'authority' namespace when the API name contains
# "authority", otherwise in 'metabib'.
sub flat_marc_xml {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    $xml = $parser->parse_string($xml) unless (ref $xml);

    my $type = 'metabib';
    $type = 'authority' if ($self->api_name =~ /authority/o);

    OpenILS::Application::WoRM->post_init();

    $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.authority.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.biblio.xml",
    method    => "flat_marc_xml",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);

# Streams the flattened rows for a stored record, looked up by id in the
# biblio or authority record_entry table depending on the API name.
sub flat_marc_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    my $type = 'biblio';
    $type = 'authority' if ($self->api_name =~ /authority/o);

    OpenILS::Application::WoRM->post_init();
    my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec )->gather(1);

    $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
    return undef;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.biblio.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.flat_marc.authority.record_entry",
    method    => "flat_marc_record",
    api_level => 1,
    argc      => 1,
    stream    => 1,
);


# --------------------------------------------------------------------------------
# Fingerprinting

package OpenILS::Application::WoRM::Biblio::Fingerprint;
use base qw/OpenILS::Application::WoRM/;
use Unicode::Normalize;                   # NFD() must be imported per package
use OpenSRF::Utils::Logger qw/:level/;    # so must the level constants

# Text currently being fingerprinted. _fp_mods gives it a value with "local"
# and the fixup subs below rewrite it in place.
our $text;

# Candidate XPaths, most preferred first; the first one yielding text wins.
my @title_xpaths = (
    '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
    '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
    '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
    '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
);

my @author_xpaths = (
    '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
    '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

my @related_title_xpaths = (
    '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
    '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
    '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
    '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
);

my @related_author_xpaths = (
    '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
    '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

# Flat list of pairs: "does this kind of record apply?" XPath => rule block.
# A rule block is itself a flat list of pairs: part name => { xpath, fixup }.
my @fp_mods_xpath = (
    '//mods:mods/mods:typeOfResource[text()="text"]' => [
        title  => { xpath => [ @title_xpaths ],  fixup => \&_fixup_title },
        author => { xpath => [ @author_xpaths ], fixup => \&_fixup_author },
    ],

    '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
        title  => { xpath => [ @related_title_xpaths,  @title_xpaths ],  fixup => \&_fixup_title },
        author => { xpath => [ @related_author_xpaths, @author_xpaths ], fixup => \&_fixup_author },
    ],

);

# last resort: anything with a titleInfo is handled with the plain-text rules
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];

# Logs the current state of $text; called around every normalization step.
sub _trace_fixup {
    $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
}

# Steps shared by title and author: decompose, drop combining marks,
# lower-case, collapse whitespace runs, drop leading whitespace.
sub _fixup_common {
    _trace_fixup();
    $text = NFD($text);    # NFD returns the decomposed copy
    _trace_fixup();
    $text =~ s/\pM+//gso;
    _trace_fixup();
    $text = lc($text);
    _trace_fixup();
    $text =~ s/\s+/ /sgo;
    _trace_fixup();
    $text =~ s/^\s*(.+)\s*$/$1/sgo;
    _trace_fixup();
}

# Title normalization: common steps, then drop the articles "the", "a" and
# "an", bracketed text, and trailing ';', '/' and '.' characters.
sub _fixup_title {
    _fixup_common();
    $text =~ s/\b(?:the|an?)\b//sgo;
    _trace_fixup();
    $text =~ s/\[.[^\]]+\]//sgo;
    _trace_fixup();
    $text =~ s/\s*[;\/\.]*$//sgo;
    _trace_fixup();
}

# Author normalization: common steps, then keep only what precedes the first
# whitespace (and an optional comma before it).
sub _fixup_author {
    _fixup_common();
    $text =~ s/,?\s+.*$//sgo;
    _trace_fixup();
}

# Computes the fingerprint of a MODS documentElement.
#
# Walks the rule table in order. For the first rule whose match XPath selects
# something and whose parts produce text, the normalized parts are
# concatenated, every non-word character is removed and the result is
# returned. Returns undef when no rule produces text.
sub _fp_mods {
    my $mods = shift;
    $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

    my $fp_string = '';

    my $match_index = 0;
    my $block_index = 1;
    while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
        if ( $mods->findnodes( $match_xpath ) ) {

            my $block_name_index = 0;
            my $block_value_index = 1;
            my $block = $fp_mods_xpath[$block_index];
            while ( my $part = $$block[$block_value_index] ) {
                local $text;
                for my $xpath ( @{ $part->{xpath} } ) {
                    $text = $mods->findvalue( $xpath );
                    last if ($text);
                }

                $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);

                if ($text) {
                    $$part{fixup}->();
                    $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
                    $fp_string .= $text;
                }

                $block_name_index += 2;
                $block_value_index += 2;
            }
        }
        if ($fp_string) {
            $fp_string =~ s/\W+//gso;
            $log->debug("Fingerprint is [$fp_string]", INFO);
            return $fp_string;
        }

        $match_index += 2;
        $block_index += 2;
    }
    return undef;
}


# Fingerprints a stored bib record, looked up by id.
sub fingerprint_bibrec {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::WoRM->post_init();
    my $r = OpenILS::Application::WoRM->st_sess->request( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )->gather(1);

    my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
    $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
    return $fp;

}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.fingerprint.bib_record",
    method    => "fingerprint_bibrec",
    api_level => 1,
    argc      => 1,
);

# Fingerprints a MODS document supplied as an XML string.
sub fingerprint_mods {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    OpenILS::Application::WoRM->post_init();
    my $mods = $parser->parse_string($xml)->documentElement;

    return _fp_mods( $mods );
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.fingerprint.mods",
    method    => "fingerprint_mods",
    api_level => 1,
    argc      => 1,
);

# Fingerprints MARCXML (string or parsed document) by transforming it to MODS
# first.
sub fingerprint_marc {
    my $self = shift;
    my $client = shift;
    my $xml = shift;

    $xml = $parser->parse_string($xml) unless (ref $xml);

    OpenILS::Application::WoRM->post_init();
    my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
    $log->debug("Returning [$fp] as fingerprint", INFO);
    return $fp;
}
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.fingerprint.marc",
    method    => "fingerprint_marc",
    api_level => 1,
    argc      => 1,
);


# --------------------------------------------------------------------------------

1;
__END__
Everything after __END__ is ignored by the Perl compiler; the text below is kept verbatim.
+my $in_xact; +my $begin; +my $commit; +my $rollback; +my $lookup; +my $update_entry; +my $mr_lookup; +my $mr_update; +my $mr_create; +my $create_source_map; +my $sm_lookup; +my $rm_old_rd; +my $rm_old_sm; +my $rm_old_fr; +my $rm_old_tr; +my $rm_old_ar; +my $rm_old_sr; +my $rm_old_kr; +my $rm_old_ser; + +my $fr_create; +my $rd_create; +my $create = {}; + +my %descriptor_code = ( + item_type => 'substr($ldr,6,1)', + item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)', + bib_level => 'substr($ldr,7,1)', + control_type => 'substr($ldr,8,1)', + char_encoding => 'substr($ldr,9,1)', + enc_level => 'substr($ldr,17,1)', + cat_form => 'substr($ldr,18,1)', + pub_status => 'substr($ldr,5,1)', + item_lang => 'substr($oo8,35,3)', + #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? 
substr($oo8,33,1) : "0"', + audience => 'substr($oo8,22,1)', +); + +sub wormize { + + my $self = shift; + my $client = shift; + my @docids = @_; + + my $no_map = 0; + if ($self->api_name =~ /no_map/o) { + $no_map = 1; + } + + $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current') + unless ($in_xact); + $begin = $self->method_lookup( 'open-ils.storage.transaction.begin') + unless ($begin); + $commit = $self->method_lookup( 'open-ils.storage.transaction.commit') + unless ($commit); + $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback') + unless ($rollback); + $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source') + unless ($sm_lookup); + $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint') + unless ($mr_lookup); + $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update') + unless ($mr_update); + $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create') + unless ($mr_create); + $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create') + unless ($create_source_map); + $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve') + unless ($lookup); + $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update') + unless ($update_entry); + $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete') + unless ($rm_old_sm); + $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete') + unless ($rm_old_rd); + $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete') + unless ($rm_old_fr); + $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete') + unless ($rm_old_tr); + $rm_old_ar = 
$self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete') + unless ($rm_old_ar); + $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete') + unless ($rm_old_sr); + $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete') + unless ($rm_old_kr); + $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete') + unless ($rm_old_ser); + $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create') + unless ($rd_create); + $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create') + unless ($fr_create); + $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create') + unless ($$create{title}); + $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create') + unless ($$create{author}); + $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create') + unless ($$create{subject}); + $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create') + unless ($$create{keyword}); + $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create') + unless ($$create{series}); + + + my ($outer_xact) = $in_xact->run; + try { + unless ($outer_xact) { + $log->debug("WoRM isn't inside a transaction, starting one now.", INFO); + my ($r) = $begin->run($client); + unless (defined $r and $r) { + $rollback->run; + throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!") + } + } + } catch Error with { + throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!") + }; + + my @source_maps; + my @entry_list; + my @mr_list; + my @rd_list; + my @ns_list; + my @mods_data; + my $ret = 0; + for my $entry ( $lookup->run(@docids) ) { + # step -1: 
grab the doc from storage + next unless ($entry); + + if(!$mods_sheet) { + my $xslt_doc = $parser->parse_file( + OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl"); + $mods_sheet = $xslt->parse_stylesheet( $xslt_doc ); + } + + my $xml = $entry->marc; + my $docid = $entry->id; + my $marcdoc = $parser->parse_string($xml); + my $modsdoc = $mods_sheet->transform($marcdoc); + + my $mods = $modsdoc->documentElement; + $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 ); + + $entry->fingerprint( fingerprint_mods( $mods ) ); + push @entry_list, $entry; + + $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO); + + unless ($no_map) { + my ($mr) = $mr_lookup->run( $entry->fingerprint ); + if (!$mr || !@$mr) { + $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO); + $mr = new Fieldmapper::metabib::metarecord; + $mr->fingerprint( $entry->fingerprint ); + $mr->master_record( $entry->id ); + my ($new_mr) = $mr_create->run($mr); + $mr->id($new_mr); + unless (defined $mr) { + throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!") + } + } else { + $log->debug("Retrieved metarecord, id is ".$mr->id, INFO); + $mr->mods(''); + push @mr_list, $mr; + } + + my $sm = new Fieldmapper::metabib::metarecord_source_map; + $sm->metarecord( $mr->id ); + $sm->source( $entry->id ); + push @source_maps, $sm; + } + + my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent; + my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]'); + + my $rd_obj = Fieldmapper::metabib::record_descriptor->new; + for my $rd_field ( keys %descriptor_code ) { + $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" ); + } + $rd_obj->record( $docid ); + push @rd_list, $rd_obj; + + push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) }; + + # step 2: build the KOHA rows 
+ my @tmp_list = _marcxml_to_full_rows( $marcdoc ); + $_->record( $docid ) for (@tmp_list); + push @ns_list, @tmp_list; + + $ret++; + + last unless ($self->api_name =~ /batch$/o); + } + + $rm_old_rd->run( { record => \@docids } ); + $rm_old_fr->run( { record => \@docids } ); + $rm_old_sm->run( { source => \@docids } ) unless ($no_map); + $rm_old_tr->run( { source => \@docids } ); + $rm_old_ar->run( { source => \@docids } ); + $rm_old_sr->run( { source => \@docids } ); + $rm_old_kr->run( { source => \@docids } ); + $rm_old_ser->run( { source => \@docids } ); + + unless ($no_map) { + my ($sm) = $create_source_map->run(@source_maps); + unless (defined $sm) { + throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!") + } + my ($mr) = $mr_update->run(@mr_list); + unless (defined $mr) { + throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!") + } + } + + my ($re) = $update_entry->run(@entry_list); + unless (defined $re) { + throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!") + } + + my ($rd) = $rd_create->run(@rd_list); + unless (defined $rd) { + throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!") + } + + my ($fr) = $fr_create->run(@ns_list); + unless (defined $fr) { + throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!") + } + + # step 5: insert the new metadata + for my $class ( qw/title author subject keyword series/ ) { + my @md_list = (); + for my $doc ( @mods_data ) { + my ($did) = keys %$doc; + my ($data) = values %$doc; + + my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry"; + for my $row ( keys %{ $$data{$class} } ) { + next unless (exists $$data{$class}{$row}); + next unless ($$data{$class}{$row}{value}); + my $fm_obj = $fm_constructor->new; + $fm_obj->value( $$data{$class}{$row}{value} ); + $fm_obj->field( 
$$data{$class}{$row}{field_id} ); + $fm_obj->source( $did ); + $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG); + + push @md_list, $fm_obj; + } + } + + my ($cr) = $$create{$class}->run(@md_list); + unless (defined $cr) { + throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!") + } + } + + unless ($outer_xact) { + $log->debug("Commiting transaction started by the WoRM.", INFO); + my ($c) = $commit->run; + unless (defined $c and $c) { + $rollback->run; + throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!") + } + } + + return $ret; +}
# NOTE(review): the line above is the tail of wormize(), whose beginning lies
# before this section.  It is carried over byte-for-byte and was not reviewed.

# Bibliographic ingest entry points: all four API names map to wormize().
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize",
    method    => "wormize",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.no_map",
    method    => "wormize",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.batch",
    method    => "wormize",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.wormize.no_map.batch",
    method    => "wormize",
    api_level => 1,
    argc      => 1,
);


# Cached storage-method handles for the authority ingest.  Only some of these
# are populated by authority_wormize(); the rest are declared but unused in
# this section.
my $ain_xact;
my $abegin;
my $acommit;
my $arollback;
my $alookup;
my $aupdate_entry;
my $amr_lookup;
my $amr_update;
my $amr_create;
my $acreate_source_map;
my $asm_lookup;
my $arm_old_rd;
my $arm_old_sm;
my $arm_old_fr;
my $arm_old_tr;
my $arm_old_ar;
my $arm_old_sr;
my $arm_old_kr;
my $arm_old_ser;

my $afr_create;
my $ard_create;
my $acreate = {};

# authority_wormize( $self, $client, @docids )
#
# Rebuilds the derived rows (record_descriptor and full_rec) for authority
# records.  Each record is fetched, its MARCXML parsed, and one descriptor
# plus one full_rec row per leader/controlfield/subfield is built; the old
# derived rows for the processed records are then deleted and the new ones
# created.
#
# When the API name ends in "batch" every fetched record is processed,
# otherwise only the first one is.  If no storage transaction is active, one
# is started here and committed at the end.
#
# Returns the number of records processed.
# Throws OpenSRF::EX::PANIC when the transaction cannot be started or
# committed, or when a batch create returns undef.
sub authority_wormize {

    my $self   = shift;
    my $client = shift;
    my @docids = @_;

    # Transaction handles.  $in_xact/$begin/$commit/$rollback are not declared
    # in this section; presumably they are the file-level handles shared with
    # wormize() -- confirm.
    $in_xact  ||= $self->method_lookup('open-ils.storage.transaction.current');
    $begin    ||= $self->method_lookup('open-ils.storage.transaction.begin');
    $commit   ||= $self->method_lookup('open-ils.storage.transaction.commit');
    $rollback ||= $self->method_lookup('open-ils.storage.transaction.rollback');

    # Authority-specific storage methods, looked up once and then cached.
    $alookup       ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve');
    $aupdate_entry ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update');
    $arm_old_rd    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.mass_delete');
    $arm_old_fr    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.mass_delete');
    $ard_create    ||= $self->method_lookup('open-ils.storage.direct.authority.record_descriptor.batch.create');
    $afr_create    ||= $self->method_lookup('open-ils.storage.direct.authority.full_rec.batch.create');

    my ($outer_xact) = $in_xact->run;
    try {
        unless ($outer_xact) {
            $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
            my ($r) = $begin->run($client);
            unless (defined $r and $r) {
                $rollback->run;
                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
            }
        }
    } catch Error with {
        throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
    };

    my @entry_list;
    my @rd_list;
    my @ns_list;
    my $ret = 0;

    # FIX: iterate the *authority* lookup.  The original called $lookup here,
    # which is not the handle this sub resolves.
    for my $entry ( $alookup->run(@docids) ) {
        # step -1: grab the doc from storage
        next unless ($entry);

        #if(!$mads_sheet) {
        #    my $xslt_doc = $parser->parse_file(
        #        OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') . "/MARC21slim2MODS.xsl");
        #    $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
        #}

        my $xml     = $entry->marc;
        my $docid   = $entry->id;
        my $marcdoc = $parser->parse_string($xml);
        #my $madsdoc = $mads_sheet->transform($marcdoc);

        #my $mads = $madsdoc->documentElement;
        #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );

        push @entry_list, $entry;

        # $ldr and $oo8 are not referenced by name below.  Presumably the
        # snippets in %descriptor_code (defined outside this section) read
        # them through the string eval -- do not rename or remove; confirm.
        my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
        my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

        my $rd_obj = Fieldmapper::authority::record_descriptor->new;
        for my $rd_field ( keys %descriptor_code ) {
            # NOTE(review): string eval of %descriptor_code values; acceptable
            # only while that table holds trusted, file-defined code.
            $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
        }
        $rd_obj->record( $docid );
        push @rd_list, $rd_obj;

        # step 2: build the KOHA rows
        my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
        $_->record( $docid ) for (@tmp_list);
        push @ns_list, @tmp_list;

        $ret++;

        last unless ($self->api_name =~ /batch$/o);
    }

    # FIX: only purge derived rows for records that were actually rebuilt.
    # The original deleted rows for every requested id, even though non-batch
    # mode rebuilds just the first record and missing records are skipped.
    my @done_ids = map { $_->id } @entry_list;
    if (@done_ids) {
        $arm_old_rd->run( { record => \@done_ids } );
        $arm_old_fr->run( { record => \@done_ids } );
    }

    my ($rd) = $ard_create->run(@rd_list);
    unless (defined $rd) {
        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
    }

    # FIX: use the authority full_rec creator resolved above.  The original
    # called $fr_create, which this sub never sets.
    my ($fr) = $afr_create->run(@ns_list);
    unless (defined $fr) {
        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
    }

    unless ($outer_xact) {
        $log->debug("Commiting transaction started by the WoRM.", INFO);
        my ($c) = $commit->run;
        unless (defined $c and $c) {
            $rollback->run;
            throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
        }
    }

    return $ret;
}

# Authority ingest entry points.
# FIX: these previously pointed at "wormize", which left authority_wormize()
# unreachable.
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.authority.wormize",
    method    => "authority_wormize",
    api_level => 1,
    argc      => 1,
);
# Misspelled name from the original registration, kept as an alias so that
# existing callers keep working.
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.authortiy.wormize",
    method    => "authority_wormize",
    api_level => 1,
    argc      => 1,
);
__PACKAGE__->register_method(
    api_name  => "open-ils.worm.authority.wormize.batch",
    method    => "authority_wormize",
    api_level => 1,
    argc      => 1,
);


# --------------------------------------------------------------------------------


# _marcxml_to_full_rows( $marcxml_doc, $type )
#
# Flattens a parsed MARCXML document into a list of row objects of class
# $type (default 'Fieldmapper::metabib::full_rec'): one row for each leader
# (tag 'LDR'), one per controlfield, and one per subfield of every datafield
# (carrying tag, ind1, ind2 and subfield code).
#
# Every value is NFD-decomposed and stripped of combining marks; subfield
# values are additionally lower-cased.  The caller sets ->record on the rows.
sub _marcxml_to_full_rows {

    my $marcxml = shift;
    my $type    = shift || 'Fieldmapper::metabib::full_rec';

    my @ns_list;

    my $root = $marcxml->documentElement;

    for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $tagline;

        # FIX: honour $type.  The original always built metabib rows here,
        # even when the caller asked for authority rows.
        my $ns = $type->new;

        $ns->tag( 'LDR' );
        my $val = NFD($tagline->textContent);
        $val =~ s/(\pM+)//gso;
        $ns->value( $val );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $tagline;

        my $ns = $type->new;    # FIX: honour $type, as for the leader

        $ns->tag( $tagline->getAttribute( "tag" ) );
        my $val = NFD($tagline->textContent);
        $val =~ s/(\pM+)//gso;
        $ns->value( $val );

        push @ns_list, $ns;
    }

    for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $tagline;

        my $tag  = $tagline->getAttribute( "tag" );
        my $ind1 = $tagline->getAttribute( "ind1" );
        my $ind2 = $tagline->getAttribute( "ind2" );

        # FIX: walk subfield elements only.  childNodes also yields the
        # whitespace text nodes of pretty-printed XML, and those have no
        # getAttribute method.
        for my $data ( @{$tagline->getChildrenByTagName("subfield")} ) {
            next unless $data;

            my $ns = $type->new;

            $ns->tag( $tag );
            $ns->ind1( $ind1 );
            $ns->ind2( $ind2 );
            $ns->subfield( $data->getAttribute( "code" ) );
            my $val = NFD($data->textContent);
            $val =~ s/(\pM+)//gso;
            $ns->value( lc($val) );

            push @ns_list, $ns;
        }
    }
    return @ns_list;
}

# _get_field_value( $root, $xpath )
#
# Collects the text matched by $xpath under $root into one space-separated
# string, then NFD-decomposes it, strips combining marks and lower-cases it.
# For a matched node with children, each child's text is appended unless it
# already occurs in the buffer; a childless node contributes its own text.
sub _get_field_value {

    my( $root, $xpath ) = @_;

    my $string = "";

    # grab the set of matching nodes
    my @nodes = $root->findnodes( $xpath );
    for my $value (@nodes) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            # Skip text already present in the buffer.  This is a literal
            # substring test, equivalent to the original quotemeta'd regex.
            # NOTE(review): it also drops a value that is merely a substring
            # of an earlier one, and any empty value -- confirm intended.
            my $content = $child->textContent;
            next if ( index($string, $content) >= 0 );
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }
    $string = NFD($string);
    $string =~ s/(\pM)//gso;
    return lc($string);
}


# modsdoc_to_values( $self, $mods )
#
# Builds the skeleton { class => { field_name => { field_id => ID } } } from
# the package-level $xpathset table and returns it as a hash reference.
# $mods is accepted but not used here, and no field values are extracted.
sub modsdoc_to_values {
    my( $self, $mods ) = @_;
    my $data = {};
    for my $class (keys %$xpathset) {
        $data->{$class} = {};
        for my $type (keys %{$xpathset->{$class}}) {
            $data->{$class}->{$type} = {
                field_id => $xpathset->{$class}->{$type}->{id},
            };
        }
    }
    return $data;
}


1;

# -- 2.43.2   (git format-patch trailer carried over from the source text)