1 package OpenILS::Application::Storage::WORM;
2 use base qw/OpenILS::Application::Storage/;
3 use strict; use warnings;
5 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::Logger qw/:level/;
8 my $log = 'OpenSRF::Utils::Logger';
10 use OpenILS::Utils::FlatXML;
11 use OpenILS::Utils::Fieldmapper;
16 use Time::HiRes qw(time);
18 my $xml_util = OpenILS::Utils::FlatXML->new();
20 my $parser = XML::LibXML->new();
21 my $xslt = XML::LibXSLT->new();
22 my $xslt_doc = $parser->parse_file( "/home/miker/cvs/OpenILS/app_server/stylesheets/MARC21slim2MODS.xsl" );
23 #my $xslt_doc = $parser->parse_file( "/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl" );
24 my $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
31 my $meth = __PACKAGE__->method_lookup('open-ils.storage.direct.config.metabib_field.all');
32 for my $f ($meth->run) {
33 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
34 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
35 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
39 # --------------------------------------------------------------------------------
43 '//mods:mods/mods:typeOfResource[text()="text"]' => [
46 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
47 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
48 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
49 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
55 $text =~ s/^\s*(.+)\s*$/$1/sgo;
56 $text =~ s/\b(?:the|an?)\b//sgo;
57 $text =~ s/\[.[^\]]+\]//sgo;
58 $text =~ s/\s*[;\/\.]*$//sgo;
64 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
65 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
71 $text =~ s/^\s*(.+)\s*$/$1/sgo;
72 ($text) = split ",", $text;
78 '//mods:mods/mods:relatedItem[@type!="host"]' => [
81 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
82 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
83 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
84 '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
85 '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
86 '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
87 '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
88 '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
94 $text =~ s/^\s*(.+)\s*$/$1/sgo;
95 $text =~ s/\b(?:the|an?)\b//sgo;
96 $text =~ s/\[.[^\]]+\]//sgo;
97 $text =~ s/\s*[;\/\.]*$//sgo;
103 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
104 '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
105 '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
106 '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
111 $text =~ s/\s+/ /sgo;
112 $text =~ s/^\s*(.+)\s*$/$1/sgo;
113 ($text) = split ",", $text;
121 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
123 sub fingerprint_mods {
130 while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
131 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
133 my $block_name_index = 0;
134 my $block_value_index = 1;
135 my $block = $fp_mods_xpath[$block_index];
136 while ( my $part = $$block[$block_value_index] ) {
138 for my $xpath ( @{ $part->{xpath} } ) {
139 $text = $mods->findvalue( $xpath );
143 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
150 $block_name_index += 2;
151 $block_value_index += 2;
155 $fp_string =~ s/\W+//gso;
156 $log->debug("Fingerprint is [$fp_string]", INFO);;
168 # --------------------------------------------------------------------------------
187 my %descriptor_code = (
188 item_type => 'substr($ldr,6,1)',
189 item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
190 bib_level => 'substr($ldr,7,1)',
191 control_type => 'substr($ldr,8,1)',
192 char_encoding => 'substr($ldr,9,1)',
193 enc_level => 'substr($ldr,17,1)',
194 cat_form => 'substr($ldr,18,1)',
195 pub_status => 'substr($ldr,5,1)',
196 item_lang => 'substr($oo8,35,3)',
197 audience => 'substr($oo8,22,1)',
206 $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
208 $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
210 $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
212 $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_marc.batch.retrieve')
214 $fetch_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.retrieve')
215 unless ($update_entry);
216 $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
217 unless ($update_entry);
218 $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
220 $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
222 $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
224 $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
226 $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
228 $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
230 $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
232 $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
234 $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
235 unless ($$create{title});
236 $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
237 unless ($$create{author});
238 $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
239 unless ($$create{subject});
240 $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
241 unless ($$create{keyword});
245 my ($r) = $begin->run($client);
246 unless (defined $r and $r) {
248 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
251 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
259 for my $marc ( $lookup->run(@docids) ) {
260 # step -1: grab the doc from storage
263 my $xml = $marc->marc;
264 my $docid = $marc->id;
265 my $marcdoc = $parser->parse_string($xml);
266 my $modsdoc = $mods_sheet->transform($marcdoc);
268 my $mods = $modsdoc->documentElement;
269 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
271 my ($entry) = $fetch_entry->run($docid);
272 $entry->fingerprint( fingerprint_mods( $mods ) );
273 push @entry_list, $entry;
275 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
276 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
278 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
279 for my $rd_field ( keys %descriptor_code ) {
280 $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
282 $rd_obj->record( $docid );
283 push @rd_list, $rd_obj;
285 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
287 # step 2: build the KOHA rows
288 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
289 $_->record( $docid ) for (@tmp_list);
290 push @ns_list, @tmp_list;
294 last unless ($self->api_name =~ /batch$/o);
297 $rm_old_rd->run( { record => \@docids } );
298 $rm_old_fr->run( { record => \@docids } );
299 $rm_old_tr->run( { source => \@docids } );
300 $rm_old_ar->run( { source => \@docids } );
301 $rm_old_sr->run( { source => \@docids } );
302 $rm_old_kr->run( { source => \@docids } );
304 my ($re) = $update_entry->run(@entry_list);
305 unless (defined $re) {
306 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
309 my ($rd) = $rd_create->run(@rd_list);
310 unless (defined $rd) {
311 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
314 my ($fr) = $fr_create->run(@ns_list);
315 unless (defined $fr) {
316 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
319 # step 5: insert the new metadata
320 for my $class ( qw/title author subject keyword/ ) {
322 for my $doc ( @mods_data ) {
323 my ($did) = keys %$doc;
324 my ($data) = values %$doc;
326 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
327 for my $row ( keys %{ $$data{$class} } ) {
328 next unless (exists $$data{$class}{$row});
329 next unless ($$data{$class}{$row}{value});
330 my $fm_obj = $fm_constructor->new;
331 $fm_obj->value( $$data{$class}{$row}{value} );
332 $fm_obj->field( $$data{$class}{$row}{field_id} );
333 $fm_obj->source( $did );
334 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
336 push @md_list, $fm_obj;
340 my ($cr) = $$create{$class}->run(@md_list);
341 unless (defined $cr) {
342 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
346 my ($c) = $commit->run;
347 unless (defined $c and $c) {
349 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
354 __PACKAGE__->register_method(
355 api_name => "open-ils.worm.wormize",
360 __PACKAGE__->register_method(
361 api_name => "open-ils.worm.wormize.batch",
368 # --------------------------------------------------------------------------------
371 sub _marcxml_to_full_rows {
377 my $root = $marcxml->documentElement;
379 for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
380 next unless $tagline;
382 my $ns = new Fieldmapper::metabib::full_rec;
385 my $val = $tagline->textContent;
386 $val =~ s/(\pM)//gso;
392 for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
393 next unless $tagline;
395 my $ns = new Fieldmapper::metabib::full_rec;
397 $ns->tag( $tagline->getAttribute( "tag" ) );
398 my $val = $tagline->textContent;
399 $val =~ s/(\pM)//gso;
405 for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
406 next unless $tagline;
408 my $tag = $tagline->getAttribute( "tag" );
409 my $ind1 = $tagline->getAttribute( "ind1" );
410 my $ind2 = $tagline->getAttribute( "ind2" );
412 for my $data ( $tagline->childNodes ) {
415 my $ns = new Fieldmapper::metabib::full_rec;
420 $ns->subfield( $data->getAttribute( "code" ) );
421 my $val = $data->textContent;
422 $val =~ s/(\pM)//gso;
423 $ns->value( lc($val) );
431 sub _get_field_value {
433 my( $root, $xpath ) = @_;
437 # grab the set of matching nodes
438 my @nodes = $root->findnodes( $xpath );
439 for my $value (@nodes) {
441 # grab all children of the node
442 my @children = $value->childNodes();
443 for my $child (@children) {
445 # add the childs content to the growing buffer
446 my $content = quotemeta($child->textContent);
447 next if ($string =~ /$content/); # uniquify the values
448 $string .= $child->textContent . " ";
451 $string .= $value->textContent . " ";
454 $string =~ s/(\pM)//gso;
459 sub modsdoc_to_values {
460 my( $self, $mods ) = @_;
462 for my $class (keys %$xpathset) {
463 $data->{$class} = {};
464 for my $type (keys %{$xpathset->{$class}}) {
465 $data->{$class}->{$type} = {};
466 $data->{$class}->{$type}->{value} = _get_field_value( $mods, $xpathset->{$class}->{$type}->{xpath} );
467 $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};