]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/WoRM.pm
added WoRM transaction semantics; we can now scrub records of metadata
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / WoRM.pm
1 package OpenILS::Application::WoRM;
2 use base qw/OpenSRF::Application/;
3 use open qw/:utf8/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::FlatXML;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
21
22 our $log = 'OpenSRF::Utils::Logger';
23 our $xml_util = OpenILS::Utils::FlatXML->new();
24
25 our $parser = XML::LibXML->new();
26 our $xslt = XML::LibXSLT->new();
27 our $mods_sheet;
28 our $mads_sheet;
29
30 our $st_sess;
31 sub st_sess {
32         my $self = shift;
33         my $sess = shift;
34         $st_sess = $sess if ($sess);
35         return $st_sess;
36 }
37
38 our $xpathset = {};
39
40 sub initialize {}
41 sub child_init {}
42
43 sub post_init {
44         $log->debug("Running post_init", DEBUG);
45
46         unless ($mods_sheet) {
47                 $log->debug("Loading MODS XSLT", DEBUG);
48                 my $xslt_doc = $parser->parse_file(
49                         OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
50                 $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
51         }
52
53         if (!__PACKAGE__->st_sess()) {
54                 $log->debug("Creating cached storage server session", DEBUG);
55                 __PACKAGE__->st_sess( OpenSRF::AppSession->create('open-ils.storage') );
56         }
57
58         unless (keys %$xpathset) {
59                 my $req = __PACKAGE__->st_sess()->request('open-ils.storage.direct.config.metabib_field.retrieve.all');
60                 while (my $resp = $req->recv) {
61                         my $f = $resp->content;
62                         $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
63                         $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
64                         $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
65                 }
66         }
67 }
68
69
70 sub in_transaction {
71         OpenILS::Application::WoRM->post_init();
72         return __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.current' )->gather(1);
73 }
74
75 sub begin_transaction {
76         my $self = shift;
77         my $client = shift;
78         
79         OpenILS::Application::WoRM->post_init();
80         my $outer_xact = __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.current' )->gather(1);
81         
82         try {
83                 if (!$outer_xact) {
84                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
85                         __PACKAGE__->st_sess->connect;
86                         my $r = __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.begin' )->gather(1);
87                         unless (defined $r and $r) {
88                                 __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.rollback' )->gather(1);
89                                 __PACKAGE__->st_sess->disconnect;
90                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
91                         }
92                 }
93         } catch Error with {
94                 $log->debug("WoRM Couldn't BEGIN transaction!", ERROR)
95         };
96
97         return __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.current' )->gather(1);
98 }
99
100 sub commit_transaction {
101         my $self = shift;
102         my $client = shift;
103
104         OpenILS::Application::WoRM->post_init();
105         my $outer_xact = __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.current' )->gather(1);
106
107         try {
108                 if (__PACKAGE__->st_sess->connected && $outer_xact) {
109                         my $r = __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.commit' )->gather(1);
110                         unless (defined $r and $r) {
111                                 __PACKAGE__->st_sess->request( 'open-ils.storage.transaction.rollback' )->gather(1);
112                                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT transaction!")
113                         }
114                         __PACKAGE__->st_sess->disconnect;
115                 } else {
116                         $log->debug("WoRM isn't inside a transaction.", INFO);
117                 }
118         } catch Error with {
119                 throw OpenSRF::EX::PANIC ("WoRM Couldn't COMMIT transaction!")
120         };
121
122         return 1;
123 }
124
125 sub storage_req {
126         my $self = shift;
127         __PACKAGE__->st_sess->request( @_ )->gather(1);
128 }
129
130 sub scrub_authority_record {
131         my $self = shift;
132         my $client = shift;
133         my $rec = shift;
134
135         my $commit = 0;
136         if (!OpenILS::Application::WoRM->in_transaction) {
137                 OpenILS::Application::WoRM->begin_transaction || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
138                 $commit = 1;
139         }
140
141         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.full_rec.mass_delete', { record => $rec } );
142         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.authority.record_descriptor.mass_delete', { record => $rec } );
143
144         OpenILS::Application::WoRM->commit_transaction if ($commit);
145         return 1;
146 }
147 __PACKAGE__->register_method(  
148         api_name        => "open-ils.worm.scrub.authority",
149         method          => "scrub_authority_record",
150         api_level       => 1,
151         argc            => 1,
152 );                      
153
154
155 sub scrub_metabib_record {
156         my $self = shift;
157         my $client = shift;
158         my $rec = shift;
159
160         my $commit = 0;
161         if (!OpenILS::Application::WoRM->in_transaction) {
162                 OpenILS::Application::WoRM->begin_transaction || throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!");
163                 $commit = 1;
164         }
165
166         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.full_rec.mass_delete', { record => $rec } );
167         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete', { source => $rec } );
168         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete', { record => $rec } );
169         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete', { source => $rec } );
170         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete', { source => $rec } );
171         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete', { source => $rec } );
172         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete', { source => $rec } );
173         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete', { source => $rec } );
174
175         my $mr = OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.search_where', { master_record => $rec } );
176
177         if ($mr) {
178                 my $others = OpenILS::Application::WoRM->storage_req(
179                                 'open-ils.storage.direct.metabib.metarecord_source_map.search_where.atomic',
180                                 { metarecord => $mr->id }
181                 );
182
183                 if (@$others) {
184                         $mr->master_record($others->[0]->source);
185                         OpenILS::Application::WoRM->storage_req(
186                                 'open-ils.storage.direct.metabib.metarecord.remote_update',
187                                 { id => $mr->id },
188                                 { master_record => $others->[0]->source }
189                         );
190                 } else {
191                         OpenILS::Application::WoRM->storage_req( 'open-ils.storage.direct.metabib.metarecord.delete', $mr->id );
192                 }
193         }
194
195         OpenILS::Application::WoRM->commit_transaction if ($commit);
196         return 1;
197 }
198 __PACKAGE__->register_method(  
199         api_name        => "open-ils.worm.scrub.biblio",
200         method          => "scrub_metabib_record",
201         api_level       => 1,
202         argc            => 1,
203 );                      
204
205
206 # --------------------------------------------------------------------------------
207 # MARC index extraction
208
209 package OpenILS::Application::WoRM::XPATH;
210 use base qw/OpenILS::Application::WoRM/;
211
212 # give this a MODS documentElement and an XPATH expression
213 sub _xpath_to_string {
214         my $xml = shift;
215         my $xpath = shift;
216         my $ns_uri = shift;
217         my $ns_prefix = shift;
218         my $unique = shift;
219
220         $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);
221
222         my $string = "";
223
224         # grab the set of matching nodes
225         my @nodes = $xml->findnodes( $xpath );
226         for my $value (@nodes) {
227
228                 # grab all children of the node
229                 my @children = $value->childNodes();
230                 for my $child (@children) {
231
232                         # add the childs content to the growing buffer
233                         my $content = quotemeta($child->textContent);
234                         next if ($unique && $string =~ /$content/);  # uniquify the values
235                         $string .= $child->textContent . " ";
236                 }
237                 if( ! @children ) {
238                         $string .= $value->textContent . " ";
239                 }
240         }
241         NFD($string);
242         $string =~ s/(\pM)//gso;
243         return $string;
244 }
245
246 sub class_all_index_string_xml {
247         my $self = shift;
248         my $client = shift;
249         my $xml = shift;
250         my $class = shift;
251
252         OpenILS::Application::WoRM->post_init();
253         $xml = $parser->parse_string($xml) unless (ref $xml);
254         
255         my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";
256         for my $type ( keys %{ $xpathset->{$class} } ) {
257                 my $value =  _xpath_to_string(
258                                 $mods_sheet->transform($xml)->documentElement,
259                                 $xpathset->{$class}->{$type}->{xpath},
260                                 "http://www.loc.gov/mods/",
261                                 "mods",
262                                 1
263                 );
264
265                 next unless $value;
266
267                 my $fm = $class_constructor->new;
268                 $fm->value( $value );
269                 $fm->field( $xpathset->{$class}->{$type}->{id} );
270                 $client->respond($fm);
271         }
272         return undef;
273 }
274 __PACKAGE__->register_method(  
275         api_name        => "open-ils.worm.field_entry.class.xml",
276         method          => "class_all_index_string_xml",
277         api_level       => 1,
278         argc            => 1,
279         stream          => 1,
280 );                      
281
282 sub class_all_index_string_record {
283         my $self = shift;
284         my $client = shift;
285         my $rec = shift;
286         my $class = shift;
287
288         OpenILS::Application::WoRM->post_init();
289         my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);
290
291         for my $fm ($self->method_lookup("open-ils.worm.field_entry.class.xml")->run($r->marc, $class)) {
292                 $fm->source($rec);
293                 $client->respond($fm);
294         }
295         return undef;
296 }
297 __PACKAGE__->register_method(  
298         api_name        => "open-ils.worm.field_entry.class.record",
299         method          => "class_all_index_string_record",
300         api_level       => 1,
301         argc            => 1,
302         stream          => 1,
303 );                      
304
305
306 sub class_index_string_xml {
307         my $self = shift;
308         my $client = shift;
309         my $xml = shift;
310         my $class = shift;
311         my $type = shift;
312
313         OpenILS::Application::WoRM->post_init();
314         $xml = $parser->parse_string($xml) unless (ref $xml);
315         return _xpath_to_string( $mods_sheet->transform($xml)->documentElement, $xpathset->{$class}->{$type}->{xpath}, "http://www.loc.gov/mods/", "mods", 1 );
316 }
317 __PACKAGE__->register_method(  
318         api_name        => "open-ils.worm.class.type.xml",
319         method          => "class_index_string_xml",
320         api_level       => 1,
321         argc            => 1,
322 );                      
323
324 sub class_index_string_record {
325         my $self = shift;
326         my $client = shift;
327         my $rec = shift;
328         my $class = shift;
329         my $type = shift;
330
331         OpenILS::Application::WoRM->post_init();
332         my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);
333
334         my ($d) = $self->method_lookup("open-ils.worm.class.type.xml")->run($r->marc, $class => $type);
335         $log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
336         return $d;
337 }
338 __PACKAGE__->register_method(  
339         api_name        => "open-ils.worm.class.type.record",
340         method          => "class_index_string_record",
341         api_level       => 1,
342         argc            => 1,
343 );                      
344
345 sub xml_xpath {
346         my $self = shift;
347         my $client = shift;
348         my $xml = shift;
349         my $xpath = shift;
350         my $uri = shift;
351         my $prefix = shift;
352         my $unique = shift;
353
354         OpenILS::Application::WoRM->post_init();
355         $xml = $parser->parse_string($xml) unless (ref $xml);
356         return _xpath_to_string( $xml->documentElement, $xpath, $uri, $prefix, $unique );
357 }
358 __PACKAGE__->register_method(  
359         api_name        => "open-ils.worm.xpath.xml",
360         method          => "xml_xpath",
361         api_level       => 1,
362         argc            => 1,
363 );                      
364
365 sub record_xpath {
366         my $self = shift;
367         my $client = shift;
368         my $rec = shift;
369         my $xpath = shift;
370         my $uri = shift;
371         my $prefix = shift;
372         my $unique = shift;
373
374         OpenILS::Application::WoRM->post_init();
375         my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);
376
377         my ($d) = $self->method_lookup("open-ils.worm.xpath.xml")->run($r->marc, $xpath, $uri, $prefix, $unique );
378         $log->debug("XPath [$xpath] bib rec $rec returns ($d)", DEBUG);
379         return $d;
380 }
381 __PACKAGE__->register_method(  
382         api_name        => "open-ils.worm.xpath.record",
383         method          => "record_xpath",
384         api_level       => 1,
385         argc            => 1,
386 );                      
387
388
389 # --------------------------------------------------------------------------------
390 # MARC Descriptor
391
392 package OpenILS::Application::WoRM::Biblio::Leader;
393 use base qw/OpenILS::Application::WoRM/;
394
395 our %descriptor_code = (
396         item_type => sub { substr($ldr,6,1); },
397         item_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/o) ? substr($oo8,29,1) : substr($oo8,23,1); },
398         bib_level => sub { substr($ldr,7,1); },
399         control_type => sub { substr($ldr,8,1); },
400         char_encoding => sub { substr($ldr,9,1); },
401         enc_level => sub { substr($ldr,17,1); },
402         cat_form => sub { substr($ldr,18,1); },
403         pub_status => sub { substr($ldr,5,1); },
404         item_lang => sub { substr($oo8,35,3); },
405         #lit_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"; },
406         audience => sub { substr($oo8,22,1); },
407 );
408
409 sub _extract_descriptors {
410         my $xml = shift;
411
412         local $ldr = $xml->findvalue('//*[local-name()="leader"]');
413         local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');
414
415         my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
416         for my $rd_field ( keys %descriptor_code ) {
417                 $rd_obj->$rd_field( $descriptor_code{$rd_field}->() );
418         }
419
420         return $rd_obj;
421 }
422
423 sub extract_desc_xml {
424         my $self = shift;
425         my $client = shift;
426         my $xml = shift;
427
428         $xml = $parser->parse_string($xml) unless (ref $xml);
429
430         return _extract_descriptors( $xml );
431 }
432 __PACKAGE__->register_method(  
433         api_name        => "open-ils.worm.biblio_leader.xml",
434         method          => "extract_desc_xml",
435         api_level       => 1,
436         argc            => 1,
437 );                      
438
439 sub extract_desc_record {
440         my $self = shift;
441         my $client = shift;
442         my $rec = shift;
443
444         OpenILS::Application::WoRM->post_init();
445         my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);
446
447         my ($d) = $self->method_lookup("open-ils.worm.biblio_leader.xml")->run($r->marc);
448         $log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
449         return $d;
450 }
451 __PACKAGE__->register_method(  
452         api_name        => "open-ils.worm.biblio_leader.record",
453         method          => "extract_desc_record",
454         api_level       => 1,
455         argc            => 1,
456 );                      
457
458 # --------------------------------------------------------------------------------
459 # Flat MARC
460
461 package OpenILS::Application::WoRM::FlatMARC;
462 use base qw/OpenILS::Application::WoRM/;
463
464
465 sub _marcxml_to_full_rows {
466
467         my $marcxml = shift;
468         my $xmltype = shift || 'metabib';
469
470         my $type = "Fieldmapper::${xmltype}::full_rec";
471
472         my @ns_list;
473         
474         my ($root) = $marcxml->findnodes('//*[local-name()="record"]');
475
476         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
477                 next unless $tagline;
478
479                 my $ns = $type->new;
480
481                 $ns->tag( 'LDR' );
482                 my $val = NFD($tagline->textContent);
483                 $val =~ s/(\pM+)//gso;
484                 $ns->value( $val );
485
486                 push @ns_list, $ns;
487         }
488
489         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
490                 next unless $tagline;
491
492                 my $ns = $type->new;
493
494                 $ns->tag( $tagline->getAttribute( "tag" ) );
495                 my $val = NFD($tagline->textContent);
496                 $val =~ s/(\pM+)//gso;
497                 $ns->value( $val );
498
499                 push @ns_list, $ns;
500         }
501
502         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
503                 next unless $tagline;
504
505                 my $tag = $tagline->getAttribute( "tag" );
506                 my $ind1 = $tagline->getAttribute( "ind1" );
507                 my $ind2 = $tagline->getAttribute( "ind2" );
508
509                 for my $data ( $tagline->childNodes ) {
510                         next unless $data;
511
512                         my $ns = $type->new;
513
514                         $ns->tag( $tag );
515                         $ns->ind1( $ind1 );
516                         $ns->ind2( $ind2 );
517                         $ns->subfield( $data->getAttribute( "code" ) );
518                         my $val = NFD($data->textContent);
519                         $val =~ s/(\pM+)//gso;
520                         $ns->value( lc($val) );
521
522                         push @ns_list, $ns;
523                 }
524         }
525
526         $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
527         return @ns_list;
528 }
529
530 sub flat_marc_xml {
531         my $self = shift;
532         my $client = shift;
533         my $xml = shift;
534
535         $xml = $parser->parse_string($xml) unless (ref $xml);
536
537         my $type = 'metabib';
538         $type = 'authority' if ($self->api_name =~ /authority/o);
539
540         OpenILS::Application::WoRM->post_init();
541
542         $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
543         return undef;
544 }
545 __PACKAGE__->register_method(  
546         api_name        => "open-ils.worm.flat_marc.authority.xml",
547         method          => "flat_marc_xml",
548         api_level       => 1,
549         argc            => 1,
550         stream          => 1,
551 );                      
552 __PACKAGE__->register_method(  
553         api_name        => "open-ils.worm.flat_marc.biblio.xml",
554         method          => "flat_marc_xml",
555         api_level       => 1,
556         argc            => 1,
557         stream          => 1,
558 );                      
559
560 sub flat_marc_record {
561         my $self = shift;
562         my $client = shift;
563         my $rec = shift;
564
565         my $type = 'biblio';
566         $type = 'authority' if ($self->api_name =~ /authority/o);
567
568         OpenILS::Application::WoRM->post_init();
569         my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec )->gather(1);
570
571         $client->respond($_) for ($self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc));
572         return undef;
573 }
574 __PACKAGE__->register_method(  
575         api_name        => "open-ils.worm.flat_marc.biblio.record_entry",
576         method          => "flat_marc_record",
577         api_level       => 1,
578         argc            => 1,
579         stream          => 1,
580 );                      
581 __PACKAGE__->register_method(  
582         api_name        => "open-ils.worm.flat_marc.authority.record_entry",
583         method          => "flat_marc_record",
584         api_level       => 1,
585         argc            => 1,
586         stream          => 1,
587 );                      
588
589
590 # --------------------------------------------------------------------------------
591 # Fingerprinting
592
593 package OpenILS::Application::WoRM::Biblio::Fingerprint;
594 use base qw/OpenILS::Application::WoRM/;
595
596 my @fp_mods_xpath = (
597         '//mods:mods/mods:typeOfResource[text()="text"]' => [
598                         title   => {
599                                         xpath   => [
600                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
601                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
602                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
603                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
604                                         ],
605                                         fixup   => sub {
606                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
607                                                         NFD($text);
608                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
609                                                         $text =~ s/\pM+//gso;
610                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
611                                                         $text = lc($text);
612                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
613                                                         $text =~ s/\s+/ /sgo;
614                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
615                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
616                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
617                                                         $text =~ s/\b(?:the|an?)\b//sgo;
618                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
619                                                         $text =~ s/\[.[^\]]+\]//sgo;
620                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
621                                                         $text =~ s/\s*[;\/\.]*$//sgo;
622                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
623                                                 },
624                         },
625                         author  => {
626                                         xpath   => [
627                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
628                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
629                                         ],
630                                         fixup   => sub {
631                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
632                                                         NFD($text);
633                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
634                                                         $text =~ s/\pM+//gso;
635                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
636                                                         $text = lc($text);
637                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
638                                                         $text =~ s/\s+/ /sgo;
639                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
640                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
641                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
642                                                         $text =~ s/,?\s+.*$//sgo;
643                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
644                                                 },
645                         },
646         ],
647
648         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
649                         title   => {
650                                         xpath   => [
651                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
652                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
653                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
654                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
655                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
656                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
657                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
658                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
659                                         ],
660                                         fixup   => sub {
661                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
662                                                         NFD($text);
663                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
664                                                         $text =~ s/\pM+//gso;
665                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
666                                                         $text = lc($text);
667                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
668                                                         $text =~ s/\s+/ /sgo;
669                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
670                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
671                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
672                                                         $text =~ s/\b(?:the|an?)\b//sgo;
673                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
674                                                         $text =~ s/\[.[^\]]+\]//sgo;
675                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
676                                                         $text =~ s/\s*[;\/\.]*$//sgo;
677                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
678                                                 },
679                         },
680                         author  => {
681                                         xpath   => [
682                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
683                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
684                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
685                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
686                                         ],
687                                         fixup   => sub {
688                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
689                                                         NFD($text);
690                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
691                                                         $text =~ s/\pM+//gso;
692                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
693                                                         $text = lc($text);
694                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
695                                                         $text =~ s/\s+/ /sgo;
696                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
697                                                         $text =~ s/^\s*(.+)\s*$/$1/sgo;
698                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
699                                                         $text =~ s/,?\s+.*$//sgo;
700                                                         $log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
701                                                 },
702                         },
703         ],
704
705 );
706
707 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
708
709 sub _fp_mods {
710         my $mods = shift;
711         $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
712
713         my $fp_string = '';
714
715         my $match_index = 0;
716         my $block_index = 1;
717         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
718                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
719
720                         my $block_name_index = 0;
721                         my $block_value_index = 1;
722                         my $block = $fp_mods_xpath[$block_index];
723                         while ( my $part = $$block[$block_value_index] ) {
724                                 local $text;
725                                 for my $xpath ( @{ $part->{xpath} } ) {
726                                         $text = $mods->findvalue( $xpath );
727                                         last if ($text);
728                                 }
729
730                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
731
732                                 if ($text) {
733                                         $$part{fixup}->();
734                                         $log->debug("Fingerprint text after fixup : [$text]", DEBUG);
735                                         $fp_string .= $text;
736                                 }
737
738                                 $block_name_index += 2;
739                                 $block_value_index += 2;
740                         }
741                 }
742                 if ($fp_string) {
743                         $fp_string =~ s/\W+//gso;
744                         $log->debug("Fingerprint is [$fp_string]", INFO);;
745                         return $fp_string;
746                 }
747
748                 $match_index += 2;
749                 $block_index += 2;
750         }
751         return undef;
752 }
753
754
755 sub fingerprint_bibrec {
756         my $self = shift;
757         my $client = shift;
758         my $rec = shift;
759
760         OpenILS::Application::WoRM->post_init();
761         my $r = OpenILS::Application::WoRM->st_sess->request( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )->gather(1);
762
763         my ($fp) = $self->method_lookup('open-ils.worm.fingerprint.marc')->run($r->marc);
764         $log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
765         return $fp;
766
767 }
768 __PACKAGE__->register_method(  
769         api_name        => "open-ils.worm.fingerprint.record",
770         method          => "fingerprint_bibrec",
771         api_level       => 1,
772         argc            => 1,
773 );                      
774
775 sub fingerprint_mods {
776         my $self = shift;
777         my $client = shift;
778         my $xml = shift;
779
780         OpenILS::Application::WoRM->post_init();
781         my $mods = $parser->parse_string($xml)->documentElement;
782
783         return _fp_mods( $mods );
784 }
785 __PACKAGE__->register_method(  
786         api_name        => "open-ils.worm.fingerprint.mods",
787         method          => "fingerprint_mods",
788         api_level       => 1,
789         argc            => 1,
790 );                      
791
792 sub fingerprint_marc {
793         my $self = shift;
794         my $client = shift;
795         my $xml = shift;
796
797         $xml = $parser->parse_string($xml) unless (ref $xml);
798
799         OpenILS::Application::WoRM->post_init();
800         my $fp = _fp_mods( $mods_sheet->transform($xml)->documentElement );
801         $log->debug("Returning [$fp] as fingerprint", INFO);
802         return $fp;
803 }
804 __PACKAGE__->register_method(  
805         api_name        => "open-ils.worm.fingerprint.marc",
806         method          => "fingerprint_marc",
807         api_level       => 1,
808         argc            => 1,
809 );                      
810
811
812 # --------------------------------------------------------------------------------
813
814 1;
815 __END__
816 my $in_xact;
817 my $begin;
818 my $commit;
819 my $rollback;
820 my $lookup;
821 my $update_entry;
822 my $mr_lookup;
823 my $mr_update;
824 my $mr_create;
825 my $create_source_map;
826 my $sm_lookup;
827 my $rm_old_rd;
828 my $rm_old_sm;
829 my $rm_old_fr;
830 my $rm_old_tr;
831 my $rm_old_ar;
832 my $rm_old_sr;
833 my $rm_old_kr;
834 my $rm_old_ser;
835
836 my $fr_create;
837 my $rd_create;
838 my $create = {};
839
840 my %descriptor_code = (
841         item_type => 'substr($ldr,6,1)',
842         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
843         bib_level => 'substr($ldr,7,1)',
844         control_type => 'substr($ldr,8,1)',
845         char_encoding => 'substr($ldr,9,1)',
846         enc_level => 'substr($ldr,17,1)',
847         cat_form => 'substr($ldr,18,1)',
848         pub_status => 'substr($ldr,5,1)',
849         item_lang => 'substr($oo8,35,3)',
850         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
851         audience => 'substr($oo8,22,1)',
852 );
853
854 sub wormize {
855
856         my $self = shift;
857         my $client = shift;
858         my @docids = @_;
859
860         my $no_map = 0;
861         if ($self->api_name =~ /no_map/o) {
862                 $no_map = 1;
863         }
864
865         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
866                 unless ($in_xact);
867         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
868                 unless ($begin);
869         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
870                 unless ($commit);
871         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
872                 unless ($rollback);
873         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
874                 unless ($sm_lookup);
875         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
876                 unless ($mr_lookup);
877         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
878                 unless ($mr_update);
879         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
880                 unless ($mr_create);
881         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
882                 unless ($create_source_map);
883         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
884                 unless ($lookup);
885         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
886                 unless ($update_entry);
887         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
888                 unless ($rm_old_sm);
889         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
890                 unless ($rm_old_rd);
891         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
892                 unless ($rm_old_fr);
893         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
894                 unless ($rm_old_tr);
895         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
896                 unless ($rm_old_ar);
897         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
898                 unless ($rm_old_sr);
899         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
900                 unless ($rm_old_kr);
901         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
902                 unless ($rm_old_ser);
903         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
904                 unless ($rd_create);
905         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
906                 unless ($fr_create);
907         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
908                 unless ($$create{title});
909         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
910                 unless ($$create{author});
911         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
912                 unless ($$create{subject});
913         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
914                 unless ($$create{keyword});
915         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
916                 unless ($$create{series});
917
918
919         my ($outer_xact) = $in_xact->run;
920         try {
921                 unless ($outer_xact) {
922                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
923                         my ($r) = $begin->run($client);
924                         unless (defined $r and $r) {
925                                 $rollback->run;
926                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
927                         }
928                 }
929         } catch Error with {
930                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
931         };
932
933         my @source_maps;
934         my @entry_list;
935         my @mr_list;
936         my @rd_list;
937         my @ns_list;
938         my @mods_data;
939         my $ret = 0;
940         for my $entry ( $lookup->run(@docids) ) {
941                 # step -1: grab the doc from storage
942                 next unless ($entry);
943
944                 if(!$mods_sheet) {
945                         my $xslt_doc = $parser->parse_file(
946                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
947                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
948                 }
949
950                 my $xml = $entry->marc;
951                 my $docid = $entry->id;
952                 my $marcdoc = $parser->parse_string($xml);
953                 my $modsdoc = $mods_sheet->transform($marcdoc);
954
955                 my $mods = $modsdoc->documentElement;
956                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
957
958                 $entry->fingerprint( fingerprint_mods( $mods ) );
959                 push @entry_list, $entry;
960
961                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
962
963                 unless ($no_map) {
964                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
965                         if (!$mr || !@$mr) {
966                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
967                                 $mr = new Fieldmapper::metabib::metarecord;
968                                 $mr->fingerprint( $entry->fingerprint );
969                                 $mr->master_record( $entry->id );
970                                 my ($new_mr) = $mr_create->run($mr);
971                                 $mr->id($new_mr);
972                                 unless (defined $mr) {
973                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
974                                 }
975                         } else {
976                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
977                                 $mr->mods('');
978                                 push @mr_list, $mr;
979                         }
980
981                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
982                         $sm->metarecord( $mr->id );
983                         $sm->source( $entry->id );
984                         push @source_maps, $sm;
985                 }
986
987                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
988                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
989
990                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
991                 for my $rd_field ( keys %descriptor_code ) {
992                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
993                 }
994                 $rd_obj->record( $docid );
995                 push @rd_list, $rd_obj;
996
997                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
998
999                 # step 2: build the KOHA rows
1000                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
1001                 $_->record( $docid ) for (@tmp_list);
1002                 push @ns_list, @tmp_list;
1003
1004                 $ret++;
1005
1006                 last unless ($self->api_name =~ /batch$/o);
1007         }
1008
1009         $rm_old_rd->run( { record => \@docids } );
1010         $rm_old_fr->run( { record => \@docids } );
1011         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
1012         $rm_old_tr->run( { source => \@docids } );
1013         $rm_old_ar->run( { source => \@docids } );
1014         $rm_old_sr->run( { source => \@docids } );
1015         $rm_old_kr->run( { source => \@docids } );
1016         $rm_old_ser->run( { source => \@docids } );
1017
1018         unless ($no_map) {
1019                 my ($sm) = $create_source_map->run(@source_maps);
1020                 unless (defined $sm) {
1021                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
1022                 }
1023                 my ($mr) = $mr_update->run(@mr_list);
1024                 unless (defined $mr) {
1025                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
1026                 }
1027         }
1028
1029         my ($re) = $update_entry->run(@entry_list);
1030         unless (defined $re) {
1031                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
1032         }
1033
1034         my ($rd) = $rd_create->run(@rd_list);
1035         unless (defined $rd) {
1036                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
1037         }
1038
1039         my ($fr) = $fr_create->run(@ns_list);
1040         unless (defined $fr) {
1041                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
1042         }
1043
1044         # step 5: insert the new metadata
1045         for my $class ( qw/title author subject keyword series/ ) {
1046                 my @md_list = ();
1047                 for my $doc ( @mods_data ) {
1048                         my ($did) = keys %$doc;
1049                         my ($data) = values %$doc;
1050
1051                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
1052                         for my $row ( keys %{ $$data{$class} } ) {
1053                                 next unless (exists $$data{$class}{$row});
1054                                 next unless ($$data{$class}{$row}{value});
1055                                 my $fm_obj = $fm_constructor->new;
1056                                 $fm_obj->value( $$data{$class}{$row}{value} );
1057                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
1058                                 $fm_obj->source( $did );
1059                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
1060
1061                                 push @md_list, $fm_obj;
1062                         }
1063                 }
1064                         
1065                 my ($cr) = $$create{$class}->run(@md_list);
1066                 unless (defined $cr) {
1067                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
1068                 }
1069         }
1070
1071         unless ($outer_xact) {
1072                 $log->debug("Commiting transaction started by the WoRM.", INFO);
1073                 my ($c) = $commit->run;
1074                 unless (defined $c and $c) {
1075                         $rollback->run;
1076                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1077                 }
1078         }
1079
1080         return $ret;
1081 }
1082 __PACKAGE__->register_method( 
1083         api_name        => "open-ils.worm.wormize",
1084         method          => "wormize",
1085         api_level       => 1,
1086         argc            => 1,
1087 );
1088 __PACKAGE__->register_method( 
1089         api_name        => "open-ils.worm.wormize.no_map",
1090         method          => "wormize",
1091         api_level       => 1,
1092         argc            => 1,
1093 );
1094 __PACKAGE__->register_method( 
1095         api_name        => "open-ils.worm.wormize.batch",
1096         method          => "wormize",
1097         api_level       => 1,
1098         argc            => 1,
1099 );
1100 __PACKAGE__->register_method( 
1101         api_name        => "open-ils.worm.wormize.no_map.batch",
1102         method          => "wormize",
1103         api_level       => 1,
1104         argc            => 1,
1105 );
1106
1107
1108 my $ain_xact;
1109 my $abegin;
1110 my $acommit;
1111 my $arollback;
1112 my $alookup;
1113 my $aupdate_entry;
1114 my $amr_lookup;
1115 my $amr_update;
1116 my $amr_create;
1117 my $acreate_source_map;
1118 my $asm_lookup;
1119 my $arm_old_rd;
1120 my $arm_old_sm;
1121 my $arm_old_fr;
1122 my $arm_old_tr;
1123 my $arm_old_ar;
1124 my $arm_old_sr;
1125 my $arm_old_kr;
1126 my $arm_old_ser;
1127
1128 my $afr_create;
1129 my $ard_create;
1130 my $acreate = {};
1131
1132 sub authority_wormize {
1133
1134         my $self = shift;
1135         my $client = shift;
1136         my @docids = @_;
1137
1138         my $no_map = 0;
1139         if ($self->api_name =~ /no_map/o) {
1140                 $no_map = 1;
1141         }
1142
1143         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
1144                 unless ($in_xact);
1145         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
1146                 unless ($begin);
1147         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
1148                 unless ($commit);
1149         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
1150                 unless ($rollback);
1151         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
1152                 unless ($alookup);
1153         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
1154                 unless ($aupdate_entry);
1155         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
1156                 unless ($arm_old_rd);
1157         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
1158                 unless ($arm_old_fr);
1159         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
1160                 unless ($ard_create);
1161         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
1162                 unless ($afr_create);
1163
1164
1165         my ($outer_xact) = $in_xact->run;
1166         try {
1167                 unless ($outer_xact) {
1168                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
1169                         my ($r) = $begin->run($client);
1170                         unless (defined $r and $r) {
1171                                 $rollback->run;
1172                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
1173                         }
1174                 }
1175         } catch Error with {
1176                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
1177         };
1178
1179         my @source_maps;
1180         my @entry_list;
1181         my @mr_list;
1182         my @rd_list;
1183         my @ns_list;
1184         my @mads_data;
1185         my $ret = 0;
1186         for my $entry ( $lookup->run(@docids) ) {
1187                 # step -1: grab the doc from storage
1188                 next unless ($entry);
1189
1190                 #if(!$mads_sheet) {
1191                 #       my $xslt_doc = $parser->parse_file(
1192                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
1193                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
1194                 #}
1195
1196                 my $xml = $entry->marc;
1197                 my $docid = $entry->id;
1198                 my $marcdoc = $parser->parse_string($xml);
1199                 #my $madsdoc = $mads_sheet->transform($marcdoc);
1200
1201                 #my $mads = $madsdoc->documentElement;
1202                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
1203
1204                 push @entry_list, $entry;
1205
1206                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
1207                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
1208
1209                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
1210                 for my $rd_field ( keys %descriptor_code ) {
1211                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
1212                 }
1213                 $rd_obj->record( $docid );
1214                 push @rd_list, $rd_obj;
1215
1216                 # step 2: build the KOHA rows
1217                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
1218                 $_->record( $docid ) for (@tmp_list);
1219                 push @ns_list, @tmp_list;
1220
1221                 $ret++;
1222
1223                 last unless ($self->api_name =~ /batch$/o);
1224         }
1225
1226         $arm_old_rd->run( { record => \@docids } );
1227         $arm_old_fr->run( { record => \@docids } );
1228
1229         my ($rd) = $ard_create->run(@rd_list);
1230         unless (defined $rd) {
1231                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
1232         }
1233
1234         my ($fr) = $fr_create->run(@ns_list);
1235         unless (defined $fr) {
1236                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
1237         }
1238
1239         unless ($outer_xact) {
1240                 $log->debug("Commiting transaction started by the WoRM.", INFO);
1241                 my ($c) = $commit->run;
1242                 unless (defined $c and $c) {
1243                         $rollback->run;
1244                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
1245                 }
1246         }
1247
1248         return $ret;
1249 }
1250 __PACKAGE__->register_method( 
1251         api_name        => "open-ils.worm.authortiy.wormize",
1252         method          => "wormize",
1253         api_level       => 1,
1254         argc            => 1,
1255 );
1256 __PACKAGE__->register_method( 
1257         api_name        => "open-ils.worm.authority.wormize.batch",
1258         method          => "wormize",
1259         api_level       => 1,
1260         argc            => 1,
1261 );
1262
1263
1264 # --------------------------------------------------------------------------------
1265
1266
1267 sub _marcxml_to_full_rows {
1268
1269         my $marcxml = shift;
1270         my $type = shift || 'Fieldmapper::metabib::full_rec';
1271
1272         my @ns_list;
1273         
1274         my $root = $marcxml->documentElement;
1275
1276         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
1277                 next unless $tagline;
1278
1279                 my $ns = new Fieldmapper::metabib::full_rec;
1280
1281                 $ns->tag( 'LDR' );
1282                 my $val = NFD($tagline->textContent);
1283                 $val =~ s/(\pM+)//gso;
1284                 $ns->value( $val );
1285
1286                 push @ns_list, $ns;
1287         }
1288
1289         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
1290                 next unless $tagline;
1291
1292                 my $ns = new Fieldmapper::metabib::full_rec;
1293
1294                 $ns->tag( $tagline->getAttribute( "tag" ) );
1295                 my $val = NFD($tagline->textContent);
1296                 $val =~ s/(\pM+)//gso;
1297                 $ns->value( $val );
1298
1299                 push @ns_list, $ns;
1300         }
1301
1302         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
1303                 next unless $tagline;
1304
1305                 my $tag = $tagline->getAttribute( "tag" );
1306                 my $ind1 = $tagline->getAttribute( "ind1" );
1307                 my $ind2 = $tagline->getAttribute( "ind2" );
1308
1309                 for my $data ( $tagline->childNodes ) {
1310                         next unless $data;
1311
1312                         my $ns = $type->new;
1313
1314                         $ns->tag( $tag );
1315                         $ns->ind1( $ind1 );
1316                         $ns->ind2( $ind2 );
1317                         $ns->subfield( $data->getAttribute( "code" ) );
1318                         my $val = NFD($data->textContent);
1319                         $val =~ s/(\pM+)//gso;
1320                         $ns->value( lc($val) );
1321
1322                         push @ns_list, $ns;
1323                 }
1324         }
1325         return @ns_list;
1326 }
1327
1328 sub _get_field_value {
1329
1330         my( $root, $xpath ) = @_;
1331
1332         my $string = "";
1333
1334         # grab the set of matching nodes
1335         my @nodes = $root->findnodes( $xpath );
1336         for my $value (@nodes) {
1337
1338                 # grab all children of the node
1339                 my @children = $value->childNodes();
1340                 for my $child (@children) {
1341
1342                         # add the childs content to the growing buffer
1343                         my $content = quotemeta($child->textContent);
1344                         next if ($string =~ /$content/);  # uniquify the values
1345                         $string .= $child->textContent . " ";
1346                 }
1347                 if( ! @children ) {
1348                         $string .= $value->textContent . " ";
1349                 }
1350         }
1351         $string = NFD($string);
1352         $string =~ s/(\pM)//gso;
1353         return lc($string);
1354 }
1355
1356
1357 sub modsdoc_to_values {
1358         my( $self, $mods ) = @_;
1359         my $data = {};
1360         for my $class (keys %$xpathset) {
1361                 $data->{$class} = {};
1362                 for my $type (keys %{$xpathset->{$class}}) {
1363                         $data->{$class}->{$type} = {};
1364                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
1365                 }
1366         }
1367         return $data;
1368 }
1369
1370
1371 1;
1372
1373