]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Application/Vandelay.pm
stub of vandelay ML
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Vandelay.pm
1 package OpenILS::Application::Vandelay;
2 use OpenILS::Application;
3 use base qw/OpenILS::Application/;
4
# No-op hook. Presumably called by the application framework when the
# service starts -- TODO confirm against the base class.
sub initialize {
    return;
}

# No-op hook. Presumably called once per child process -- TODO confirm
# against the base class.
sub child_init {
    return;
}
7
# Normalizes a string and replaces every code point in the range
# U+0080..U+FFFD with a hexadecimal numeric character reference
# (for example "&#xE9;").
#
# Arguments:
#   $self  - invocant (not used)
#   $stuff - the string to convert
#   $form  - 'D' selects NFD; any other value, including undef, selects NFC
#
# Returns the converted string.
sub entityize {
    my $self  = shift;
    my $stuff = shift;
    my $form  = shift;

    # This package does not import NFD/NFC itself, so load the module here
    # and call the functions by their full names.
    require Unicode::Normalize;

    if (defined($form) && $form eq 'D') {
        $stuff = Unicode::Normalize::NFD($stuff);
    } else {
        $stuff = Unicode::Normalize::NFC($stuff);
    }

    $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sge;
    return $stuff;
}
22
23 # --------------------------------------------------------------------------------
24 # Biblio ingest
25
26 package OpenILS::Application::Vandelay::Biblio;
27 use base qw/OpenILS::Application::Vandelay/;
28
29 use Unicode::Normalize;
30 use OpenSRF::EX qw/:try/;
31
32 use OpenSRF::AppSession;
33 use OpenSRF::Utils::SettingsClient;
34
35 use OpenILS::Utils::Fieldmapper;
36 use OpenSRF::Utils::JSON;
37 use MARC::Record;
38 use MARC::File::XML;
39
40 use OpenILS::Utils::Fieldmapper;
41
42 use Time::HiRes qw(time);
43
44 use OpenSRF::Utils::Logger qw/:level/;
45 our $log = 'OpenSRF::Utils::Logger';
46
# Stub: bib queue creation is not implemented yet.
#
# Arguments: $self, $client, $bib (all currently unused).
# Returns undef.
#
# The sub is closed explicitly so that the definitions, package statements
# and register_method calls that follow are compiled at file level instead
# of being nested inside this body.
sub create_bib_queue {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    # TODO: implement queue creation
    return undef;
}
51
52
# Ingests one bibliographic record object and rewrites its derived rows
# through an open-ils.cstore session, inside a single transaction.
#
# Steps, in order:
#   1. run open-ils.ingest.full.biblio.object.readonly on $bib
#   2. copy the returned fingerprint and quality onto $bib
#   3. delete and re-create the record's metabib.full_rec rows, its
#      metabib.record_descriptor row and its classed *_field_entry rows
#   4. re-map the record to a metarecord whose fingerprint matches
#      (re-using an old one, finding an existing one, or creating one),
#      re-targeting metarecord holds of the old metarecords
#   5. update the record entry itself and commit
#
# Arguments:
#   $self   - method object (used for method_lookup)
#   $client - not used
#   $bib    - record entry object; its id, fingerprint and quality
#             accessors are used
#
# Returns $bib->id on success; undef when the read-only ingest returns
# nothing or the commit request returns a false value.
sub rw_biblio_ingest_single_object {
    my $self = shift;
    my $client = shift;
    my $bib = shift;

    my ($blob) = $self->method_lookup("open-ils.ingest.full.biblio.object.readonly")->run($bib);
    return undef unless ($blob);

    $bib->fingerprint( $blob->{fingerprint}->{fingerprint} );
    $bib->quality( $blob->{fingerprint}->{quality} );

    my $cstore = OpenSRF::AppSession->connect('open-ils.cstore');

    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # update full_rec stuff ...
    my $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.full_rec.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.full_rec.create' => $_ )->gather(1) for (@{ $blob->{full_rec} });

    # update rec_descriptor stuff ...
    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.record_descriptor.id_list.atomic',
        { record => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.delete' => $_ )->gather(1) for (@$tmp);
    $cstore->request( 'open-ils.cstore.direct.metabib.record_descriptor.create' => $blob->{descriptor} )->gather(1);

    # deal with classed fields...
    for my $class ( qw/title author subject keyword series/ ) {
        $tmp = $cstore->request(
            "open-ils.cstore.direct.metabib.${class}_field_entry.id_list.atomic",
            { source => $bib->id }
        )->gather(1);

        $cstore->request( "open-ils.cstore.direct.metabib.${class}_field_entry.delete" => $_ )->gather(1) for (@$tmp);
    }
    for my $obj ( @{ $blob->{field_entries} } ) {
        # Fieldmapper::metabib::title_field_entry -> metabib.title_field_entry
        my $class = $obj->class_name;
        $class =~ s/^Fieldmapper:://o;
        $class =~ s/::/./go;
        $cstore->request( "open-ils.cstore.direct.$class.create" => $obj )->gather(1);
    }

    # update MR map ...

    $tmp = $cstore->request(
        'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
        { source => $bib->id }
    )->gather(1);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.delete' => $_->id )->gather(1) for (@$tmp);

    # get the old MRs
    # (declared separately: "my $x = ... if COND" has undefined behaviour)
    my $old_mrs;
    if (@$tmp) {
        $old_mrs = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search.atomic' => { id => [map { $_->metarecord } @$tmp] }
        )->gather(1);
    }

    $old_mrs = [] if (!ref($old_mrs));

    my $mr;
    for my $m (@$old_mrs) {
        if ($m->fingerprint eq $bib->fingerprint) {
            $mr = $m;
        } else {
            my $others = $cstore->request(
                'open-ils.cstore.direct.metabib.metarecord_source_map.id_list.atomic' => { metarecord => $m->id }
            )->gather(1);

            # only remove the metarecord when no other source maps to it
            if (!@$others) {
                $cstore->request(
                    'open-ils.cstore.direct.metabib.metarecord.delete' => $m->id
                )->gather(1);
            }

            # flagged for every non-matching old metarecord; the flag selects
            # the hold targets searched for below
            $m->isdeleted(1);
        }
    }

    # always an array ref so the loops below are safe when no holds were fetched
    my $holds = [];
    if (!$mr) {
        # Get the matching MR, if any.
        $mr = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord.search',
            { fingerprint => $bib->fingerprint }
        )->gather(1);

        if (@$old_mrs) {
            my $found = $cstore->request(
                'open-ils.cstore.direct.action.hold_request.search.atomic',
                { hold_type => 'M', target => [ map { $_->id } grep { $_->isdeleted } @$old_mrs ] }
            )->gather(1);
            $holds = $found if (ref($found));
        }

        if ($mr) {
            for my $h (@$holds) {
                # NOTE(review): the metarecord object itself is stored as the
                # target here, while the source map below uses $mr->id --
                # confirm whether $mr->id is intended.
                $h->target($mr);
                $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
                $h->ischanged(1);
            }
        }
    }

    if (!$mr) {
        $mr = Fieldmapper::metabib::metarecord->new;
        $mr->fingerprint( $bib->fingerprint );
        $mr->master_record( $bib->id );
        $mr->id(
            $cstore->request(
                "open-ils.cstore.direct.metabib.metarecord.create",
                $mr => { quiet => 'true' }
            )->gather(1)
        );

        for my $h (grep { !$_->ischanged } @$holds) {
            # NOTE(review): same object-vs-id question as above.
            $h->target($mr);
            $cstore->request( 'open-ils.cstore.direct.action.hold_request.update' => $h )->gather(1);
        }
    } else {
        my $mrm = $cstore->request(
            'open-ils.cstore.direct.metabib.metarecord_source_map.search.atomic',
            { metarecord => $mr->id }
        )->gather(1);

        if (@$mrm) {
            my $best = $cstore->request(
                "open-ils.cstore.direct.biblio.record_entry.search",
                { id => [ map { $_->source } @$mrm ] },
                { 'select' => { bre => [ qw/id quality/ ] },
                  order_by => { bre => "quality desc" },
                  limit    => 1,
                }
            )->gather(1);

            # fall back to this record when the search returns nothing
            if ($best && $best->quality > $bib->quality) {
                $mr->master_record($best->id);
            } else {
                $mr->master_record($bib->id);
            }
        } else {
            $mr->master_record($bib->id);
        }

        $mr->clear_mods;

        $cstore->request( 'open-ils.cstore.direct.metabib.metarecord.update' => $mr )->gather(1);
    }

    my $mrm = Fieldmapper::metabib::metarecord_source_map->new;
    $mrm->source($bib->id);
    $mrm->metarecord($mr->id);

    $cstore->request( 'open-ils.cstore.direct.metabib.metarecord_source_map.create' => $mrm )->gather(1);
    $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.update' => $bib )->gather(1);

    my $committed = $cstore->request( 'open-ils.cstore.transaction.commit' )->gather(1);

    # release the connected session whether or not the commit succeeded,
    # as the other subs in this file that call connect() do
    $cstore->disconnect;

    return undef unless ($committed);

    return $bib->id;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.object",
    method      => "rw_biblio_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
221
# Retrieves one biblio.record_entry through cstore and hands the object
# to open-ils.ingest.full.biblio.object.
#
# Arguments: $self, $client (not used), $rec (value passed to the
# record_entry retrieve call).
# Returns the first value produced by the object ingest method, or undef
# when the retrieve yields nothing.
sub rw_biblio_ingest_single_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    # the read is wrapped in a transaction that is rolled back afterwards
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);
    my $entry = $cstore->request(
        'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec
    )->gather(1);
    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    if (!$entry or !@$entry) {
        return undef;
    }

    my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.object");
    return ($ingest->run($entry))[0];
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record",
    method      => "rw_biblio_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
246
# Ingests a list of bibliographic records.
#
# Arguments:
#   $self   - method object (used for method_lookup)
#   $client - not used
#   rest    - either one array ref of record ids or a flat list of ids
#
# Returns the number of records for which
# open-ils.ingest.full.biblio.object returned a defined value, or undef
# when no ids were given or the search found no records.
sub rw_biblio_ingest_record_list {
    my $self = shift;
    my $client = shift;
    my @rec = ref($_[0]) ? @{ $_[0] } : @_ ;

    return undef unless (@rec);

    OpenILS::Application::Ingest->post_init();
    my $cstore = OpenSRF::AppSession->connect( 'open-ils.cstore' );
    $cstore->request('open-ils.cstore.transaction.begin')->gather(1);

    # search on the collected id list (the array @rec, not a scalar $rec)
    my $r = $cstore->request( 'open-ils.cstore.direct.biblio.record_entry.search.atomic' => { id => \@rec } )->gather(1);

    $cstore->request('open-ils.cstore.transaction.rollback')->gather(1);
    $cstore->disconnect;

    return undef unless ($r and @$r);

    # count successful ingests instead of adding up the returned record ids
    my $count = 0;
    for my $bib (@$r) {
        my ($id) = $self->method_lookup("open-ils.ingest.full.biblio.object")->run($bib);
        $count++ if (defined $id);
    }

    return $count;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record_list",
    method      => "rw_biblio_ingest_record_list",
    api_level   => 1,
    argc        => 1,
);
274
# Builds the ingest data for one bibliographic record object without
# writing anything.
#
# Arguments: $self, $client (not used), $bib (object providing marc and
# id accessors).
# Returns a hash ref with the keys full_rec, field_entries, fingerprint
# and descriptor; every full_rec / field_entries element and the
# descriptor (when present) is stamped with $bib->id.
sub ro_biblio_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    # flat rows and field entries work on the parsed document;
    # fingerprint and descriptor are given the XML string
    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    foreach my $entry (@mXfe) {
        $entry->source($bib->id);
    }
    foreach my $row (@mfr) {
        $row->record($bib->id);
    }
    if ($rd) {
        $rd->record($bib->id);
    }

    my %result = (
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    );
    return \%result;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.object.readonly",
    method      => "ro_biblio_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
300
# Builds the ingest data for one MARCXML string without writing anything.
#
# Arguments: $self, $client (not used), then the XML string.
# Returns a hash ref with the keys full_rec, field_entries, fingerprint
# and descriptor. No record id is stamped on the results here.
sub ro_biblio_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my @mfr  = $self->method_lookup("open-ils.ingest.flat_marc.biblio.xml")->run($document);
    my @mXfe = $self->method_lookup("open-ils.ingest.extract.field_entry.all.xml")->run($document);
    my ($fp) = $self->method_lookup("open-ils.ingest.fingerprint.xml")->run($xml);
    my ($rd) = $self->method_lookup("open-ils.ingest.descriptor.xml")->run($xml);

    my %result = (
        full_rec      => \@mfr,
        field_entries => \@mXfe,
        fingerprint   => $fp,
        descriptor    => $rd,
    );
    return \%result;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.xml.readonly",
    method      => "ro_biblio_ingest_single_xml",
    api_level   => 1,
    argc        => 1,
);
321
# Retrieves one biblio.record_entry through cstore and builds its ingest
# data with open-ils.ingest.full.biblio.xml.readonly.
#
# Arguments: $self, $client (not used), $rec (value passed to the
# record_entry retrieve call and stamped on the results).
# Returns the hash ref produced by the xml method with $rec set on every
# field entry, every full_rec row and the descriptor (when there is one);
# undef when the record cannot be retrieved or the xml method returns
# nothing.
sub ro_biblio_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly")->run($r->marc);
    return undef unless ($res);

    $_->source($rec) for (@{$res->{field_entries}});
    $_->record($rec) for (@{$res->{full_rec}});

    # the descriptor may be missing; ro_biblio_ingest_single_object guards
    # it the same way
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record.readonly",
    method      => "ro_biblio_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
349
# Streaming variant of open-ils.ingest.full.biblio.record.readonly:
# receives values from $client one at a time, runs the read-only record
# ingest on each and responds with the result.
#
# Arguments: $self, $client (provides recv and respond).
# The loop ends when recv returns nothing or a response carries undefined
# content. Always returns undef.
sub ro_biblio_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # created here but not referenced again in this sub
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        if (!defined $rec) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.record.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.record.readonly");
        my ($res) = $ingest->run($rec);

        foreach my $entry (@{$res->{field_entries}}) {
            $entry->source($rec);
        }
        foreach my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.record_stream.readonly",
    method      => "ro_biblio_ingest_stream_record",
    api_level   => 1,
    stream      => 1,
);
380
# Streaming variant of open-ils.ingest.full.biblio.xml.readonly: receives
# XML values from $client one at a time, runs the read-only xml ingest on
# each and responds with the result.
#
# Arguments: $self, $client (provides recv and respond).
# The loop ends when recv returns nothing or a response carries undefined
# content. Always returns undef.
sub ro_biblio_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # created here but not referenced again in this sub
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        if (!defined $xml) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly");
        my ($res) = $ingest->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.xml_stream.readonly",
    method      => "ro_biblio_ingest_stream_xml",
    api_level   => 1,
    stream      => 1,
);
408
# Streaming import helper: receives record objects from $client one at a
# time, runs open-ils.ingest.full.biblio.xml.readonly on each object's
# marc value, stamps the object's id on the resulting field entries and
# full_rec rows, and responds with the result.
#
# Arguments: $self, $client (provides recv and respond).
# The loop ends when recv returns nothing or a response carries undefined
# content. Always returns undef.
sub rw_biblio_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # created here but not referenced again in this sub
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        if (!defined $bib) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.biblio.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.biblio.xml.readonly");
        my ($res) = $ingest->run($bib->marc);

        foreach my $entry (@{$res->{field_entries}}) {
            $entry->source($bib->id);
        }
        foreach my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.biblio.bib_stream.import",
    method      => "rw_biblio_ingest_stream_import",
    api_level   => 1,
    stream      => 1,
);
439
440
441 # --------------------------------------------------------------------------------
442 # Authority ingest
443
444 package OpenILS::Application::Ingest::Authority;
445 use base qw/OpenILS::Application::Ingest/;
446 use Unicode::Normalize;
447
# Builds the flat MARC rows for one authority record object without
# writing anything.
#
# Arguments: $self, $client (not used), $bib (object providing marc and
# id accessors).
# Returns a hash ref whose only key, full_rec, holds the rows, each
# stamped with $bib->id.
sub ro_authority_ingest_single_object {
    my ($self, $client, $bib) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($bib->marc);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    foreach my $row (@mfr) {
        $row->record($bib->id);
    }

    my %result = ( full_rec => \@mfr );
    return \%result;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.object.readonly",
    method      => "ro_authority_ingest_single_object",
    api_level   => 1,
    argc        => 1,
);
468
# Builds the flat MARC rows for one authority MARCXML string without
# writing anything.
#
# Arguments: $self, $client (not used), then the XML string.
# Returns a hash ref whose only key, full_rec, holds the rows.
sub ro_authority_ingest_single_xml {
    my ($self, $client, $raw) = @_;

    my $xml      = OpenILS::Application::Ingest::entityize($raw);
    my $document = $parser->parse_string($xml);

    my @mfr = $self->method_lookup("open-ils.ingest.flat_marc.authority.xml")->run($document);

    my %result = ( full_rec => \@mfr );
    return \%result;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.xml.readonly",
    method      => "ro_authority_ingest_single_xml",
    api_level   => 1,
    argc        => 1,
);
486
# Retrieves one authority.record_entry through cstore and builds its
# ingest data with open-ils.ingest.full.authority.xml.readonly.
#
# Arguments: $self, $client (not used), $rec (value passed to the
# record_entry retrieve call and stamped on the results).
# Returns the hash ref produced by the xml method with $rec set on every
# full_rec row; undef when the record cannot be retrieved or the xml
# method returns nothing.
sub ro_authority_ingest_single_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;

    OpenILS::Application::Ingest->post_init();
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    my ($res) = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly")->run($r->marc);
    return undef unless ($res);

    $_->record($rec) for (@{$res->{full_rec}});

    # ro_authority_ingest_single_xml returns only full_rec, so a descriptor
    # is normally absent; only stamp it when one is actually present
    $res->{descriptor}->record($rec) if ($res->{descriptor});

    return $res;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.record.readonly",
    method      => "ro_authority_ingest_single_record",
    api_level   => 1,
    argc        => 1,
);
513
# Streaming variant of open-ils.ingest.full.authority.record.readonly:
# receives values from $client one at a time, runs the read-only record
# ingest on each and responds with the result.
#
# Arguments: $self, $client (provides recv and respond).
# The loop ends when recv returns nothing or a response carries undefined
# content. Always returns undef.
sub ro_authority_ingest_stream_record {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # created here but not referenced again in this sub
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $rec = $resp->content;
        if (!defined $rec) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.record.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.authority.record.readonly");
        my ($res) = $ingest->run($rec);

        foreach my $row (@{$res->{full_rec}}) {
            $row->record($rec);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.record_stream.readonly",
    method      => "ro_authority_ingest_stream_record",
    api_level   => 1,
    stream      => 1,
);
543
# Streaming variant of open-ils.ingest.full.authority.xml.readonly:
# receives XML values from $client one at a time, runs the read-only xml
# ingest on each and responds with the result.
#
# Arguments: $self, $client (provides recv and respond).
# The loop ends when recv returns nothing or a response carries undefined
# content. Always returns undef.
sub ro_authority_ingest_stream_xml {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # created here but not referenced again in this sub
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $xml = $resp->content;
        if (!defined $xml) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly");
        my ($res) = $ingest->run($xml);

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.xml_stream.readonly",
    method      => "ro_authority_ingest_stream_xml",
    api_level   => 1,
    stream      => 1,
);
571
# Streaming import helper for authority records: receives record objects
# from $client one at a time, runs
# open-ils.ingest.full.authority.xml.readonly on each object's marc value,
# stamps the object's id on the resulting full_rec rows and responds with
# the result.
#
# Arguments: $self, $client (provides recv and respond).
# The loop ends when recv returns nothing or a response carries undefined
# content. Always returns undef.
sub rw_authority_ingest_stream_import {
    my ($self, $client) = @_;

    OpenILS::Application::Ingest->post_init();

    # created here but not referenced again in this sub
    my $ses = OpenSRF::AppSession->create('open-ils.cstore');

    while (my ($resp) = $client->recv( count => 1, timeout => 5 )) {

        my $bib = $resp->content;
        if (!defined $bib) {
            last;
        }

        $log->debug("Running open-ils.ingest.full.authority.xml.readonly ...");
        my $ingest = $self->method_lookup("open-ils.ingest.full.authority.xml.readonly");
        my ($res) = $ingest->run($bib->marc);

        foreach my $row (@{$res->{full_rec}}) {
            $row->record($bib->id);
        }

        $client->respond( $res );
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.full.authority.bib_stream.import",
    method      => "rw_authority_ingest_stream_import",
    api_level   => 1,
    stream      => 1,
);
601
602
603 # --------------------------------------------------------------------------------
604 # MARC index extraction
605
606 package OpenILS::Application::Ingest::XPATH;
607 use base qw/OpenILS::Application::Ingest/;
608 use Unicode::Normalize;
609
# Collects the text found under an XPath expression into one
# space-separated, NFD-normalized string.
#
# Arguments (positional):
#   $xml       - XML documentElement to search
#   $xpath     - XPath expression
#   $ns_uri    - namespace URI   } both must be true for setNamespace
#   $ns_prefix - namespace prefix } to be called on $xml
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere in the string built so far (substring test)
#
# For every matching node the text of each child node is appended followed
# by a space; a node without children contributes its own text instead.
# Returns the NFD form of the accumulated string.
sub xpath_to_string {
    my $xml = shift;
    my $xpath = shift;
    my $ns_uri = shift;
    my $ns_prefix = shift;
    my $unique = shift;

    $xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

    my $string = "";

    # grab the set of matching nodes
    my @nodes = $xml->findnodes( $xpath );
    for my $value (@nodes) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            # add the child's content to the growing buffer
            my $content = $child->textContent;
            $content = '' unless (defined $content);

            # Uniquify with a literal substring test. Interpolating the text
            # into a regex is unsafe here: an empty pattern makes Perl re-use
            # the last successfully matched pattern instead.
            next if ($unique && index($string, $content) >= 0);
            $string .= $content . " ";
        }
        if( ! @children ) {
            $string .= $value->textContent . " ";
        }
    }
    return NFD($string);
}
641
# Streams one *_field_entry object per configured index definition of the
# requested classes, extracted from a MARCXML document.
#
# Arguments: $self, $client (provides respond), $xml (parsed document, or
# a string that is entityized and parsed here), then the class names.
#
# For each class and each definition in $xpathset->{$class}, the document
# (transformed through the format's xslt when one is configured, cached
# per format) is searched with the definition's xpath. Empty values are
# skipped. The value is normalized and lowercased before being sent to the
# client with the definition's id as field. Always returns undef.
sub class_index_string_xml {
    my ($self, $client, $xml, @classes) = @_;

    OpenILS::Application::Ingest->post_init();
    if (!ref $xml) {
        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml));
    }

    # transformed documents, keyed by format name
    my %transform_cache;

    foreach my $class (@classes) {
        my $class_constructor = "Fieldmapper::metabib::${class}_field_entry";

        foreach my $type ( keys %{ $xpathset->{$class} } ) {

            my $def    = $xpathset->{$class}->{$type};
            my $format = $def->{format};
            my $sf     = $OpenILS::Application::Ingest::supported_formats{$format};

            my $document = $xml;
            if ($sf->{xslt}) {
                $transform_cache{$format} ||= $sf->{xslt}->transform($xml);
                $document = $transform_cache{$format};
            }

            my $value = xpath_to_string(
                $document->documentElement, $def->{xpath},
                $sf->{ns}, $def->{format},
                1
            );

            if (!$value) {
                next;
            }

            # strip marks and control characters, trailing non-word
            # characters and dots between word boundaries, then lowercase
            $value = NFD($value);
            $value =~ s/\pM+//sgo;
            $value =~ s/\pC+//sgo;
            $value =~ s/\W+$//sgo;

            $value =~ s/\b\.+\b//sgo;
            $value = lc($value);

            my $fm = $class_constructor->new;
            $fm->value( $value );
            $fm->field( $xpathset->{$class}->{$type}->{id} );
            $client->respond($fm);
        }
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.field_entry.class.xml",
    method      => "class_index_string_xml",
    api_level   => 1,
    argc        => 2,
    stream      => 1,
);
698
# Streams the field entries of the requested classes for one stored
# record, each stamped with the record id as source.
#
# Arguments: $self, $client (provides respond), $rec (value passed to the
# record_entry retrieve call), then one or more class names.
# Returns undef, also when the record cannot be retrieved.
sub class_index_string_record {
    my $self = shift;
    my $client = shift;
    my $rec = shift;
    # take every remaining argument, as class_index_string_xml does;
    # a lone "shift" would keep only the first class name
    my @classes = @_;

    OpenILS::Application::Ingest->post_init();
    # NOTE(review): this reads authority.record_entry while
    # all_index_string_record reads biblio.record_entry -- confirm which
    # table is intended here.
    my $r = OpenSRF::AppSession
            ->create('open-ils.cstore')
            ->request( 'open-ils.cstore.direct.authority.record_entry.retrieve' => $rec )
            ->gather(1);

    return undef unless ($r and @$r);

    for my $fm ($self->method_lookup("open-ils.ingest.field_entry.class.xml")->run($r->marc, @classes)) {
        $fm->source($rec);
        $client->respond($fm);
    }
    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.field_entry.class.record",
    method      => "class_index_string_record",
    api_level   => 1,
    argc        => 2,
    stream      => 1,
);
726
# Streams the field entries of every class listed in $xpathset for one
# MARCXML document by delegating to open-ils.ingest.field_entry.class.xml.
#
# Arguments: $self, $client (provides respond), $xml.
# Always returns undef.
sub all_index_string_xml {
    my ($self, $client, $xml) = @_;

    my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");

    foreach my $entry ($extractor->run($xml, keys(%$xpathset))) {
        $client->respond($entry);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.extract.field_entry.all.xml",
    method      => "all_index_string_xml",
    api_level   => 1,
    argc        => 1,
    stream      => 1,
);
744
# Streams the field entries of every class listed in $xpathset for one
# stored biblio.record_entry, each stamped with the record id as source.
#
# Arguments: $self, $client (provides respond), $rec (value passed to the
# record_entry retrieve call).
# Returns undef, also when the record cannot be retrieved.
sub all_index_string_record {
    my ($self, $client, $rec) = @_;

    OpenILS::Application::Ingest->post_init();

    my $session = OpenSRF::AppSession->create('open-ils.cstore');
    my $entry   = $session
        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
        ->gather(1);

    if (!$entry or !@$entry) {
        return undef;
    }

    my $extractor = $self->method_lookup("open-ils.ingest.field_entry.class.xml");

    foreach my $fm ($extractor->run($entry->marc, keys(%$xpathset))) {
        $fm->source($rec);
        $client->respond($fm);
    }

    return undef;
}
__PACKAGE__->register_method(
    api_name    => "open-ils.ingest.extract.field_entry.all.record",
    method      => "all_index_string_record",
    api_level   => 1,
    argc        => 1,
    stream      => 1,
);
771
772 # --------------------------------------------------------------------------------
773 # Flat MARC
774
775 package OpenILS::Application::Ingest::FlatMARC;
776 use base qw/OpenILS::Application::Ingest/;
777 use Unicode::Normalize;
778
779
# Flattens a MARCXML document into a list of full_rec objects.
#
# Arguments:
#   $marcxml - parsed document; the first element whose local name is
#              "record" is used as the root
#   $xmltype - Fieldmapper namespace segment for the row class
#              (defaults to 'metabib')
#
# Produces one row per leader (tag 'LDR'), one per controlfield and one per
# datafield subfield (with tag, ind1, ind2 and subfield code). Values are
# NFD-normalized with marks, control characters and trailing non-word
# characters removed; only subfield values are lowercased.
# Returns the list of rows.
sub _marcxml_to_full_rows {
    my ($marcxml, $xmltype) = @_;
    $xmltype ||= 'metabib';

    my $type = "Fieldmapper::${xmltype}::full_rec";

    # shared value clean-up used by all three field kinds
    my $scrub = sub {
        my $val = shift;
        $val = NFD($val);
        $val =~ s/\pM+//sgo;
        $val =~ s/\pC+//sgo;
        $val =~ s/\W+$//sgo;
        return $val;
    };

    my @ns_list;

    my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

    foreach my $leader ( @{$root->getChildrenByTagName("leader")} ) {
        next unless $leader;

        my $row = $type->new;
        $row->tag( 'LDR' );
        $row->value( $scrub->($leader->textContent) );

        push @ns_list, $row;
    }

    foreach my $control ( @{$root->getChildrenByTagName("controlfield")} ) {
        next unless $control;

        my $row = $type->new;
        $row->tag( $control->getAttribute( "tag" ) );
        $row->value( $scrub->($control->textContent) );

        push @ns_list, $row;
    }

    foreach my $datafield ( @{$root->getChildrenByTagName("datafield")} ) {
        next unless $datafield;

        my $tag  = $datafield->getAttribute( "tag" );
        my $ind1 = $datafield->getAttribute( "ind1" );
        my $ind2 = $datafield->getAttribute( "ind2" );

        foreach my $subfield ( @{$datafield->getChildrenByTagName('subfield')} ) {
            next unless $subfield;

            my $row = $type->new;
            $row->tag( $tag );
            $row->ind1( $ind1 );
            $row->ind2( $ind2 );
            $row->subfield( $subfield->getAttribute( "code" ) );
            $row->value( lc($scrub->($subfield->textContent)) );

            push @ns_list, $row;
        }
    }

    $log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml");
    return @ns_list;
}
853
# Stream the flattened full_rec rows for one MARCXML record.
#
# Arguments (after $self and $client):
#   $xml - MARCXML as a string, or an already-parsed object (any reference is
#          passed through to _marcxml_to_full_rows untouched)
#
# Rows are built as 'authority' rows when the invoked API name contains
# "authority", otherwise as 'metabib' rows.  Each row is sent to the caller
# with $client->respond; the method itself returns undef.
sub flat_marc_xml {
        my $self = shift;
        my $client = shift;
        my $xml = shift;

        $log->debug("processing [$xml]");

        # Initialize before the shared $parser is used, matching the order used by
        # flat_marc_record and biblio_fingerprint_record.
        # NOTE(review): post_init() presumably sets up $parser -- confirm.
        OpenILS::Application::Ingest->post_init();

        $xml = $parser->parse_string(OpenILS::Application::Ingest::entityize($xml)) unless (ref $xml);

        my $type = 'metabib';
        $type = 'authority' if ($self->api_name =~ /authority/o);

        $client->respond($_) for (_marcxml_to_full_rows($xml, $type));
        return undef;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.flat_marc.authority.xml",
        method          => "flat_marc_xml",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.flat_marc.biblio.xml",
        method          => "flat_marc_xml",
        api_level       => 1,
        argc            => 1,
        stream          => 1,
);
885
# Look up a stored record by id and stream its flattened MARC rows.
#
# Arguments (after $self and $client):
#   $rec - id handed to the cstore record_entry retrieve call
#
# The record class is 'authority' when the invoked API name contains
# "authority", otherwise 'biblio'.  Returns undef in every case; rows are
# delivered through $client->respond.
sub flat_marc_record {
        my ($self, $client, $rec) = @_;

        my $type = ($self->api_name =~ /authority/o) ? 'authority' : 'biblio';

        OpenILS::Application::Ingest->post_init();

        my $r = OpenSRF::AppSession
                ->create('open-ils.cstore')
                ->request( "open-ils.cstore.direct.${type}.record_entry.retrieve" => $rec )
                ->gather(1);

        # Nothing to flatten without a record that carries MARC.
        return undef if (!$r or !$r->marc);

        my $flattener = $self->method_lookup("open-ils.ingest.flat_marc.$type.xml");
        my @rows = $flattener->run($r->marc);

        foreach my $full_rec (@rows) {
                $client->respond($full_rec);
                $log->debug(OpenSRF::Utils::JSON->perl2JSON($full_rec), DEBUG);
        }

        return undef;
}

# Same handler, registered once per record class (biblio first, then authority).
for my $api_name (
        "open-ils.ingest.flat_marc.biblio.record_entry",
        "open-ils.ingest.flat_marc.authority.record_entry",
) {
        __PACKAGE__->register_method(
                api_name        => $api_name,
                method          => "flat_marc_record",
                api_level       => 1,
                argc            => 1,
                stream          => 1,
        );
}
924
925 # --------------------------------------------------------------------------------
926 # Fingerprinting
927
928 package OpenILS::Application::Ingest::Biblio::Fingerprint;
929 use base qw/OpenILS::Application::Ingest/;
930 use Unicode::Normalize;
931 use OpenSRF::EX qw/:try/;
932
# Fingerprint a stored bibliographic record.
#
# Arguments (after $self and $client):
#   $rec - id handed to the cstore biblio record_entry retrieve call
#
# Returns the structure produced by open-ils.ingest.fingerprint.xml with its
# {quality} entry truncated to an integer, or undef when the record cannot be
# retrieved, has no MARC, or the fingerprint call yields nothing.
sub biblio_fingerprint_record {
        my $self = shift;
        my $client = shift;
        my $rec = shift;

        OpenILS::Application::Ingest->post_init();

        my $r = OpenSRF::AppSession
                        ->create('open-ils.cstore')
                        ->request( 'open-ils.cstore.direct.biblio.record_entry.retrieve' => $rec )
                        ->gather(1);

        return undef unless ($r and $r->marc);

        my ($fp) = $self->method_lookup('open-ils.ingest.fingerprint.xml')->run($r->marc);

        # Without this guard, assigning to $fp->{quality} would autovivify a hash
        # and a failed fingerprint would be returned as { quality => 0 }.
        unless ($fp) {
                $log->error("Fingerprinting returned nothing for record $rec");
                return undef;
        }

        $fp->{quality} = int($fp->{quality});

        # Serialize the structure; interpolating the reference only prints HASH(0x...).
        $log->debug("Returning [".OpenSRF::Utils::JSON->perl2JSON($fp)."] as fingerprint for record $rec", INFO);
        return $fp;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.fingerprint.record",
        method          => "biblio_fingerprint_record",
        api_level       => 1,
        argc            => 1,
);
958
# ScriptRunner for the fingerprint script; created on first use and then reused.
our $fp_script;

# Run the configured biblio fingerprint script against one MARC record.
#
# Arguments (after $self and $client):
#   MARC XML text; it is passed through entityize() before use
#
# Returns whatever the script run yields, or undef (after logging an error)
# when the run yields a false value.
sub biblio_fingerprint {
        my $self = shift;
        my $client = shift;
        my $xml = OpenILS::Application::Ingest::entityize(shift);

        $log->internal("Got MARC [$xml]");

        if(!$fp_script) {
                my @pfx = ( "apps", "open-ils.ingest","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_fingerprint');
                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio fingerprinting...");

                $fp_script = OpenILS::Utils::ScriptRunner->new(
                        file          => $script_file,
                        paths         => $script_libs,
                        reset_count   => 100,
                );
        }

        $fp_script->insert('environment' => {marc => $xml} => 1);

        my $res = $fp_script->run;

        # Explicit failure branch: the outcome no longer depends on what
        # $log->error happens to return.
        unless ($res) {
                $log->error( "Fingerprint script died!  $@" );
                return undef;
        }

        $log->debug("Script for biblio fingerprinting completed successfully...");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.fingerprint.xml",
        method          => "biblio_fingerprint",
        api_level       => 1,
        argc            => 1,
);
996
# ScriptRunner for the descriptor script; created on first use and then reused.
our $rd_script;

# Run the configured biblio descriptor extraction script against one MARC record.
#
# Arguments (after $self and $client):
#   MARC XML text; it is passed through entityize() before use
#
# Returns whatever the script run yields, or undef (after logging an error)
# when the run yields a false value.
sub biblio_descriptor {
        my $self = shift;
        my $client = shift;
        my $xml = OpenILS::Application::Ingest::entityize(shift);

        $log->internal("Got MARC [$xml]");

        if(!$rd_script) {
                my @pfx = ( "apps", "open-ils.ingest","app_settings" );
                my $conf = OpenSRF::Utils::SettingsClient->new;

                my $libs        = $conf->config_value(@pfx, 'script_path');
                my $script_file = $conf->config_value(@pfx, 'scripts', 'biblio_descriptor');
                # script_path may be configured as a single value or as a list.
                my $script_libs = (ref($libs)) ? $libs : [$libs];

                $log->debug("Loading script $script_file for biblio descriptor extraction...");

                $rd_script = OpenILS::Utils::ScriptRunner->new(
                        file          => $script_file,
                        paths         => $script_libs,
                        reset_count   => 100,
                );
        }

        $log->debug("Setting up environment for descriptor extraction script...");
        $rd_script->insert('environment.marc' => $xml => 1);
        $log->debug("Environment building complete...");

        my $res = $rd_script->run;

        # Explicit failure branch: the outcome no longer depends on what
        # $log->error happens to return.
        unless ($res) {
                $log->error( "Descriptor script died!  $@" );
                return undef;
        }

        $log->debug("Script for biblio descriptor extraction completed successfully");

        return $res;
}
__PACKAGE__->register_method(
        api_name        => "open-ils.ingest.descriptor.xml",
        method          => "biblio_descriptor",
        api_level       => 1,
        argc            => 1,
);
1036
1037
1038 1;
1039