initial AUTHORITY worming
[working/Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Storage / WORM.pm
1 package OpenILS::Application::Storage::WORM;
2 use base qw/OpenILS::Application::Storage/;
3 use strict; use warnings;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10 my $log = 'OpenSRF::Utils::Logger';
11
12 use OpenILS::Utils::FlatXML;
13 use OpenILS::Utils::Fieldmapper;
14 use JSON;
15
16 use XML::LibXML;
17 use XML::LibXSLT;
18 use Time::HiRes qw(time);
19
20 my $xml_util    = OpenILS::Utils::FlatXML->new();
21
22 my $parser              = XML::LibXML->new();
23 my $xslt                        = XML::LibXSLT->new();
24 my $mods_sheet;
25 my $mads_sheet;
26
27 use open qw/:utf8/;
28
29 my $xpathset = {};
30
31 sub child_init {
32         my $meth = __PACKAGE__->method_lookup('open-ils.storage.direct.config.metabib_field.retrieve.all');
33         for my $f ($meth->run) {
34                 $xpathset->{ $f->field_class }->{ $f->name }->{xpath} = $f->xpath;
35                 $xpathset->{ $f->field_class }->{ $f->name }->{id} = $f->id;
36                 $log->debug("Loaded XPath from DB: ".$f->field_class." => ".$f->name." : ".$f->xpath, DEBUG);
37         }
38 }
39
40 # --------------------------------------------------------------------------------
41 # Fingerprinting
42
43 my @fp_mods_xpath = (
44         '//mods:mods/mods:typeOfResource[text()="text"]' => [
45                         title   => {
46                                         xpath   => [
47                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
48                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
49                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
50                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
51                                         ],
52                                         fixup   => '
53                                                         do {
54                                                                 $text = lc(NFD($text));
55                                                                 $text =~ s/\pM+//gso;
56                                                                 $text =~ s/\s+/ /sgo;
57                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
58                                                                 $text =~ s/\b(?:the|an?)\b//sgo;
59                                                                 $text =~ s/\[.[^\]]+\]//sgo;
60                                                                 $text =~ s/\s*[;\/\.]*$//sgo;
61                                                         };
62                                         ',
63                         },
64                         author  => {
65                                         xpath   => [
66                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
67                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
68                                         ],
69                                         fixup   => '
70                                                         do {
71                                                                 $text = lc(NFD($text));
72                                                                 $text =~ s/\pM+//gso;
73                                                                 $text =~ s/\s+/ /sgo;
74                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
75                                                                 $text =~ s/,?\s+.*$//sgo;
76                                                                 $text =~ s/\pM+//gso;
77                                                         };
78                                         ',
79                         },
80         ],
81
82         '//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
83                         title   => {
84                                         xpath   => [
85                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="uniform")]',
86                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="translated")]',
87                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and (@type="alternative")]',
88                                                         '//mods:mods/mods:relatedItem/mods:titleInfo[mods:title and not(@type)]',
89                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
90                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
91                                                         '//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
92                                                         '//mods:mods/mods:titleInfo[mods:title and not(@type)]',
93                                         ],
94                                         fixup   => '
95                                                         do {
96                                                                 $text = lc(NFD($text));
97                                                                 $text =~ s/\pM+//gso;
98                                                                 $text =~ s/\s+/ /sgo;
99                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
100                                                                 $text =~ s/\b(?:the|an?)\b//sgo;
101                                                                 $text =~ s/\[.[^\]]+\]//sgo;
102                                                                 $text =~ s/\s*[;\/\.]*$//sgo;
103                                                                 $text =~ s/\pM+//gso;
104                                                         };
105                                         ',
106                         },
107                         author  => {
108                                         xpath   => [
109                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
110                                                         '//mods:mods/mods:relatedItem/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
111                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
112                                                         '//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
113                                         ],
114                                         fixup   => '
115                                                         do {
116                                                                 $text = lc(NFD($text));
117                                                                 $text =~ s/\pM+//gso;
118                                                                 $text =~ s/\s+/ /sgo;
119                                                                 $text =~ s/^\s*(.+)\s*$/$1/sgo;
120                                                                 $text =~ s/,?\s+.*$//sgo;
121                                                                 $text =~ s/\pM+//gso;
122                                                         };
123                                         ',
124                         },
125         ],
126
127 );
128
129 push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
130
131 sub fingerprint_mods {
132         my $mods = shift;
133
134         my $fp_string = '';
135
136         my $match_index = 0;
137         my $block_index = 1;
138         while ( my $match_xpath = $fp_mods_xpath[$match_index] ) {
139                 if ( my @nodes = $mods->findnodes( $match_xpath ) ) {
140
141                         my $block_name_index = 0;
142                         my $block_value_index = 1;
143                         my $block = $fp_mods_xpath[$block_index];
144                         while ( my $part = $$block[$block_value_index] ) {
145                                 my $text;
146                                 for my $xpath ( @{ $part->{xpath} } ) {
147                                         $text = $mods->findvalue( $xpath );
148                                         last if ($text);
149                                 }
150
151                                 $log->debug("Found fingerprint text using $$block[$block_name_index] : [$text]", DEBUG);
152
153                                 if ($text) {
154                                         eval $$part{fixup};
155                                         $fp_string .= $text;
156                                 }
157
158                                 $block_name_index += 2;
159                                 $block_value_index += 2;
160                         }
161                 }
162                 if ($fp_string) {
163                         $fp_string =~ s/\W+//gso;
164                         $log->debug("Fingerprint is [$fp_string]", INFO);;
165                         return $fp_string;
166                 }
167
168                 $match_index += 2;
169                 $block_index += 2;
170         }
171         return undef;
172 }
173
174
175
176 # --------------------------------------------------------------------------------
177
178 my $in_xact;
179 my $begin;
180 my $commit;
181 my $rollback;
182 my $lookup;
183 my $update_entry;
184 my $mr_lookup;
185 my $mr_update;
186 my $mr_create;
187 my $create_source_map;
188 my $sm_lookup;
189 my $rm_old_rd;
190 my $rm_old_sm;
191 my $rm_old_fr;
192 my $rm_old_tr;
193 my $rm_old_ar;
194 my $rm_old_sr;
195 my $rm_old_kr;
196 my $rm_old_ser;
197
198 my $fr_create;
199 my $rd_create;
200 my $create = {};
201
202 my %descriptor_code = (
203         item_type => 'substr($ldr,6,1)',
204         item_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
205         bib_level => 'substr($ldr,7,1)',
206         control_type => 'substr($ldr,8,1)',
207         char_encoding => 'substr($ldr,9,1)',
208         enc_level => 'substr($ldr,17,1)',
209         cat_form => 'substr($ldr,18,1)',
210         pub_status => 'substr($ldr,5,1)',
211         item_lang => 'substr($oo8,35,3)',
212         #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
213         audience => 'substr($oo8,22,1)',
214 );
215
216 sub wormize {
217
218         my $self = shift;
219         my $client = shift;
220         my @docids = @_;
221
222         my $no_map = 0;
223         if ($self->api_name =~ /no_map/o) {
224                 $no_map = 1;
225         }
226
227         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
228                 unless ($in_xact);
229         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
230                 unless ($begin);
231         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
232                 unless ($commit);
233         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
234                 unless ($rollback);
235         $sm_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source')
236                 unless ($sm_lookup);
237         $mr_lookup = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint')
238                 unless ($mr_lookup);
239         $mr_update = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update')
240                 unless ($mr_update);
241         $mr_create = $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create')
242                 unless ($mr_create);
243         $create_source_map = $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create')
244                 unless ($create_source_map);
245         $lookup = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve')
246                 unless ($lookup);
247         $update_entry = $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update')
248                 unless ($update_entry);
249         $rm_old_sm = $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete')
250                 unless ($rm_old_sm);
251         $rm_old_rd = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete')
252                 unless ($rm_old_rd);
253         $rm_old_fr = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete')
254                 unless ($rm_old_fr);
255         $rm_old_tr = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete')
256                 unless ($rm_old_tr);
257         $rm_old_ar = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete')
258                 unless ($rm_old_ar);
259         $rm_old_sr = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete')
260                 unless ($rm_old_sr);
261         $rm_old_kr = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete')
262                 unless ($rm_old_kr);
263         $rm_old_ser = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete')
264                 unless ($rm_old_ser);
265         $rd_create = $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create')
266                 unless ($rd_create);
267         $fr_create = $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create')
268                 unless ($fr_create);
269         $$create{title} = $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.batch.create')
270                 unless ($$create{title});
271         $$create{author} = $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.batch.create')
272                 unless ($$create{author});
273         $$create{subject} = $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.batch.create')
274                 unless ($$create{subject});
275         $$create{keyword} = $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.batch.create')
276                 unless ($$create{keyword});
277         $$create{series} = $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.batch.create')
278                 unless ($$create{series});
279
280
281         my ($outer_xact) = $in_xact->run;
282         try {
283                 unless ($outer_xact) {
284                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
285                         my ($r) = $begin->run($client);
286                         unless (defined $r and $r) {
287                                 $rollback->run;
288                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
289                         }
290                 }
291         } catch Error with {
292                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
293         };
294
295         my @source_maps;
296         my @entry_list;
297         my @mr_list;
298         my @rd_list;
299         my @ns_list;
300         my @mods_data;
301         my $ret = 0;
302         for my $entry ( $lookup->run(@docids) ) {
303                 # step -1: grab the doc from storage
304                 next unless ($entry);
305
306                 if(!$mods_sheet) {
307                         my $xslt_doc = $parser->parse_file(
308                                 OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
309                         $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
310                 }
311
312                 my $xml = $entry->marc;
313                 my $docid = $entry->id;
314                 my $marcdoc = $parser->parse_string($xml);
315                 my $modsdoc = $mods_sheet->transform($marcdoc);
316
317                 my $mods = $modsdoc->documentElement;
318                 $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
319
320                 $entry->fingerprint( fingerprint_mods( $mods ) );
321                 push @entry_list, $entry;
322
323                 $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);
324
325                 unless ($no_map) {
326                         my ($mr) = $mr_lookup->run( $entry->fingerprint );
327                         if (!$mr || !@$mr) {
328                                 $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
329                                 $mr = new Fieldmapper::metabib::metarecord;
330                                 $mr->fingerprint( $entry->fingerprint );
331                                 $mr->master_record( $entry->id );
332                                 my ($new_mr) = $mr_create->run($mr);
333                                 $mr->id($new_mr);
334                                 unless (defined $mr) {
335                                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
336                                 }
337                         } else {
338                                 $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
339                                 $mr->mods('');
340                                 push @mr_list, $mr;
341                         }
342
343                         my $sm = new Fieldmapper::metabib::metarecord_source_map;
344                         $sm->metarecord( $mr->id );
345                         $sm->source( $entry->id );
346                         push @source_maps, $sm;
347                 }
348
349                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
350                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
351
352                 my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
353                 for my $rd_field ( keys %descriptor_code ) {
354                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
355                 }
356                 $rd_obj->record( $docid );
357                 push @rd_list, $rd_obj;
358
359                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };
360
361                 # step 2: build the KOHA rows
362                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
363                 $_->record( $docid ) for (@tmp_list);
364                 push @ns_list, @tmp_list;
365
366                 $ret++;
367
368                 last unless ($self->api_name =~ /batch$/o);
369         }
370
371         $rm_old_rd->run( { record => \@docids } );
372         $rm_old_fr->run( { record => \@docids } );
373         $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
374         $rm_old_tr->run( { source => \@docids } );
375         $rm_old_ar->run( { source => \@docids } );
376         $rm_old_sr->run( { source => \@docids } );
377         $rm_old_kr->run( { source => \@docids } );
378         $rm_old_ser->run( { source => \@docids } );
379
380         unless ($no_map) {
381                 my ($sm) = $create_source_map->run(@source_maps);
382                 unless (defined $sm) {
383                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
384                 }
385                 my ($mr) = $mr_update->run(@mr_list);
386                 unless (defined $mr) {
387                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
388                 }
389         }
390
391         my ($re) = $update_entry->run(@entry_list);
392         unless (defined $re) {
393                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
394         }
395
396         my ($rd) = $rd_create->run(@rd_list);
397         unless (defined $rd) {
398                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
399         }
400
401         my ($fr) = $fr_create->run(@ns_list);
402         unless (defined $fr) {
403                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
404         }
405
406         # step 5: insert the new metadata
407         for my $class ( qw/title author subject keyword series/ ) {
408                 my @md_list = ();
409                 for my $doc ( @mods_data ) {
410                         my ($did) = keys %$doc;
411                         my ($data) = values %$doc;
412
413                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
414                         for my $row ( keys %{ $$data{$class} } ) {
415                                 next unless (exists $$data{$class}{$row});
416                                 next unless ($$data{$class}{$row}{value});
417                                 my $fm_obj = $fm_constructor->new;
418                                 $fm_obj->value( $$data{$class}{$row}{value} );
419                                 $fm_obj->field( $$data{$class}{$row}{field_id} );
420                                 $fm_obj->source( $did );
421                                 $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);
422
423                                 push @md_list, $fm_obj;
424                         }
425                 }
426                         
427                 my ($cr) = $$create{$class}->run(@md_list);
428                 unless (defined $cr) {
429                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
430                 }
431         }
432
433         unless ($outer_xact) {
434                 $log->debug("Commiting transaction started by the WoRM.", INFO);
435                 my ($c) = $commit->run;
436                 unless (defined $c and $c) {
437                         $rollback->run;
438                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
439                 }
440         }
441
442         return $ret;
443 }
444 __PACKAGE__->register_method( 
445         api_name        => "open-ils.worm.wormize",
446         method          => "wormize",
447         api_level       => 1,
448         argc            => 1,
449 );
450 __PACKAGE__->register_method( 
451         api_name        => "open-ils.worm.wormize.no_map",
452         method          => "wormize",
453         api_level       => 1,
454         argc            => 1,
455 );
456 __PACKAGE__->register_method( 
457         api_name        => "open-ils.worm.wormize.batch",
458         method          => "wormize",
459         api_level       => 1,
460         argc            => 1,
461 );
462 __PACKAGE__->register_method( 
463         api_name        => "open-ils.worm.wormize.no_map.batch",
464         method          => "wormize",
465         api_level       => 1,
466         argc            => 1,
467 );
468
469
470 my $ain_xact;
471 my $abegin;
472 my $acommit;
473 my $arollback;
474 my $alookup;
475 my $aupdate_entry;
476 my $amr_lookup;
477 my $amr_update;
478 my $amr_create;
479 my $acreate_source_map;
480 my $asm_lookup;
481 my $arm_old_rd;
482 my $arm_old_sm;
483 my $arm_old_fr;
484 my $arm_old_tr;
485 my $arm_old_ar;
486 my $arm_old_sr;
487 my $arm_old_kr;
488 my $arm_old_ser;
489
490 my $afr_create;
491 my $ard_create;
492 my $acreate = {};
493
494 sub authority_wormize {
495
496         my $self = shift;
497         my $client = shift;
498         my @docids = @_;
499
500         my $no_map = 0;
501         if ($self->api_name =~ /no_map/o) {
502                 $no_map = 1;
503         }
504
505         $in_xact = $self->method_lookup( 'open-ils.storage.transaction.current')
506                 unless ($in_xact);
507         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
508                 unless ($begin);
509         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
510                 unless ($commit);
511         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
512                 unless ($rollback);
513         $alookup = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve')
514                 unless ($alookup);
515         $aupdate_entry = $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update')
516                 unless ($aupdate_entry);
517         $arm_old_rd = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete')
518                 unless ($arm_old_rd);
519         $arm_old_fr = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete')
520                 unless ($arm_old_fr);
521         $ard_create = $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create')
522                 unless ($ard_create);
523         $afr_create = $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create')
524                 unless ($afr_create);
525
526
527         my ($outer_xact) = $in_xact->run;
528         try {
529                 unless ($outer_xact) {
530                         $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
531                         my ($r) = $begin->run($client);
532                         unless (defined $r and $r) {
533                                 $rollback->run;
534                                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
535                         }
536                 }
537         } catch Error with {
538                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
539         };
540
541         my @source_maps;
542         my @entry_list;
543         my @mr_list;
544         my @rd_list;
545         my @ns_list;
546         my @mads_data;
547         my $ret = 0;
548         for my $entry ( $lookup->run(@docids) ) {
549                 # step -1: grab the doc from storage
550                 next unless ($entry);
551
552                 #if(!$mads_sheet) {
553                 #       my $xslt_doc = $parser->parse_file(
554                 #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
555                 #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
556                 #}
557
558                 my $xml = $entry->marc;
559                 my $docid = $entry->id;
560                 my $marcdoc = $parser->parse_string($xml);
561                 #my $madsdoc = $mads_sheet->transform($marcdoc);
562
563                 #my $mads = $madsdoc->documentElement;
564                 #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );
565
566                 push @entry_list, $entry;
567
568                 my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
569                 my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');
570
571                 my $rd_obj = Fieldmapper::authority::record_descriptor->new;
572                 for my $rd_field ( keys %descriptor_code ) {
573                         $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
574                 }
575                 $rd_obj->record( $docid );
576                 push @rd_list, $rd_obj;
577
578                 # step 2: build the KOHA rows
579                 my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
580                 $_->record( $docid ) for (@tmp_list);
581                 push @ns_list, @tmp_list;
582
583                 $ret++;
584
585                 last unless ($self->api_name =~ /batch$/o);
586         }
587
588         $arm_old_rd->run( { record => \@docids } );
589         $arm_old_fr->run( { record => \@docids } );
590
591         my ($rd) = $ard_create->run(@rd_list);
592         unless (defined $rd) {
593                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
594         }
595
596         my ($fr) = $fr_create->run(@ns_list);
597         unless (defined $fr) {
598                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
599         }
600
601         unless ($outer_xact) {
602                 $log->debug("Commiting transaction started by the WoRM.", INFO);
603                 my ($c) = $commit->run;
604                 unless (defined $c and $c) {
605                         $rollback->run;
606                         throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
607                 }
608         }
609
610         return $ret;
611 }
612 __PACKAGE__->register_method( 
613         api_name        => "open-ils.worm.authortiy.wormize",
614         method          => "wormize",
615         api_level       => 1,
616         argc            => 1,
617 );
618 __PACKAGE__->register_method( 
619         api_name        => "open-ils.worm.authority.wormize.batch",
620         method          => "wormize",
621         api_level       => 1,
622         argc            => 1,
623 );
624
625
626 # --------------------------------------------------------------------------------
627
628
629 sub _marcxml_to_full_rows {
630
631         my $marcxml = shift;
632         my $type = shift || 'Fieldmapper::metabib::full_rec';
633
634         my @ns_list;
635         
636         my $root = $marcxml->documentElement;
637
638         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
639                 next unless $tagline;
640
641                 my $ns = new Fieldmapper::metabib::full_rec;
642
643                 $ns->tag( 'LDR' );
644                 my $val = NFD($tagline->textContent);
645                 $val =~ s/(\pM+)//gso;
646                 $ns->value( $val );
647
648                 push @ns_list, $ns;
649         }
650
651         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
652                 next unless $tagline;
653
654                 my $ns = new Fieldmapper::metabib::full_rec;
655
656                 $ns->tag( $tagline->getAttribute( "tag" ) );
657                 my $val = NFD($tagline->textContent);
658                 $val =~ s/(\pM+)//gso;
659                 $ns->value( $val );
660
661                 push @ns_list, $ns;
662         }
663
664         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
665                 next unless $tagline;
666
667                 my $tag = $tagline->getAttribute( "tag" );
668                 my $ind1 = $tagline->getAttribute( "ind1" );
669                 my $ind2 = $tagline->getAttribute( "ind2" );
670
671                 for my $data ( $tagline->childNodes ) {
672                         next unless $data;
673
674                         my $ns = $type->new;
675
676                         $ns->tag( $tag );
677                         $ns->ind1( $ind1 );
678                         $ns->ind2( $ind2 );
679                         $ns->subfield( $data->getAttribute( "code" ) );
680                         my $val = NFD($data->textContent);
681                         $val =~ s/(\pM+)//gso;
682                         $ns->value( lc($val) );
683
684                         push @ns_list, $ns;
685                 }
686         }
687         return @ns_list;
688 }
689
690 sub _get_field_value {
691
692         my( $root, $xpath ) = @_;
693
694         my $string = "";
695
696         # grab the set of matching nodes
697         my @nodes = $root->findnodes( $xpath );
698         for my $value (@nodes) {
699
700                 # grab all children of the node
701                 my @children = $value->childNodes();
702                 for my $child (@children) {
703
704                         # add the childs content to the growing buffer
705                         my $content = quotemeta($child->textContent);
706                         next if ($string =~ /$content/);  # uniquify the values
707                         $string .= $child->textContent . " ";
708                 }
709                 if( ! @children ) {
710                         $string .= $value->textContent . " ";
711                 }
712         }
713         $string = NFD($string);
714         $string =~ s/(\pM)//gso;
715         return lc($string);
716 }
717
718
719 sub modsdoc_to_values {
720         my( $self, $mods ) = @_;
721         my $data = {};
722         for my $class (keys %$xpathset) {
723                 $data->{$class} = {};
724                 for my $type (keys %{$xpathset->{$class}}) {
725                         $data->{$class}->{$type} = {};
726                         $data->{$class}->{$type}->{value} = _get_field_value( $mods, $xpathset->{$class}->{$type}->{xpath} );
727                         $data->{$class}->{$type}->{field_id} = $xpathset->{$class}->{$type}->{id};
728                 }
729         }
730         return $data;
731 }
732
733
734 1;
735
736