WoRM Jr
[working/Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / WoRM.pm
1 package OpenILS::Application::WoRM;
2 use base qw/OpenSRF::Application/;
3 use open qw/:utf8/;
4
5 use Unicode::Normalize;
6 use OpenSRF::EX qw/:try/;
7
8 use OpenSRF::Utils::SettingsClient;
9 use OpenSRF::Utils::Logger qw/:level/;
10
11 use OpenILS::Utils::FlatXML;
12 use OpenILS::Utils::Fieldmapper;
13 use JSON;
14
15 use OpenILS::Utils::Fieldmapper;
16
17 use XML::LibXML;
18 use XML::LibXSLT;
19 use Time::HiRes qw(time);
20
21
# Logger class name; logging methods are invoked directly on this string.
our $log = 'OpenSRF::Utils::Logger';

# Shared XML tooling, constructed once when the module is loaded.
our $parser   = XML::LibXML->new;
our $xslt     = XML::LibXSLT->new;
our $xml_util = OpenILS::Utils::FlatXML->new;

# Compiled stylesheets.  $mods_sheet is filled in lazily by post_init().
our ($mods_sheet, $mads_sheet);
29
# Cached storage-server session shared package-wide.
our $st_sess;

# st_sess([$sess])
# Accessor for the cached session.  A true $sess replaces the cached value;
# in every case the currently cached session is returned.
sub st_sess {
	my ($self, $sess) = @_;
	return $sess ? ($st_sess = $sess) : $st_sess;
}
37
# Index definitions keyed as $xpathset->{field_class}{name} = { xpath, id };
# populated on demand by post_init().
our $xpathset = {};

# Both hooks are intentionally empty; all setup happens lazily in post_init().
sub initialize { return; }
sub child_init { return; }
42
# post_init()
# Lazily builds the shared state the API methods rely on: the compiled MODS
# stylesheet, the cached storage session, and the table of index XPath
# definitions.  Each piece is only built while it is still missing, so the
# method can be called at the top of every API entry point.
sub post_init {
	$log->debug("Running post_init", DEBUG);

	if (!$mods_sheet) {
		$log->debug("Loading MODS XSLT", DEBUG);
		my $xsl_dir  = OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl');
		my $xslt_doc = $parser->parse_file($xsl_dir . "/MARC21slim2MODS.xsl");
		$mods_sheet  = $xslt->parse_stylesheet($xslt_doc);
	}

	unless (__PACKAGE__->st_sess()) {
		$log->debug("Creating cached storage server session", DEBUG);
		my $session = OpenSRF::AppSession->create('open-ils.storage');
		__PACKAGE__->st_sess($session);
	}

	if (!keys %$xpathset) {
		my $req = __PACKAGE__->st_sess()->request('open-ils.storage.direct.config.metabib_field.retrieve.all');
		while (my $resp = $req->recv) {
			my $field = $resp->content;
			my $class = $field->field_class;
			my $name  = $field->name;
			my $xpath = $field->xpath;

			# One entry per (class, name) pair, holding the expression and row id.
			my $entry = ($xpathset->{$class}->{$name} ||= {});
			$entry->{xpath} = $xpath;
			$entry->{id}    = $field->id;

			$log->debug("Loaded XPath from DB: $class => $name : $xpath", DEBUG);
		}
	}
}
68
69
70
# ensure_transaction($client)
# Makes sure the cached storage session is inside a transaction, beginning one
# when the storage server reports none.  A failed BEGIN is rolled back, the
# session is disconnected and the failure is logged (the exception raised
# inside the try block is absorbed by the catch handler).
# Returns whatever the storage server reports as the current transaction.
sub ensure_transaction {
	my ($self, $client) = @_;

	my $sess = __PACKAGE__->st_sess;
	my $outer_xact = $sess->request( 'open-ils.storage.transaction.current' )->gather(1);

	unless ($outer_xact) {
		try {
			$log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
			$sess->connect;
			my $began = $sess->request( 'open-ils.storage.transaction.begin' )->gather(1);
			if (!$began) {
				$sess->request( 'open-ils.storage.transaction.rollback' )->gather(1);
				$sess->disconnect;
				OpenSRF::EX::PANIC->throw("Couldn't BEGIN transaction!");
			}
		} catch Error with {
			$log->debug("WoRM Couldn't BEGIN transaction!", ERROR)
		};
	}

	return $sess->request( 'open-ils.storage.transaction.current' )->gather(1);
}
94
# commit_transaction($client)
# Commits the transaction open on the cached storage session and then
# disconnects it.  When the session is not connected or no transaction is
# reported, only an informational message is logged.  A failed COMMIT is
# rolled back and surfaces to the caller as an OpenSRF::EX::PANIC.
sub commit_transaction {
	my ($self, $client) = @_;

	my $sess = __PACKAGE__->st_sess;
	my $outer_xact = $sess->request( 'open-ils.storage.transaction.current' )->gather(1);

	try {
		if (!$sess->connected || !$outer_xact) {
			$log->debug("WoRM isn't inside a transaction.", INFO);
		} else {
			my $committed = $sess->request( 'open-ils.storage.transaction.commit' )->gather(1);
			if (!$committed) {
				$sess->request( 'open-ils.storage.transaction.rollback' )->gather(1);
				OpenSRF::EX::PANIC->throw("Couldn't COMMIT transaction!");
			}
			$sess->disconnect;
		}
	} catch Error with {
		OpenSRF::EX::PANIC->throw("WoRM Couldn't COMMIT transaction!")
	};
}
116
117
118 # --------------------------------------------------------------------------------
119 # MARC index extraction
120
121 package OpenILS::Application::WoRM::XPATH;
122 use base qw/OpenILS::Application::WoRM/;
123
# _xpath_to_string($xml, $xpath, $ns_uri, $ns_prefix, $unique)
#
# Give this a MODS documentElement and an XPath expression; it returns the
# text of every matching node as one space-separated string.
#
#   $xml       - element to search; must support setNamespace/findnodes
#   $xpath     - XPath expression selecting the nodes of interest
#   $ns_uri    - namespace URI to activate on $xml (optional)
#   $ns_prefix - prefix for that namespace (optional; both must be true)
#   $unique    - when true, a child's text is skipped if it already occurs
#                anywhere in the string built so far (substring test)
#
# The result is NFD-decomposed and stripped of combining marks, so accented
# characters are reduced to their base letters.  Each value is followed by a
# single space, including the last one.
sub _xpath_to_string {
	my ($xml, $xpath, $ns_uri, $ns_prefix, $unique) = @_;

	$xml->setNamespace( $ns_uri, $ns_prefix, 1 ) if ($ns_uri && $ns_prefix);

	my $string = "";

	for my $value ( $xml->findnodes( $xpath ) ) {
		my @children = $value->childNodes();

		# A matched node without children contributes its own text.
		if (!@children) {
			$string .= $value->textContent . " ";
			next;
		}

		for my $child (@children) {
			my $content = $child->textContent;

			# Literal substring test.  index() avoids building a regex from
			# the text, and unlike an interpolated pattern it behaves
			# predictably when $content is the empty string.
			next if ($unique && index($string, $content) >= 0);

			$string .= $content . " ";
		}
	}

	# NFD returns the decomposed string rather than modifying its argument,
	# so the result must be assigned.  The call is fully qualified so it
	# resolves no matter which package imported Unicode::Normalize.
	$string = Unicode::Normalize::NFD($string);
	$string =~ s/\pM+//g;

	return $string;
}
157
# class_all_index_string_xml($client, $xml, $class, $type)
# Streams one { field_name => index_string } hash per index definition stored
# under $class in $xpathset.  $xml may be a MARCXML string or an already
# parsed document; it is run through the MODS stylesheet for each definition.
# $type is accepted but not used.
sub class_all_index_string_xml {
	my ($self, $client, $xml, $class, $type) = @_;

	OpenILS::Application::WoRM->post_init();
	$xml = $parser->parse_string($xml) if (!ref $xml);

	# Build every result before responding, so nothing is sent unless all
	# extractions complete.
	my @results;
	for my $field ( keys %{ $xpathset->{$class} } ) {
		my $index_string = _xpath_to_string(
			$mods_sheet->transform($xml)->documentElement,
			$xpathset->{$class}->{$field}->{xpath},
			"http://www.loc.gov/mods/",
			"mods", 1 );
		push @results, { $field => $index_string };
	}

	for my $result (@results) {
		$client->respond($result);
	}

	return undef;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.xpath.class.xml",
	method    => "class_all_index_string_xml",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
186
# class_all_index_string_record($client, $rec, $class [, $type])
# Retrieves bib record $rec from the storage server and streams every index
# string of $class for it, by delegating to open-ils.worm.xpath.class.xml.
#
#   $rec   - id of the biblio.record_entry to load
#   $class - key into $xpathset selecting the index definitions to apply
#   $type  - optional; passed through unchanged to the XML variant
#
# Always returns undef; results are delivered through $client->respond.
sub class_all_index_string_record {
	my $self = shift;
	my $client = shift;
	my $rec = shift;
	my $class = shift;
	# Previously $type was used below without ever being declared, so it
	# silently referred to an unrelated package global.
	my $type = shift;

	OpenILS::Application::WoRM->post_init();
	my $r = OpenILS::Application::WoRM->st_sess->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);

	$client->respond($_) for ($self->method_lookup("open-ils.worm.xpath.class.xml")->run($r->marc, $class => $type));
	return undef;
}
__PACKAGE__->register_method(
	api_name	=> "open-ils.worm.xpath.class.record",
	method		=> "class_all_index_string_record",
	api_level	=> 1,
	argc		=> 1,
	stream		=> 1,
);
206
207
# class_index_string_xml($client, $xml, $class, $type)
# Returns the index string for the single definition $xpathset->{$class}{$type}.
# $xml may be a MARCXML string or an already parsed document; it is run
# through the MODS stylesheet before the XPath is evaluated.
sub class_index_string_xml {
	my ($self, $client, $xml, $class, $type) = @_;

	OpenILS::Application::WoRM->post_init();
	$xml = $parser->parse_string($xml) if (!ref $xml);

	my $mods_root = $mods_sheet->transform($xml)->documentElement;
	my $xpath     = $xpathset->{$class}->{$type}->{xpath};

	return _xpath_to_string( $mods_root, $xpath, "http://www.loc.gov/mods/", "mods", 1 );
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.xpath.xml",
	method    => "class_index_string_xml",
	api_level => 1,
	argc      => 1,
);
225
# class_index_string_record($client, $rec, $class, $type)
# Loads bib record $rec from the storage server and returns the index string
# for $class/$type, as computed by open-ils.worm.xpath.xml.
sub class_index_string_record {
	my ($self, $client, $rec, $class, $type) = @_;

	OpenILS::Application::WoRM->post_init();

	my $storage = OpenILS::Application::WoRM->st_sess;
	my $r = $storage->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);

	my $xml_method = $self->method_lookup("open-ils.worm.xpath.xml");
	my ($d) = $xml_method->run($r->marc, $class => $type);

	$log->debug("XPath $class->$type for bib rec $rec returns ($d)", DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.xpath.record",
	method    => "class_index_string_record",
	api_level => 1,
	argc      => 1,
);
246
247
248 # --------------------------------------------------------------------------------
249 # MARC Descriptor
250
251 package OpenILS::Application::WoRM::Biblio::Leader;
252 use base qw/OpenILS::Application::WoRM/;
253
# Maps each record_descriptor field to a closure that extracts its value.
# The closures read the package globals $ldr (leader text) and $oo8 (008
# control field text), which _extract_descriptors localizes before calling
# them.  Offsets are zero-based character positions.
our %descriptor_code = (
	item_type     => sub { return substr($ldr, 6, 1) },
	# Read from 008/29 when the leader's item type is one of f g i m o p r,
	# otherwise from 008/23.
	item_form     => sub {
		my $offset = (substr($ldr, 6, 1) =~ /^[fgimopr]$/) ? 29 : 23;
		return substr($oo8, $offset, 1);
	},
	bib_level     => sub { return substr($ldr, 7, 1) },
	control_type  => sub { return substr($ldr, 8, 1) },
	char_encoding => sub { return substr($ldr, 9, 1) },
	enc_level     => sub { return substr($ldr, 17, 1) },
	cat_form      => sub { return substr($ldr, 18, 1) },
	pub_status    => sub { return substr($ldr, 5, 1) },
	item_lang     => sub { return substr($oo8, 35, 3) },
	#lit_form => sub { (substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"; },
	audience      => sub { return substr($oo8, 22, 1) },
);
267
# _extract_descriptors($xml)
# Builds a Fieldmapper::metabib::record_descriptor from a parsed MARCXML
# document.  The leader and 008 control field are placed in the localized
# package globals $ldr and $oo8, then every extractor in %descriptor_code is
# run and its result stored through the accessor of the same name.
sub _extract_descriptors {
	my ($xml) = @_;

	local $ldr = $xml->findvalue('//*[local-name()="leader"]');
	local $oo8 = $xml->findvalue('//*[local-name()="controlfield" and @tag="008"]');

	my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
	foreach my $rd_field ( keys %descriptor_code ) {
		my $extract = $descriptor_code{$rd_field};
		$rd_obj->$rd_field( $extract->() );
	}

	return $rd_obj;
}
281
# extract_desc_xml($client, $xml)
# Returns the record descriptor for a MARCXML record, given either as a
# string (parsed here) or as an already parsed document.
sub extract_desc_xml {
	my ($self, $client, $xml) = @_;

	if (!ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	return _extract_descriptors( $xml );
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.biblio_leader.xml",
	method    => "extract_desc_xml",
	api_level => 1,
	argc      => 1,
);
297
# extract_desc_record($client, $rec)
# Loads bib record $rec from the storage server and returns its record
# descriptor, as computed by open-ils.worm.biblio_leader.xml.
sub extract_desc_record {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::WoRM->post_init();

	my $storage = OpenILS::Application::WoRM->st_sess;
	my $r = $storage->request( "open-ils.storage.direct.biblio.record_entry.retrieve" => $rec )->gather(1);

	my $xml_method = $self->method_lookup("open-ils.worm.biblio_leader.xml");
	my ($d) = $xml_method->run($r->marc);

	$log->debug("Record descriptor for bib rec $rec is ".JSON->perl2JSON($d), DEBUG);
	return $d;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.biblio_leader.record",
	method    => "extract_desc_record",
	api_level => 1,
	argc      => 1,
);
316
317 # --------------------------------------------------------------------------------
318 # Flat MARC
319
320 package OpenILS::Application::WoRM::FlatMARC;
321 use base qw/OpenILS::Application::WoRM/;
322
323
# _marcxml_to_full_rows($marcxml, $xmltype)
#
# Flattens a parsed MARCXML document into a list of
# Fieldmapper::<xmltype>::full_rec objects, one per leader, control field and
# subfield of the first "record" element found.
#
#   $marcxml - parsed document (must support findnodes)
#   $xmltype - Fieldmapper namespace to build rows in; defaults to 'metabib'
#
# Every value is NFD-decomposed with combining marks removed.  Subfield
# values are additionally lowercased; leader and control field values are not.
# Returns the list of row objects.
sub _marcxml_to_full_rows {

	my $marcxml = shift;
	my $xmltype = shift || 'metabib';

	my $type = "Fieldmapper::${xmltype}::full_rec";

	# Shared value clean-up.  The NFD call is fully qualified so it resolves
	# no matter which package imported Unicode::Normalize.
	my $strip_marks = sub {
		my $val = Unicode::Normalize::NFD(shift);
		$val =~ s/\pM+//g;
		return $val;
	};

	my @ns_list;

	my ($root) = $marcxml->findnodes('//*[local-name()="record"]');

	for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( 'LDR' );
		$ns->value( $strip_marks->( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
		next unless $tagline;

		my $ns = $type->new;

		$ns->tag( $tagline->getAttribute( "tag" ) );
		$ns->value( $strip_marks->( $tagline->textContent ) );

		push @ns_list, $ns;
	}

	for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
		next unless $tagline;

		my $tag = $tagline->getAttribute( "tag" );
		my $ind1 = $tagline->getAttribute( "ind1" );
		my $ind2 = $tagline->getAttribute( "ind2" );

		# Only subfield elements are visited.  Walking every child node
		# would also hit the whitespace text nodes of pretty-printed
		# MARCXML, which have no getAttribute method.
		for my $data ( @{$tagline->getChildrenByTagName("subfield")} ) {
			next unless $data;

			my $ns = $type->new;

			$ns->tag( $tag );
			$ns->ind1( $ind1 );
			$ns->ind2( $ind2 );
			$ns->subfield( $data->getAttribute( "code" ) );
			$ns->value( lc( $strip_marks->( $data->textContent ) ) );

			push @ns_list, $ns;
		}
	}

	$log->debug("Returning ".scalar(@ns_list)." Fieldmapper nodes from $xmltype xml", DEBUG);
	return @ns_list;
}
388
# flat_marc_xml($client, $xml)
# Streams the flattened full_rec rows for a MARCXML record, given either as a
# string or as a parsed document.  Rows are built as 'authority' objects when
# the invoked API name contains "authority", and as 'metabib' objects otherwise.
# Always returns undef; rows are delivered through $client->respond.
sub flat_marc_xml {
	my ($self, $client, $xml) = @_;

	if (!ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	my $type = ($self->api_name =~ /authority/) ? 'authority' : 'metabib';

	OpenILS::Application::WoRM->post_init();

	my @rows = _marcxml_to_full_rows($xml, $type);
	for my $row (@rows) {
		$client->respond($row);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.flat_marc.authority.xml",
	method    => "flat_marc_xml",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.flat_marc.biblio.xml",
	method    => "flat_marc_xml",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
418
# flat_marc_record($client, $rec)
# Loads record $rec from the storage server and streams its flattened rows by
# delegating to the matching open-ils.worm.flat_marc.<type>.xml method.  The
# type is 'authority' when the invoked API name contains "authority", and
# 'biblio' otherwise.
# Always returns undef; rows are delivered through $client->respond.
sub flat_marc_record {
	my ($self, $client, $rec) = @_;

	my $type = ($self->api_name =~ /authority/) ? 'authority' : 'biblio';

	OpenILS::Application::WoRM->post_init();

	my $storage = OpenILS::Application::WoRM->st_sess;
	my $r = $storage->request( "open-ils.storage.direct.${type}.record_entry.retrieve" => $rec )->gather(1);

	my @rows = $self->method_lookup("open-ils.worm.flat_marc.$type.xml")->run($r->marc);
	for my $row (@rows) {
		$client->respond($row);
	}
	return undef;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.flat_marc.biblio.record_entry",
	method    => "flat_marc_record",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.flat_marc.authority.record_entry",
	method    => "flat_marc_record",
	api_level => 1,
	argc      => 1,
	stream    => 1,
);
447
448
449 # --------------------------------------------------------------------------------
450 # Fingerprinting
451
452 package OpenILS::Application::WoRM::Biblio::Fingerprint;
453 use base qw/OpenILS::Application::WoRM/;
454
# Fingerprint extraction rules.
#
# @fp_mods_xpath is a flat list of (match_xpath => block) pairs.  _fp_mods
# tries the pairs in order; for the block of a matching pair it walks the
# (part_name => { xpath => [...], fixup => sub }) pairs, takes the first
# xpath that yields text, and runs the part's fixup on it.
#
# Fixups take no arguments: they normalise the package global $text in place,
# which _fp_mods sets up with "local $text" before calling them.

# _fp_run_fixup(@steps)
# Runs each step against $text, logging the value before the first step and
# again after every step.
sub _fp_run_fixup {
	$log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
	for my $step (@_) {
		$step->();
		$log->debug("Fingerprint text /durring/ fixup : [$text]", INTERNAL);
	}
}

# Steps shared by the title and author fixups.
my @fp_common_steps = (
	# NFD returns the decomposed string instead of modifying its argument,
	# so the result has to be assigned for the mark stripping below to have
	# anything to remove from precomposed characters.  Fully qualified so
	# the call resolves in this package without its own import.
	sub { $text = Unicode::Normalize::NFD($text) },
	sub { $text =~ s/\pM+//sg },               # drop combining marks
	sub { $text = lc($text) },
	sub { $text =~ s/\s+/ /sg },               # collapse whitespace runs
	sub { $text =~ s/^\s*(.+)\s*$/$1/sg },     # strip leading whitespace
);

my $fp_title_fixup = sub {
	_fp_run_fixup(
		@fp_common_steps,
		sub { $text =~ s/\b(?:the|an?)\b//sg },    # remove articles
		sub { $text =~ s/\[.[^\]]+\]//sg },        # remove [bracketed] text
		sub { $text =~ s/\s*[;\/\.]*$//sg },       # trailing punctuation
	);
};

my $fp_author_fixup = sub {
	_fp_run_fixup(
		@fp_common_steps,
		sub { $text =~ s/,?\s+.*$//sg },           # keep the first word only
	);
};

# Candidate locations on the record itself, in order of preference.
my @fp_title_xpath = (
	'//mods:mods/mods:titleInfo[mods:title and (@type="uniform")]',
	'//mods:mods/mods:titleInfo[mods:title and (@type="translated")]',
	'//mods:mods/mods:titleInfo[mods:title and (@type="alternative")]',
	'//mods:mods/mods:titleInfo[mods:title and not(@type)]',
);
my @fp_author_xpath = (
	'//mods:mods/mods:name[mods:role/mods:text/text()="creator" and @type="personal"]/mods:namePart',
	'//mods:mods/mods:name[mods:role/mods:text/text()="creator"]/mods:namePart',
);

# The same locations underneath mods:relatedItem.
my @fp_related_title_xpath  = map { my $x = $_; $x =~ s{^//mods:mods/}{//mods:mods/mods:relatedItem/}; $x } @fp_title_xpath;
my @fp_related_author_xpath = map { my $x = $_; $x =~ s{^//mods:mods/}{//mods:mods/mods:relatedItem/}; $x } @fp_author_xpath;

my @fp_mods_xpath = (
	'//mods:mods/mods:typeOfResource[text()="text"]' => [
			title	=> {
					xpath	=> [ @fp_title_xpath ],
					fixup	=> $fp_title_fixup,
			},
			author	=> {
					xpath	=> [ @fp_author_xpath ],
					fixup	=> $fp_author_fixup,
			},
	],

	'//mods:mods/mods:relatedItem[@type!="host" and @type!="series"]' => [
			title	=> {
					xpath	=> [ @fp_related_title_xpath, @fp_title_xpath ],
					fixup	=> $fp_title_fixup,
			},
			author	=> {
					xpath	=> [ @fp_related_author_xpath, @fp_author_xpath ],
					fixup	=> $fp_author_fixup,
			},
	],

);

# Last resort: any record with a titleInfo reuses the first block's rules.
push @fp_mods_xpath, '//mods:mods/mods:titleInfo' => $fp_mods_xpath[1];
567
# _fp_mods($mods)
# Computes the fingerprint of a MODS element using the rules in
# @fp_mods_xpath, which is read as (match_xpath, block) pairs.  For the first
# pair whose match_xpath selects nodes and whose block produces text, the
# fixed-up text of every part is concatenated, all non-word characters are
# removed and the result is returned.  Returns undef when no pair produces
# any text.
sub _fp_mods {
	my $mods = shift;
	$mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

	my $fp_string = '';

	for (my $match_index = 0; $fp_mods_xpath[$match_index]; $match_index += 2) {
		my @nodes = $mods->findnodes( $fp_mods_xpath[$match_index] );
		next unless @nodes;

		# The block is itself a flat list of (part_name, part) pairs.
		my $block = $fp_mods_xpath[$match_index + 1];
		for (my $name_index = 0; $$block[$name_index + 1]; $name_index += 2) {
			my $part = $$block[$name_index + 1];

			# The fixup subs operate on this package global in place.
			local $text;
			for my $xpath ( @{ $part->{xpath} } ) {
				$text = $mods->findvalue( $xpath );
				last if ($text);
			}

			$log->debug("Found fingerprint text using $$block[$name_index] : [$text]", DEBUG);

			next unless ($text);

			$$part{fixup}->();
			$log->debug("Fingerprint text after fixup : [$text]", DEBUG);
			$fp_string .= $text;
		}

		if ($fp_string) {
			$fp_string =~ s/\W+//gso;
			$log->debug("Fingerprint is [$fp_string]", INFO);
			return $fp_string;
		}
	}

	return undef;
}
612
613
# fingerprint_bibrec($client, $rec)
# Loads bib record $rec from the storage server and returns its fingerprint,
# as computed by open-ils.worm.fingerprint.marc.
sub fingerprint_bibrec {
	my ($self, $client, $rec) = @_;

	OpenILS::Application::WoRM->post_init();

	my $storage = OpenILS::Application::WoRM->st_sess;
	my $r = $storage->request( 'open-ils.storage.direct.biblio.record_entry.retrieve' => $rec )->gather(1);

	my $marc_method = $self->method_lookup('open-ils.worm.fingerprint.marc');
	my ($fp) = $marc_method->run($r->marc);

	$log->debug("Returning [$fp] as fingerprint for record $rec", INFO);
	return $fp;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.fingerprint.bib_record",
	method    => "fingerprint_bibrec",
	api_level => 1,
	argc      => 1,
);
633
# fingerprint_mods($client, $xml)
# Parses a MODS XML string and returns the fingerprint of its root element.
sub fingerprint_mods {
	my ($self, $client, $xml) = @_;

	OpenILS::Application::WoRM->post_init();

	my $doc  = $parser->parse_string($xml);
	my $mods = $doc->documentElement;

	return _fp_mods( $mods );
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.fingerprint.mods",
	method    => "fingerprint_mods",
	api_level => 1,
	argc      => 1,
);
650
# fingerprint_marc($client, $xml)
# Returns the fingerprint of a MARCXML record, given either as a string or as
# a parsed document.  The record is transformed with the MODS stylesheet and
# the resulting root element is fingerprinted.
sub fingerprint_marc {
	my ($self, $client, $xml) = @_;

	if (!ref $xml) {
		$xml = $parser->parse_string($xml);
	}

	OpenILS::Application::WoRM->post_init();

	my $mods_root = $mods_sheet->transform($xml)->documentElement;
	my $fp = _fp_mods( $mods_root );

	$log->debug("Returning [$fp] as fingerprint", INFO);
	return $fp;
}
__PACKAGE__->register_method(
	api_name  => "open-ils.worm.fingerprint.marc",
	method    => "fingerprint_marc",
	api_level => 1,
	argc      => 1,
);
669
670
671 # --------------------------------------------------------------------------------
672
673 1;
674 __END__
# File-level cache of storage method handles.  wormize() fills each of
# these on first use via method_lookup() and reuses them afterwards.
my (
        $in_xact,           $begin,      $commit,     $rollback,
        $lookup,            $update_entry,
        $mr_lookup,         $mr_update,  $mr_create,
        $create_source_map, $sm_lookup,
        $rm_old_rd,         $rm_old_sm,  $rm_old_fr,
        $rm_old_tr,         $rm_old_ar,  $rm_old_sr,
        $rm_old_kr,         $rm_old_ser,
        $fr_create,         $rd_create,
);

# Per-class "batch.create" handles, keyed by search class name.
my $create = {};

# Record-descriptor field name => Perl expression (kept as a string).
# wormize() and authority_wormize() string-eval each expression with
# $ldr (text of the MARC leader) and $oo8 (value of controlfield 008)
# in scope, and store the result on the descriptor object.
my %descriptor_code = (
        pub_status    => 'substr($ldr,5,1)',
        item_type     => 'substr($ldr,6,1)',
        bib_level     => 'substr($ldr,7,1)',
        control_type  => 'substr($ldr,8,1)',
        char_encoding => 'substr($ldr,9,1)',
        enc_level     => 'substr($ldr,17,1)',
        cat_form      => 'substr($ldr,18,1)',
        item_form     => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,29,1) : substr($oo8,23,1)',
        audience      => 'substr($oo8,22,1)',
        item_lang     => 'substr($oo8,35,3)',
        #lit_form => '(substr($ldr,6,1) =~ /^(?:f|g|i|m|o|p|r)$/) ? substr($oo8,33,1) : "0"',
);
712
# wormize( $self, $client, @docids )
#
# Rebuilds the derived metabib data for the bibliographic record entries
# named by @docids:
#
#   * recomputes each entry's fingerprint from its MODS rendering,
#   * finds or creates the metarecord for that fingerprint and builds a
#     metarecord/source map row (skipped when the API name contains
#     "no_map"),
#   * builds a record descriptor from the MARC leader and 008 field,
#   * builds the flattened full_rec rows,
#   * deletes the old derived rows for @docids and inserts the new ones.
#
# Unless the API name ends in "batch", only the first record returned by
# the retrieve call is processed.  If no storage transaction is active,
# one is begun here and committed at the end.
#
# Returns the number of records processed.  Throws OpenSRF::EX::PANIC
# when a storage call reports failure.
#
# NOTE(review): in this file the sub sits after an __END__ marker, so it
# is not compiled unless that marker is moved.
sub wormize {

        my $self = shift;
        my $client = shift;
        my @docids = @_;

        # The API name is fixed for the duration of the call, so decide
        # the mode flags once instead of re-matching inside the loop.
        my $no_map = ($self->api_name =~ /no_map/) ? 1 : 0;
        my $batch  = ($self->api_name =~ /batch$/) ? 1 : 0;

        # Resolve each storage method once and cache it in the file-level
        # lexicals.
        $in_xact ||= $self->method_lookup( 'open-ils.storage.transaction.current');
        $begin ||= $self->method_lookup( 'open-ils.storage.transaction.begin');
        $commit ||= $self->method_lookup( 'open-ils.storage.transaction.commit');
        $rollback ||= $self->method_lookup( 'open-ils.storage.transaction.rollback');
        $sm_lookup ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.search.source');
        $mr_lookup ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.search.fingerprint');
        $mr_update ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.batch.update');
        $mr_create ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord.create');
        $create_source_map ||= $self->method_lookup('open-ils.storage.direct.metabib.metarecord_source_map.batch.create');
        $lookup ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.retrieve');
        $update_entry ||= $self->method_lookup('open-ils.storage.direct.biblio.record_entry.batch.update');
        $rm_old_sm ||= $self->method_lookup( 'open-ils.storage.direct.metabib.metarecord_source_map.mass_delete');
        $rm_old_rd ||= $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.mass_delete');
        $rm_old_fr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.mass_delete');
        $rm_old_tr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.title_field_entry.mass_delete');
        $rm_old_ar ||= $self->method_lookup( 'open-ils.storage.direct.metabib.author_field_entry.mass_delete');
        $rm_old_sr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.subject_field_entry.mass_delete');
        $rm_old_kr ||= $self->method_lookup( 'open-ils.storage.direct.metabib.keyword_field_entry.mass_delete');
        $rm_old_ser ||= $self->method_lookup( 'open-ils.storage.direct.metabib.series_field_entry.mass_delete');
        $rd_create ||= $self->method_lookup( 'open-ils.storage.direct.metabib.record_descriptor.batch.create');
        $fr_create ||= $self->method_lookup( 'open-ils.storage.direct.metabib.full_rec.batch.create');
        for my $class ( qw/title author subject keyword series/ ) {
                $$create{$class} ||= $self->method_lookup( "open-ils.storage.direct.metabib.${class}_field_entry.batch.create");
        }


        # Open a transaction only if the caller has not already done so.
        my ($outer_xact) = $in_xact->run;
        try {
                unless ($outer_xact) {
                        $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
                        my ($r) = $begin->run($client);
                        unless (defined $r and $r) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
        };

        my @source_maps;
        my @entry_list;
        my @mr_list;
        my @rd_list;
        my @ns_list;
        my @mods_data;
        my $ret = 0;
        for my $entry ( $lookup->run(@docids) ) {
                # step -1: grab the doc from storage
                next unless ($entry);

                # Load the MARC -> MODS stylesheet lazily.
                if(!$mods_sheet) {
                        my $xslt_doc = $parser->parse_file(
                                OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
                        $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
                }

                my $xml = $entry->marc;
                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($xml);
                my $modsdoc = $mods_sheet->transform($marcdoc);

                my $mods = $modsdoc->documentElement;
                $mods->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

                # $mods is already a parsed element, so use the internal
                # fingerprint helper directly.  The fingerprint_mods() API
                # method expects ($self, $client, $xml_string) and would
                # try to parse an undefined string here.
                $entry->fingerprint( _fp_mods( $mods ) );
                push @entry_list, $entry;

                $log->debug("Fingerprint for Record Entry ".$docid." is [".$entry->fingerprint."]", INFO);

                unless ($no_map) {
                        my ($mr) = $mr_lookup->run( $entry->fingerprint );
                        if (!$mr || !@$mr) {
                                $log->debug("No metarecord found for fingerprint [".$entry->fingerprint."]; Creating a new one", INFO);
                                $mr = Fieldmapper::metabib::metarecord->new;
                                $mr->fingerprint( $entry->fingerprint );
                                $mr->master_record( $entry->id );

                                # Check the result of the create call itself;
                                # $mr is the object built above and is always
                                # defined.
                                my ($new_mr) = $mr_create->run($mr);
                                unless (defined $new_mr) {
                                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.create!")
                                }
                                $mr->id($new_mr);
                        } else {
                                $log->debug("Retrieved metarecord, id is ".$mr->id, INFO);
                                $mr->mods('');
                                push @mr_list, $mr;
                        }

                        my $sm = Fieldmapper::metabib::metarecord_source_map->new;
                        $sm->metarecord( $mr->id );
                        $sm->source( $entry->id );
                        push @source_maps, $sm;
                }

                # NOTE: the names $ldr and $oo8 must not change; the
                # expressions in %descriptor_code refer to them and are
                # string-eval'd in this scope.
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::metabib::record_descriptor->new;
                for my $rd_field ( keys %descriptor_code ) {
                        $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                push @mods_data, { $docid => $self->modsdoc_to_values( $mods ) };

                # step 2: build the KOHA rows
                my @tmp_list = _marcxml_to_full_rows( $marcdoc );
                $_->record( $docid ) for (@tmp_list);
                push @ns_list, @tmp_list;

                $ret++;

                # Non-batch API names process a single record only.
                last unless ($batch);
        }

        # Drop the previously derived rows for the requested records.
        $rm_old_rd->run( { record => \@docids } );
        $rm_old_fr->run( { record => \@docids } );
        $rm_old_sm->run( { source => \@docids } ) unless ($no_map);
        $rm_old_tr->run( { source => \@docids } );
        $rm_old_ar->run( { source => \@docids } );
        $rm_old_sr->run( { source => \@docids } );
        $rm_old_kr->run( { source => \@docids } );
        $rm_old_ser->run( { source => \@docids } );

        unless ($no_map) {
                my ($sm) = $create_source_map->run(@source_maps);
                unless (defined $sm) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord_source_map.batch.create!")
                }
                my ($mr) = $mr_update->run(@mr_list);
                unless (defined $mr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.metarecord.batch.update!")
                }
        }

        my ($re) = $update_entry->run(@entry_list);
        unless (defined $re) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.biblio.record_entry.batch.update!")
        }

        my ($rd) = $rd_create->run(@rd_list);
        unless (defined $rd) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.record_descriptor.batch.create!")
        }

        my ($fr) = $fr_create->run(@ns_list);
        unless (defined $fr) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.full_rec.batch.create!")
        }

        # step 5: insert the new metadata
        for my $class ( qw/title author subject keyword series/ ) {
                my @md_list = ();
                for my $doc ( @mods_data ) {
                        # Each element is a one-pair hash: docid => data.
                        my ($did) = keys %$doc;
                        my ($data) = values %$doc;

                        my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
                        for my $row ( keys %{ $$data{$class} } ) {
                                next unless (exists $$data{$class}{$row});
                                next unless ($$data{$class}{$row}{value});
                                my $fm_obj = $fm_constructor->new;
                                $fm_obj->value( $$data{$class}{$row}{value} );
                                $fm_obj->field( $$data{$class}{$row}{field_id} );
                                $fm_obj->source( $did );
                                $log->debug("$class entry: ".$fm_obj->source." => ".$fm_obj->field." : ".$fm_obj->value, DEBUG);

                                push @md_list, $fm_obj;
                        }
                }

                my ($cr) = $$create{$class}->run(@md_list);
                unless (defined $cr) {
                        throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.metabib.${class}_field_entry.batch.create!")
                }
        }

        # Commit only a transaction that was opened by this call.
        unless ($outer_xact) {
                $log->debug("Commiting transaction started by the WoRM.", INFO);
                my ($c) = $commit->run;
                unless (defined $c and $c) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}
# Register wormize() under its four API names.  wormize() inspects the
# API name it was called through: "no_map" skips metarecord mapping and
# a trailing "batch" allows more than one record per call.
for my $wormize_api_name (
        "open-ils.worm.wormize",
        "open-ils.worm.wormize.no_map",
        "open-ils.worm.wormize.batch",
        "open-ils.worm.wormize.no_map.batch",
) {
        __PACKAGE__->register_method(
                api_name        => $wormize_api_name,
                method          => "wormize",
                api_level       => 1,
                argc            => 1,
        );
}
965
966
# File-level lexicals reserved for cached authority storage method
# handles.  authority_wormize() assigns $alookup, $aupdate_entry,
# $arm_old_rd, $arm_old_fr, $ard_create and $afr_create on first use.
my (
        $ain_xact,           $abegin,      $acommit,     $arollback,
        $alookup,            $aupdate_entry,
        $amr_lookup,         $amr_update,  $amr_create,
        $acreate_source_map, $asm_lookup,
        $arm_old_rd,         $arm_old_sm,  $arm_old_fr,
        $arm_old_tr,         $arm_old_ar,  $arm_old_sr,
        $arm_old_kr,         $arm_old_ser,
        $afr_create,         $ard_create,
);

my $acreate = {};
990
# authority_wormize( $self, $client, @docids )
#
# Rebuilds the derived data for the authority record entries named by
# @docids: a record descriptor built from the MARC leader and 008 field,
# and the flattened authority full_rec rows.  Old descriptor and
# full_rec rows for @docids are deleted before the new ones are
# inserted.
#
# Unless the API name ends in "batch", only the first record returned by
# the retrieve call is processed.  If no storage transaction is active,
# one is begun here and committed at the end.
#
# Returns the number of records processed.  Throws OpenSRF::EX::PANIC
# when a storage call reports failure.
#
# NOTE(review): in this file the sub sits after an __END__ marker, so it
# is not compiled unless that marker is moved.
sub authority_wormize {

        my $self = shift;
        my $client = shift;
        my @docids = @_;

        # The API name is fixed for the duration of the call.
        my $batch = ($self->api_name =~ /batch$/) ? 1 : 0;

        # Transaction handles are shared with wormize(); the authority
        # specific handles are cached in their own file-level lexicals.
        $in_xact ||= $self->method_lookup( 'open-ils.storage.transaction.current');
        $begin ||= $self->method_lookup( 'open-ils.storage.transaction.begin');
        $commit ||= $self->method_lookup( 'open-ils.storage.transaction.commit');
        $rollback ||= $self->method_lookup( 'open-ils.storage.transaction.rollback');
        $alookup ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.retrieve');
        $aupdate_entry ||= $self->method_lookup('open-ils.storage.direct.authority.record_entry.batch.update');
        $arm_old_rd ||= $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.mass_delete');
        $arm_old_fr ||= $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.mass_delete');
        $ard_create ||= $self->method_lookup( 'open-ils.storage.direct.authority.record_descriptor.batch.create');
        $afr_create ||= $self->method_lookup( 'open-ils.storage.direct.authority.full_rec.batch.create');


        # Open a transaction only if the caller has not already done so.
        my ($outer_xact) = $in_xact->run;
        try {
                unless ($outer_xact) {
                        $log->debug("WoRM isn't inside a transaction, starting one now.", INFO);
                        my ($r) = $begin->run($client);
                        unless (defined $r and $r) {
                                $rollback->run;
                                throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
                        }
                }
        } catch Error with {
                throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
        };

        my @rd_list;
        my @ns_list;
        my $ret = 0;

        # Use the authority retrieve handle ($alookup).  $lookup is the
        # bibliographic one and is only set once wormize() has run.
        for my $entry ( $alookup->run(@docids) ) {
                # step -1: grab the doc from storage
                next unless ($entry);

                # MADS transformation is currently disabled:
                #if(!$mads_sheet) {
                #       my $xslt_doc = $parser->parse_file(
                #               OpenSRF::Utils::SettingsClient->new->config_value(dirs => 'xsl') .  "/MARC21slim2MODS.xsl");
                #       $mads_sheet = $xslt->parse_stylesheet( $xslt_doc );
                #}

                my $xml = $entry->marc;
                my $docid = $entry->id;
                my $marcdoc = $parser->parse_string($xml);
                #my $madsdoc = $mads_sheet->transform($marcdoc);

                #my $mads = $madsdoc->documentElement;
                #$mads->setNamespace( "http://www.loc.gov/mads/", "mads", 1 );

                # NOTE: the names $ldr and $oo8 must not change; the
                # expressions in %descriptor_code refer to them and are
                # string-eval'd in this scope.
                my $ldr = $marcdoc->documentElement->getChildrenByTagName('leader')->pop->textContent;
                my $oo8 = $marcdoc->documentElement->findvalue('//*[local-name()="controlfield" and @tag="008"]');

                my $rd_obj = Fieldmapper::authority::record_descriptor->new;
                for my $rd_field ( keys %descriptor_code ) {
                        $rd_obj->$rd_field( eval "$descriptor_code{$rd_field};" );
                }
                $rd_obj->record( $docid );
                push @rd_list, $rd_obj;

                # step 2: build the KOHA rows
                my @tmp_list = _marcxml_to_full_rows( $marcdoc, 'Fieldmapper::authority::full_rec' );
                $_->record( $docid ) for (@tmp_list);
                push @ns_list, @tmp_list;

                $ret++;

                # Non-batch API names process a single record only.
                last unless ($batch);
        }

        # Drop the previously derived rows for the requested records.
        $arm_old_rd->run( { record => \@docids } );
        $arm_old_fr->run( { record => \@docids } );

        my ($rd) = $ard_create->run(@rd_list);
        unless (defined $rd) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.record_descriptor.batch.create!")
        }

        # Insert through the authority full_rec handle ($afr_create);
        # $fr_create writes to the bibliographic full_rec table.
        my ($fr) = $afr_create->run(@ns_list);
        unless (defined $fr) {
                throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.direct.authority.full_rec.batch.create!")
        }

        # Commit only a transaction that was opened by this call.
        unless ($outer_xact) {
                $log->debug("Commiting transaction started by the WoRM.", INFO);
                my ($c) = $commit->run;
                unless (defined $c and $c) {
                        $rollback->run;
                        throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
                }
        }

        return $ret;
}
# Register authority_wormize() under its API names.  These must point at
# authority_wormize, not at the bibliographic wormize().
#
# "open-ils.worm.authortiy.wormize" is a misspelling of the intended
# "open-ils.worm.authority.wormize".  The correct name is registered
# here and the misspelled one is kept as an alias so that existing
# callers keep working.
for my $authority_api_name (
        "open-ils.worm.authority.wormize",
        "open-ils.worm.authortiy.wormize",      # legacy misspelling
        "open-ils.worm.authority.wormize.batch",
) {
        __PACKAGE__->register_method(
                api_name        => $authority_api_name,
                method          => "authority_wormize",
                api_level       => 1,
                argc            => 1,
        );
}
1121
1122
1123 # --------------------------------------------------------------------------------
1124
1125
# _strip_combining_marks( $text )
#
# Returns $text decomposed with NFD and with all combining marks (\pM)
# removed.
sub _strip_combining_marks {
        my $text = NFD(shift);
        $text =~ s/\pM+//g;
        return $text;
}

# _marcxml_to_full_rows( $marcxml [, $type] )
#
# Flattens a parsed MARCXML document into a list of row objects, one per
# leader, controlfield and subfield.
#
#   $marcxml - document object; its documentElement is the record
#   $type    - class name used to build every row
#              (default 'Fieldmapper::metabib::full_rec')
#
# Row contents:
#   leader       - tag 'LDR', value
#   controlfield - tag, value
#   subfield     - tag, ind1, ind2, subfield code, lower-cased value
#
# All values have combining marks stripped.  Returns the list of rows;
# the caller is expected to set the record id on each one.
sub _marcxml_to_full_rows {

        my $marcxml = shift;
        my $type = shift || 'Fieldmapper::metabib::full_rec';

        my @ns_list;

        my $root = $marcxml->documentElement;

        for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
                next unless $tagline;

                # Build every row from $type so that callers asking for a
                # different row class get it for the leader as well.
                my $ns = $type->new;

                $ns->tag( 'LDR' );
                $ns->value( _strip_combining_marks( $tagline->textContent ) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
                next unless $tagline;

                my $ns = $type->new;

                $ns->tag( $tagline->getAttribute( "tag" ) );
                $ns->value( _strip_combining_marks( $tagline->textContent ) );

                push @ns_list, $ns;
        }

        for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
                next unless $tagline;

                my $tag = $tagline->getAttribute( "tag" );
                my $ind1 = $tagline->getAttribute( "ind1" );
                my $ind2 = $tagline->getAttribute( "ind2" );

                for my $data ( $tagline->childNodes ) {
                        next unless $data;

                        # childNodes also yields whitespace text and
                        # comment nodes, which carry no attributes; only
                        # element children are subfields.
                        next unless ( ref($data) && $data->can('getAttribute') );

                        my $ns = $type->new;

                        $ns->tag( $tag );
                        $ns->ind1( $ind1 );
                        $ns->ind2( $ind2 );
                        $ns->subfield( $data->getAttribute( "code" ) );
                        $ns->value( lc( _strip_combining_marks( $data->textContent ) ) );

                        push @ns_list, $ns;
                }
        }
        return @ns_list;
}
1186
# _get_field_value( $root, $xpath )
#
# Collects the text of every node under $root matched by $xpath into a
# single space-separated string.
#
# For a matched node with children, each child's text is appended unless
# that exact text already occurs somewhere in the string built so far
# (a simple substring-based de-duplication).  For a matched node without
# children, the node's own text is appended unconditionally.
#
# Returns the string NFD-decomposed, with combining marks removed, and
# lower-cased.  Every appended value is followed by a single space.
sub _get_field_value {

        my( $root, $xpath ) = @_;

        my $string = "";

        # grab the set of matching nodes
        my @nodes = $root->findnodes( $xpath );
        for my $value (@nodes) {

                # grab all children of the node
                my @children = $value->childNodes();
                for my $child (@children) {

                        my $content = $child->textContent;
                        $content = '' unless defined $content;

                        # uniquify the values with a literal substring
                        # test.  A regex is avoided on purpose: when the
                        # text is empty, m/$content/ becomes an empty
                        # pattern, which Perl replaces with the last
                        # successfully matched pattern.
                        next if (index($string, $content) >= 0);

                        # add the child's content to the growing buffer
                        $string .= $content . " ";
                }
                if( ! @children ) {
                        $string .= $value->textContent . " ";
                }
        }
        $string = NFD($string);
        $string =~ s/\pM//g;
        return lc($string);
}
1214
1215
# modsdoc_to_values( $self, $mods )
#
# Extracts the indexable field values from a MODS element.
#
# For every search class and field name in $xpathset, the field's XPath
# is evaluated against $mods with _get_field_value().
#
# Returns a hash reference shaped as
#   { $class => { $name => { field_id => <id>, value => <text> } } }
#
# wormize() reads both keys and skips rows whose value is empty, so the
# value has to be filled in here for any field entry to be created.
# NOTE(review): the XPath expressions come from configuration; confirm
# that they use the namespace prefix wormize() sets on the MODS element.
sub modsdoc_to_values {
        my( $self, $mods ) = @_;
        my $data = {};
        for my $class (keys %$xpathset) {
                $data->{$class} = {};
                for my $type (keys %{$xpathset->{$class}}) {
                        my $field = $xpathset->{$class}->{$type};
                        $data->{$class}->{$type} = {
                                field_id => $field->{id},
                                value    => _get_field_value( $mods, $field->{xpath} ),
                        };
                }
        }
        return $data;
}
1228
1229
1230 1;
1231
1232