3dfe28f8042152506db1a503636428cdaeca28ce
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Storage / WORM.pm
1 package OpenILS::Application::Storage::WORM;
2 use base qw/OpenILS::Application::Storage/;
3 use strict; use warnings;
4
5 use OpenSRF::EX qw/:try/;
6
7 use OpenILS::Utils::FlatXML;
8 use OpenILS::Utils::Fieldmapper;
9 use JSON;
10
11 use XML::LibXML;
12 use XML::LibXSLT;
13 use Time::HiRes qw(time);
14
15 my $xml_util    = OpenILS::Utils::FlatXML->new();
16
17 my $parser              = XML::LibXML->new();
18 my $xslt                        = XML::LibXSLT->new();
19 my $xslt_doc    =       $parser->parse_file( "/home/miker/cvs/OpenILS/app_server/stylesheets/MARC21slim2MODS.xsl" );
20 #my $xslt_doc   = $parser->parse_file( "/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl" );
21 my $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
22
23 use open qw/:utf8/;
24
25
26 sub child_init {
27         #try {
28         #       __PACKAGE__->method_lookup('i.do.not.exist');
29         #} catch Error with {
30         #       warn shift();
31         #};
32 }
33
34
35 # get me from the database
36 my $xpathset = {
37
38         title => {
39
40                 abbreviated => 
41                         "//mods:mods/mods:titleInfo[mods:title and (\@type='abreviated')]",
42
43                 translated =>
44                         "//mods:mods/mods:titleInfo[mods:title and (\@type='translated')]",
45
46                 uniform =>
47                         "//mods:mods/mods:titleInfo[mods:title and (\@type='uniform')]",
48
49                 proper =>
50                         "//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
51         },
52
53         author => {
54
55                 corporate => 
56                         "//mods:mods/mods:name[\@type='corporate']/mods:namePart".
57                                 "[../mods:role/mods:text[text()='creator']][1]",
58
59                 personal => 
60                         "//mods:mods/mods:name[\@type='personal']/mods:namePart".
61                                 "[../mods:role/mods:text[text()='creator']][1]",
62
63                 conference => 
64                         "//mods:mods/mods:name[\@type='conference']/mods:namePart".
65                                 "[../mods:role/mods:text[text()='creator']][1]",
66
67                 other => 
68                         "//mods:mods/mods:name[\@type='personal']/mods:namePart",
69         },
70
71         subject => {
72
73                 geographic => 
74                         "//mods:mods/mods:subject/mods:geographic",
75
76                 name => 
77                         "//mods:mods/mods:subject/mods:name",
78
79                 temporal => 
80                         "//mods:mods/mods:subject/mods:temporal",
81
82                 topic => 
83                         "//mods:mods/mods:subject/mods:topic",
84
85                 genre => 
86                         "//mods:mods/mods:genre",
87
88         },
89
90         keyword => { keyword => "//mods:mods/*[not(local-name()='originInfo')]", },
91
92 };
93
94
95 # --------------------------------------------------------------------------------
96
97 my $begin;
98 my $commit;
99 my $rollback;
100 my $lookup;
101 my $rm_old_fr;
102 my $rm_old_tr;
103 my $rm_old_ar;
104 my $rm_old_sr;
105 my $rm_old_kr;
106
107 my $fr_create;
108 my $create = {};
109
110
111 sub wormize {
112
113         my $self = shift;
114         my $client = shift;
115         my @docids = @_;
116
117         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
118                 unless ($begin);
119         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
120                 unless ($commit);
121         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
122                 unless ($rollback);
123         $lookup = $self->method_lookup('open-ils.storage.biblio.record_marc.batch.retrieve')
124                 unless ($lookup);
125         $rm_old_fr = $self->method_lookup( 'open-ils.storage.metabib.full_rec.mass_delete')
126                 unless ($rm_old_fr);
127         $rm_old_tr = $self->method_lookup( 'open-ils.storage.metabib.title_field_entry.mass_delete')
128                 unless ($rm_old_tr);
129         $rm_old_ar = $self->method_lookup( 'open-ils.storage.metabib.author_field_entry.mass_delete')
130                 unless ($rm_old_ar);
131         $rm_old_sr = $self->method_lookup( 'open-ils.storage.metabib.subject_field_entry.mass_delete')
132                 unless ($rm_old_sr);
133         $rm_old_kr = $self->method_lookup( 'open-ils.storage.metabib.keyword_field_entry.mass_delete')
134                 unless ($rm_old_kr);
135         $fr_create = $self->method_lookup( 'open-ils.storage.metabib.full_rec.batch.create')
136                 unless ($fr_create);
137         $$create{title} = $self->method_lookup( 'open-ils.storage.metabib.title_field_entry.batch.create')
138                 unless ($$create{title});
139         $$create{author} = $self->method_lookup( 'open-ils.storage.metabib.author_field_entry.batch.create')
140                 unless ($$create{author});
141         $$create{subject} = $self->method_lookup( 'open-ils.storage.metabib.subject_field_entry.batch.create')
142                 unless ($$create{subject});
143         $$create{keyword} = $self->method_lookup( 'open-ils.storage.metabib.keyword_field_entry.batch.create')
144                 unless ($$create{keyword});
145
146
147         try {
148                 my ($r) = $begin->run($client);
149                 unless (defined $r and $r) {
150                         $rollback->run;
151                         throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
152                 }
153         } catch Error with {
154                 throw OpenSRF::EX::PANIC ("WoRM Couldn't BEGIN transaction!")
155         };
156
157         my @ns_list;
158         my @mods_data;
159         my $ret = 0;
160         for my $marc ( $lookup->run(@docids) ) {
161                 # step -1: grab the doc from storage
162                 next unless ($marc);
163
164                 my $xml = $marc->marc;
165                 my $docid = $marc->id;
166                 my $marcdoc = $parser->parse_string($xml);
167
168                 push @mods_data, { $docid => $self->modsdoc_to_values( $mods_sheet->transform($marcdoc) ) };
169
170                 # step 2: build the KOHA rows
171                 my @tmp_list = _marcxml_to_full_rows( $marcdoc );
172                 $_->record( $docid ) for (@tmp_list);
173                 push @ns_list, @tmp_list;
174
175                 $ret++;
176
177                 last unless ($self->api_name =~ /batch$/o);
178         }
179
180         $rm_old_fr->run( { record => \@docids } );
181         $rm_old_tr->run( { source => \@docids } );
182         $rm_old_ar->run( { source => \@docids } );
183         $rm_old_sr->run( { source => \@docids } );
184         $rm_old_kr->run( { source => \@docids } );
185
186         my ($fr) = $fr_create->run(@ns_list);
187         unless (defined $fr) {
188                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.metabib.full_rec.batch.create!")
189         }
190
191         # step 5: insert the new metadata
192         for my $class ( qw/title author subject keyword/ ) {
193                 my @md_list = ();
194                 for my $doc ( @mods_data ) {
195                         my ($did) = keys %$doc;
196                         my ($data) = values %$doc;
197
198                         my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
199                         for my $row ( keys %{ $$data{$class} } ) {
200                                 next unless (exists $$data{$class}{$row});
201                                 next unless ($$data{$class}{$row});
202                                 my $fm_obj = $fm_constructor->new;
203                                 $fm_obj->value( $$data{$class}{$row} );
204                                 $fm_obj->source( $did );
205
206                                 # XXX This needs to be a real thing once the xpath is in the DB
207                                 $fm_obj->field( 1 );
208
209                                 push @md_list, $fm_obj;
210                         }
211                 }
212                         
213                 my ($cr) = $$create{$class}->run(@md_list);
214                 unless (defined $cr) {
215                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.metabib.${class}_field_entry.batch.create!")
216                 }
217         }
218
219         my ($c) = $commit->run;
220         unless (defined $c and $c) {
221                 $rollback->run;
222                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
223         }
224
225         return $ret;
226 }
227 __PACKAGE__->register_method( 
228         api_name        => "open-ils.worm.wormize",
229         method          => "wormize",
230         api_level       => 1,
231         argc            => 1,
232 );
233 __PACKAGE__->register_method( 
234         api_name        => "open-ils.worm.wormize.batch",
235         method          => "wormize",
236         api_level       => 1,
237         argc            => 1,
238 );
239
240
241 # --------------------------------------------------------------------------------
242
243
244 sub _marcxml_to_full_rows {
245
246         my $marcxml = shift;
247
248         my @ns_list;
249         
250         my $root = $marcxml->documentElement;
251
252         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
253                 next unless $tagline;
254
255                 my $ns = new Fieldmapper::metabib::full_rec;
256
257                 $ns->tag( 'LDR' );
258                 my $val = $tagline->textContent;
259                 $val =~ s/(\pM)//gso;
260                 $ns->value( $val );
261
262                 push @ns_list, $ns;
263         }
264
265         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
266                 next unless $tagline;
267
268                 my $ns = new Fieldmapper::metabib::full_rec;
269
270                 $ns->tag( $tagline->getAttribute( "tag" ) );
271                 my $val = $tagline->textContent;
272                 $val =~ s/(\pM)//gso;
273                 $ns->value( $val );
274
275                 push @ns_list, $ns;
276         }
277
278         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
279                 next unless $tagline;
280
281                 for my $data ( @{$tagline->getChildrenByTagName("subfield")} ) {
282                         next unless $tagline;
283
284                         my $ns = new Fieldmapper::metabib::full_rec;
285
286                         $ns->tag( $tagline->getAttribute( "tag" ) );
287                         $ns->ind1( $tagline->getAttribute( "ind1" ) );
288                         $ns->ind2( $tagline->getAttribute( "ind2" ) );
289                         $ns->subfield( $data->getAttribute( "code" ) );
290                         my $val = $data->textContent;
291                         $val =~ s/(\pM)//gso;
292                         $ns->value( lc($val) );
293
294                         push @ns_list, $ns;
295                 }
296         }
297         return @ns_list;
298 }
299
300 sub _get_field_value {
301
302         my( $mods, $xpath ) = @_;
303
304         my $string = "";
305         my $root = $mods->documentElement;
306         $root->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
307
308         # grab the set of matching nodes
309         my @nodes = $root->findnodes( $xpath );
310         for my $value (@nodes) {
311
312                 # grab all children of the node
313                 my @children = $value->childNodes();
314                 for my $child (@children) {
315
316                         # add the childs content to the growing buffer
317                         my $content = quotemeta($child->textContent);
318                         next if ($string =~ /$content/);  # uniquify the values
319                         $string .= $child->textContent . " ";
320                 }
321                 if( ! @children ) {
322                         $string .= $value->textContent . " ";
323                 }
324         }
325         $string =~ s/(\pM)//gso;
326         return lc($string);
327 }
328
329
330 sub modsdoc_to_values {
331         my( $self, $mods ) = @_;
332         my $data = {};
333         for my $class (keys %$xpathset) {
334                 $data->{$class} = {};
335                 for my $type(keys %{$xpathset->{$class}}) {
336                         my $value = _get_field_value( $mods, $xpathset->{$class}->{$type} );
337                         $data->{$class}->{$type} = $value;
338                 }
339         }
340         return $data;
341 }
342
343
344 1;
345
346