updated Storage server
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Application / Storage / WORM.pm
1 package OpenILS::Application::Storage::WORM;
2 use base qw/OpenILS::Application::Storage/;
3 use strict; use warnings;
4
5 use OpenSRF::EX qw/:try/;
6
7 use OpenILS::Utils::FlatXML;
8 use OpenILS::Utils::Fieldmapper;
9 use JSON;
10
11 use XML::LibXML;
12 use XML::LibXSLT;
13 use Time::HiRes qw(time);
14
15 my $xml_util    = OpenILS::Utils::FlatXML->new();
16
17 my $parser              = XML::LibXML->new();
18 my $xslt                        = XML::LibXSLT->new();
19 my $xslt_doc    =       $parser->parse_file( "/home/miker/cvs/OpenILS/app_server/stylesheets/MARC21slim2MODS.xsl" );
20 #my $xslt_doc   = $parser->parse_file( "/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl" );
21 my $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
22
23 use open qw/:utf8/;
24
25
26 sub child_init {
27         #try {
28         #       __PACKAGE__->method_lookup('i.do.not.exist');
29         #} catch Error with {
30         #       warn shift();
31         #};
32 }
33
34
35 # get me from the database
36 my $xpathset = {
37
38         title => {
39
40                 abbreviated => 
41                         "//mods:mods/mods:titleInfo[mods:title and (\@type='abreviated')]",
42
43                 translated =>
44                         "//mods:mods/mods:titleInfo[mods:title and (\@type='translated')]",
45
46                 uniform =>
47                         "//mods:mods/mods:titleInfo[mods:title and (\@type='uniform')]",
48
49                 proper =>
50                         "//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
51         },
52
53         author => {
54
55                 corporate => 
56                         "//mods:mods/mods:name[\@type='corporate']/mods:namePart".
57                                 "[../mods:role/mods:text[text()='creator']][1]",
58
59                 personal => 
60                         "//mods:mods/mods:name[\@type='personal']/mods:namePart".
61                                 "[../mods:role/mods:text[text()='creator']][1]",
62
63                 conference => 
64                         "//mods:mods/mods:name[\@type='conference']/mods:namePart".
65                                 "[../mods:role/mods:text[text()='creator']][1]",
66
67                 other => 
68                         "//mods:mods/mods:name[\@type='personal']/mods:namePart",
69         },
70
71         subject => {
72
73                 geographic => 
74                         "//mods:mods/mods:subject/mods:geographic",
75
76                 name => 
77                         "//mods:mods/mods:subject/mods:name",
78
79                 temporal => 
80                         "//mods:mods/mods:subject/mods:temporal",
81
82                 topic => 
83                         "//mods:mods/mods:subject/mods:topic",
84
85                 genre => 
86                         "//mods:mods/mods:genre",
87
88         },
89
90         keyword => { keyword => "//mods:mods/*[not(local-name()='originInfo')]", },
91
92 };
93
94
95 # --------------------------------------------------------------------------------
96
97 __PACKAGE__->register_method( 
98         api_name        => "open-ils.worm.wormize",
99         method          => "wormize",
100         api_level       => 1,
101         argc            => 1,
102 );
103
104 sub wormize {
105
106         my( $self, $client, $docid ) = @_;
107
108         # step -1: grab the doc from storage
109         my $meth = $self->method_lookup('open-ils.storage.biblio.record_marc.retrieve');
110         my ($marc) = $meth->run($docid);
111         return undef unless ($marc);
112         return $self->wormize_marc( $client, $docid, $marc->marc );
113 }
114
115
116 __PACKAGE__->register_method( 
117         api_name        => "open-ils.worm.wormize.marc",
118         method          => "wormize",
119         api_level       => 1,
120         argc            => 1,
121 );
122
123 my $rm_old_fr;
124 my $rm_old_tr;
125 my $rm_old_ar;
126 my $rm_old_sr;
127 my $rm_old_kr;
128
129 my $fr_create;
130 my $create = {};
131
132 my $begin;
133 my $commit;
134 my $rollback;
135
136 sub wormize_marc {
137         my( $self, $client, $docid, $xml) = @_;
138
139         $rm_old_fr = $self->method_lookup( 'open-ils.storage.metabib.full_rec.mass_delete')
140                 unless ($rm_old_fr);
141
142         $rm_old_tr = $self->method_lookup( 'open-ils.storage.metabib.title_field_entry.mass_delete')
143                 unless ($rm_old_tr);
144
145         $rm_old_ar = $self->method_lookup( 'open-ils.storage.metabib.author_field_entry.mass_delete')
146                 unless ($rm_old_ar);
147
148         $rm_old_sr = $self->method_lookup( 'open-ils.storage.metabib.subject_field_entry.mass_delete')
149                 unless ($rm_old_sr);
150
151         $rm_old_kr = $self->method_lookup( 'open-ils.storage.metabib.keyword_field_entry.mass_delete')
152                 unless ($rm_old_kr);
153
154         $fr_create = $self->method_lookup( 'open-ils.storage.metabib.full_rec.batch.create')
155                 unless ($fr_create);
156         $$create{title} = $self->method_lookup( 'open-ils.storage.metabib.title_field_entry.batch.create')
157                 unless ($$create{title});
158         $$create{author} = $self->method_lookup( 'open-ils.storage.metabib.author_field_entry.batch.create')
159                 unless ($$create{author});
160         $$create{subject} = $self->method_lookup( 'open-ils.storage.metabib.subject_field_entry.batch.create')
161                 unless ($$create{subject});
162         $$create{keyword} = $self->method_lookup( 'open-ils.storage.metabib.keyword_field_entry.batch.create')
163                 unless ($$create{keyword});
164
165         $begin = $self->method_lookup( 'open-ils.storage.transaction.begin')
166                 unless ($begin);
167         $commit = $self->method_lookup( 'open-ils.storage.transaction.commit')
168                 unless ($commit);
169         $rollback = $self->method_lookup( 'open-ils.storage.transaction.rollback')
170                 unless ($rollback);
171
172
173         my ($br) = $begin->run($client);
174         unless (defined $br) {
175                 $rollback->run;
176                 throw OpenSRF::EX::PANIC ("Couldn't BEGIN transaction!")
177         }
178
179         # step 0: turn the doc into marcxml and delete old entries
180         my $marcdoc = $parser->parse_string($xml);
181
182         my ($res) = $rm_old_fr->run( { record => $docid } );
183         throw OpenSRF::EX::PANIC ("Couldn't remove old metabib::full_rec entries!")
184                 unless (defined $res);
185
186         undef $res;
187         ($res) = $rm_old_tr->run( { source => $docid } );
188         throw OpenSRF::EX::PANIC ("Couldn't remove old metabib::title_field_entry entries!")
189                 unless (defined $res);
190
191         undef $res;
192         ($res) = $rm_old_ar->run( { source => $docid } );
193         throw OpenSRF::EX::PANIC ("Couldn't remove old metabib::author_field_entry entries!")
194                 unless (defined $res);
195
196         undef $res;
197         ($res) = $rm_old_sr->run( { source => $docid } );
198         throw OpenSRF::EX::PANIC ("Couldn't remove old metabib::subject_field_entry entries!")
199                 unless (defined $res);
200
201         undef $res;
202         ($res) = $rm_old_kr->run( { source => $docid } );
203         throw OpenSRF::EX::PANIC ("Couldn't remove old metabib::keyword_field_entry entries!")
204                 unless (defined $res);
205
206         # step 2: build the KOHA rows
207         my @ns_list = _marcxml_to_full_rows( $marcdoc );
208         $_->record( $docid ) for (@ns_list);
209
210
211         my ($fr) = $fr_create->run(@ns_list);
212         unless (defined $fr) {
213                 $rollback->run;
214                 throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.metabib.full_rec.batch.create!")
215         }
216
217         # step 4: get the MODS based metadata
218         my $data = $self->modsdoc_to_values( $mods_sheet->transform($marcdoc) );
219
220         # step 5: insert the new metadata
221         for my $class ( keys %$data ) {
222                         
223                 my $fm_constructor = "Fieldmapper::metabib::${class}_field_entry";
224                 my @md_list = ();
225                 for my $row ( keys %{ $$data{$class} } ) {
226                         next unless (exists $$data{$class}{$row});
227                         next unless ($$data{$class}{$row});
228                         my $fm_obj = $fm_constructor->new;
229                         $fm_obj->value( $$data{$class}{$row} );
230                         $fm_obj->source( $docid );
231
232                         # XXX This needs to be a real thing once the xpath is in the DB
233                         $fm_obj->field( 1 );
234
235                         push @md_list, $fm_obj;
236                 }
237                         
238                 my ($cr) = $$create{$class}->run(@md_list);
239                 unless (defined $cr) {
240                         $rollback->run;
241                         throw OpenSRF::EX::PANIC ("Couldn't run open-ils.storage.metabib.${class}_field_entry.batch.create!")
242                 }
243         }
244
245         my ($c) = $commit->run;
246         unless (defined $c) {
247                 $rollback->run;
248                 throw OpenSRF::EX::PANIC ("Couldn't COMMIT changes!")
249         }
250
251         return 1;
252
253 }
254
255
256
257 # --------------------------------------------------------------------------------
258
259
260 sub _marcxml_to_full_rows {
261
262         my $marcxml = shift;
263
264         my @ns_list;
265         
266         my $root = $marcxml->documentElement;
267
268         for my $tagline ( @{$root->getChildrenByTagName("leader")} ) {
269                 next unless $tagline;
270
271                 my $ns = new Fieldmapper::metabib::full_rec;
272
273                 $ns->tag( 'LDR' );
274                 my $val = $tagline->textContent;
275                 $val =~ s/(\pM)//gso;
276                 $ns->value( $val );
277
278                 push @ns_list, $ns;
279         }
280
281         for my $tagline ( @{$root->getChildrenByTagName("controlfield")} ) {
282                 next unless $tagline;
283
284                 my $ns = new Fieldmapper::metabib::full_rec;
285
286                 $ns->tag( $tagline->getAttribute( "tag" ) );
287                 my $val = $tagline->textContent;
288                 $val =~ s/(\pM)//gso;
289                 $ns->value( $val );
290
291                 push @ns_list, $ns;
292         }
293
294         for my $tagline ( @{$root->getChildrenByTagName("datafield")} ) {
295                 next unless $tagline;
296
297                 for my $data ( @{$tagline->getChildrenByTagName("subfield")} ) {
298                         next unless $tagline;
299
300                         my $ns = new Fieldmapper::metabib::full_rec;
301
302                         $ns->tag( $tagline->getAttribute( "tag" ) );
303                         $ns->ind1( $tagline->getAttribute( "ind1" ) );
304                         $ns->ind2( $tagline->getAttribute( "ind2" ) );
305                         $ns->subfield( $data->getAttribute( "code" ) );
306                         my $val = $data->textContent;
307                         $val =~ s/(\pM)//gso;
308                         $ns->value( lc($val) );
309
310                         push @ns_list, $ns;
311                 }
312         }
313         return @ns_list;
314 }
315
316 sub _get_field_value {
317
318         my( $mods, $xpath ) = @_;
319
320         my $string = "";
321         my $root = $mods->documentElement;
322         $root->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
323
324         # grab the set of matching nodes
325         my @nodes = $root->findnodes( $xpath );
326         for my $value (@nodes) {
327
328                 # grab all children of the node
329                 my @children = $value->childNodes();
330                 for my $child (@children) {
331
332                         # add the childs content to the growing buffer
333                         my $content = quotemeta($child->textContent);
334                         next if ($string =~ /$content/);  # uniquify the values
335                         $string .= $child->textContent . " ";
336                 }
337                 if( ! @children ) {
338                         $string .= $value->textContent . " ";
339                 }
340         }
341         $string =~ s/(\pM)//gso;
342         return lc($string);
343 }
344
345
346 sub modsdoc_to_values {
347         my( $self, $mods ) = @_;
348         my $data = {};
349         for my $class (keys %$xpathset) {
350                 $data->{$class} = {};
351                 for my $type(keys %{$xpathset->{$class}}) {
352                         my $value = _get_field_value( $mods, $xpathset->{$class}->{$type} );
353                         $data->{$class}->{$type} = $value;
354                 }
355         }
356         return $data;
357 }
358
359
360 1;
361
362