1 package OpenILS::Utils::ModsParser;
2 use strict; use warnings;
4 use OpenSRF::EX qw/:try/;
7 use Time::HiRes qw(time);
8 use OpenILS::Utils::Fieldmapper;
# Module-level XML tooling, created once at file scope and shared by the
# batch methods below: an XML::LibXML parser, an XML::LibXSLT processor, and
# the MARC21slim2MODS stylesheet compiled into $mods_sheet (used to transform
# parsed MARCXML into a MODS document).
# NOTE(review): the stylesheet location is a hard-coded absolute path;
# presumably loading this file fails when that path does not exist --
# consider making it configurable.
11 my $parser = XML::LibXML->new();
12 my $xslt = XML::LibXSLT->new();
13 my $xslt_doc = $parser->parse_file(
14 "/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl" );
15 my $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );
17 # ----------------------------------------------------------------------------------------
18 # XXX get me from the database and cache me ...
# XPath expressions for the single-purpose fields that start_mods_batch and
# push_mods_batch read from a transformed MODS document.  Every expression
# relies on the "mods" namespace prefix.
# ISBN: identifier elements whose type attribute is 'isbn'.
19 my $isbn_xpath = "//mods:mods/mods:identifier[\@type='isbn']";
# Type of resource elements.
20 my $resource_xpath = "//mods:mods/mods:typeOfResource";
# Publication date: union of dateIssued elements marked encoding='marc' and
# dateIssued elements that come first among their siblings under originInfo.
21 my $pub_xpath = "//mods:mods/mods:originInfo//mods:dateIssued[\@encoding='marc']|" .
22 "//mods:mods/mods:originInfo//mods:dateIssued[1]";
# Record identifier; start_mods_batch stores the result under the key 'tcn'.
23 my $tcn_xpath = "//mods:mods/mods:recordInfo/mods:recordIdentifier";
# Publisher elements that come first among their siblings under originInfo.
24 my $publisher_xpath = "//mods:mods/mods:originInfo//mods:publisher[1]";
# Entries of the class/type XPath table that modsdoc_to_values reads as
# $xpathset->{$class}->{$type}.
# NOTE(review): the table's opening line and most of its hash keys are not
# present in this listing, so the key each XPath is stored under is not
# asserted here.
#
# Titles: titleInfo elements containing a title, one XPath per value of the
# type attribute; the last one matches titleInfo with no type attribute.
# NOTE(review): the attribute value matched on the next line is spelled
# 'abreviated' (single "b"), while mods_values_to_mods_slim reads a key named
# 'abbreviated'.  If the stylesheet emits type="abbreviated" this XPath never
# matches -- confirm against the MODS output and correct the spelling.
30 "//mods:mods/mods:titleInfo[mods:title and (\@type='abreviated')]",
32 "//mods:mods/mods:titleInfo[mods:title and (\@type='translated')]",
34 "//mods:mods/mods:titleInfo[mods:title and (\@type='uniform')]",
36 "//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
# Authors: namePart elements of corporate, personal and conference names whose
# parent name has a role/text child equal to 'creator' (first match per
# parent), followed by an XPath matching any personal namePart.
40 "//mods:mods/mods:name[\@type='corporate']/*[local-name()='namePart']".
41 "[../mods:role/mods:text[text()='creator']][1]",
43 "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']".
44 "[../mods:role/mods:text[text()='creator']][1]",
46 "//mods:mods/mods:name[\@type='conference']/*[local-name()='namePart']".
47 "[../mods:role/mods:text[text()='creator']][1]",
49 "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']",
# Subjects: every subject element is matched as a whole; the narrower
# per-child XPaths (geographic, name, temporal, topic) and the keyword class
# are commented out.
54 "//mods:mods/*[local-name()='subject']",
57 # "//mods:mods/*[local-name()='subject']/*[local-name()='geographic']",
59 # "//mods:mods/*[local-name()='subject']/*[local-name()='name']",
61 # "//mods:mods/*[local-name()='subject']/*[local-name()='temporal']",
63 # "//mods:mods/*[local-name()='subject']/*[local-name()='topic']",
65 #keyword => { keyword => "//mods:mods/*[not(local-name()='originInfo')]", },
# Series: titleInfo of relatedItem elements whose type attribute is 'series'.
68 series => "//mods:mods/mods:relatedItem[\@type='series']/mods:titleInfo"
71 # ----------------------------------------------------------------------------------------
# Constructor.  Takes the invoking class name as its first argument and
# returns a new, empty hash reference blessed into that class.
sub new {
    my $class = shift;
    my $self  = {};
    return bless $self, $class;
}
# Body of the XPath field extractor that the rest of this file calls as
# $self->get_field_value( $mods, $xpath ).
#   $mods  - parsed MODS document; its documentElement is used as the root
#   $xpath - XPath expression evaluated against that root
# Results are accumulated in @string; callers consume the result as a list.
# NOTE(review): the sub header, the declarations of @string / @child_text / @a,
# the branch conditions and the return statement are not present in this
# listing, so the branch structure described below is inferred -- confirm.
79 my( $self, $mods, $xpath ) = @_;
82 my $root = $mods->documentElement;
# Associate the "mods" prefix with this namespace URI on the root element so
# that the mods:-prefixed XPath expressions can be evaluated (presumably the
# reason for this call -- see XML::LibXML::Node::setNamespace).
83 $root->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );
85 # grab the set of matching nodes
86 my @nodes = $root->findnodes( $xpath );
87 for my $value (@nodes) {
89 # grab all children of the node
90 my @children = $value->childNodes();
92 for my $child (@children) {
# nodeType 3 is a DOM text node: skip those, keep every other child.
93 next unless( $child->nodeType != 3 );
# A child that has children of its own contributes the text of each of those
# children, joined with a single space.
95 if($child->childNodes) {
97 for my $c (@{$child->childNodes}){
98 push @a, $c->textContent;
100 push(@child_text, join(' ', @a));
# Presumably the else branch: a child without children contributes its own text.
103 push(@child_text, $child->textContent);
# One result entry per matched node: either an array ref of the per-child
# text pieces, or the node's whole text content (presumably alternative
# branches -- the condition is not visible).
108 push(@string, \@child_text);
112 push(@string, $value->textContent );
# Walks every class and type in $xpathset, evaluates the corresponding XPath
# against $mods via get_field_value, and stores the result in
# $data->{$class}->{$type}.
#   $mods - parsed MODS document handed through to get_field_value
# NOTE(review): the declaration of $data, the else keyword, the return
# statement and the closing braces are not present in this listing.
119 sub _modsdoc_to_values {
120 my( $self, $mods ) = @_;
122 for my $class (keys %$xpathset) {
123 $data->{$class} = {};
124 for my $type(keys %{$xpathset->{$class}}) {
125 my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
# The "subject" class keeps every returned value ...
126 if( $class eq "subject" ) {
127 push( @{$data->{$class}->{$type}}, @value );
# ... while (presumably in the else branch) other classes keep only the first.
129 $data->{$class}->{$type} = $value[0];
# Extracts the values described by $xpathset from a MODS document, one class
# at a time, into $data->{$class}->{$type}.  The result is what
# mods_values_to_mods_slim receives from start_mods_batch / push_mods_batch.
#   $mods - parsed MODS document handed through to get_field_value
# NOTE(review): the declaration of $data, one of the $class assignments, the
# if/else conditions, the return statement and the closing braces are not
# present in this listing; the branch descriptions below are inferred.
137 sub modsdoc_to_values {
138 my( $self, $mods ) = @_;
# Subjects: keep every value returned for each type, in order.
142 my $class = "subject";
143 $data->{$class} = {};
144 for my $type(keys %{$xpathset->{$class}}) {
145 my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
146 for my $arr (@value) {
147 push( @{$data->{$class}->{$type}}, $arr);
# Second class (the line assigning $class for this section is not visible;
# presumably "title").  Each value overwrites the previous one, so the last
# value returned for a type wins.  Array refs are joined with a space,
# anything else is stored as is (presumably if/else on the value's type).
154 $data->{$class} = {};
155 for my $type(keys %{$xpathset->{$class}}) {
156 my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
157 for my $arr (@value) {
159 $data->{$class}->{$type} = join(" ", @$arr);
161 $data->{$class}->{$type} = $arr;
# Authors: only the first value returned for each type is kept.
168 my $class = "author";
169 $data->{$class} = {};
170 for my $type(keys %{$xpathset->{$class}}) {
171 my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
172 $data->{$class}->{$type} = $value[0];
# Series: every value is kept; array refs are first joined with a space
# (presumably if/else on the value's type).
177 my $class = "series";
178 $data->{$class} = {};
179 for my $type(keys %{$xpathset->{$class}}) {
180 my @value = $self->get_field_value( $mods, $xpathset->{$class}->{$type} );
181 for my $arr (@value) {
183 push(@{$data->{$class}->{$type}}, join(" ", @$arr));
185 push( @{$data->{$class}->{$type}}, $arr );
198 # ---------------------------------------------------------------------------
199 # Grabs the data 'we want' from the MODS doc and returns it in hash form
200 # ---------------------------------------------------------------------------
# Reduces the per-class hash built by modsdoc_to_values to one title, one
# author, a flat subject list and a series list.
#   $modsperl - hash ref with optional 'title', 'author', 'subject' and
#               'series' entries
# Returns a hash ref with the keys series, title, author and subject.
# NOTE(review): the declarations of $title / $author / $subject / $series,
# the else keywords and the closing braces are not present in this listing.
201 sub mods_values_to_mods_slim {
202 my( $self, $modsperl ) = @_;
# Title: empty string when there is no title entry; otherwise the first true
# value among proper, translated, abbreviated and uniform, in that order.
209 my $tmp = $modsperl->{title};
212 if(!$tmp) { $title = ""; }
214 ($title = $tmp->{proper}) ||
215 ($title = $tmp->{translated}) ||
216 ($title = $tmp->{abbreviated}) ||
217 ($title = $tmp->{uniform});
# Author: same scheme, preferring personal, then other, corporate, conference.
220 $tmp = $modsperl->{author};
221 if(!$tmp) { $author = ""; }
223 ($author = $tmp->{personal}) ||
224 ($author = $tmp->{other}) ||
225 ($author = $tmp->{corporate}) ||
226 ($author = $tmp->{conference});
# Subjects: concatenate the lists stored under every subject type into one
# array; key order follows Perl's hash ordering.
229 $tmp = $modsperl->{subject};
230 if(!$tmp) { $subject = []; }
232 for my $key( keys %{$tmp}) {
233 push(@$subject, @{$tmp->{$key}}) if ($tmp->{$key});
# Series: the list stored under the 'series' type, or an empty list when the
# class is missing.
237 $tmp = $modsperl->{'series'};
238 if(!$tmp) { $series = []; }
239 else { $series = $tmp->{'series'}; }
242 return { series => $series, title => $title, author => $author, subject => $subject };
248 # ---------------------------------------------------------------------------
249 # Initializes a MARC -> Unified MODS batch process
250 # ---------------------------------------------------------------------------
# Starts a batch from the master record: parses the $master_doc string,
# transforms it with $mods_sheet, and stores the extracted values in
# $self->{master_doc}.
#   $master_doc - XML string handed to the parser (MARCXML, per the section
#                 header above)
# NOTE(review): the closing brace and any return statement are not present
# in this listing.
252 sub start_mods_batch {
254 my( $self, $master_doc ) = @_;
256 my $xmldoc = $parser->parse_string($master_doc);
257 my $mods = $mods_sheet->transform($xmldoc);
259 # warn "-" x 100 . "\n";
260 # warn "MODS " . $mods->toString(1) . "\n";
261 # warn "-" x 100 . "\n";
# Two steps: full per-class extraction, then reduction to the slim hash
# (series, title, author, subject).
263 $self->{master_doc} = $self->modsdoc_to_values( $mods );
264 $self->{master_doc} = $self->mods_values_to_mods_slim( $self->{master_doc} );
# The parenthesised left-hand sides are list assignments: only the first
# value returned by get_field_value is kept for isbn, tcn, pubdate and
# publisher.
266 ($self->{master_doc}->{isbn}) =
267 $self->get_field_value( $mods, $isbn_xpath );
# All returned values are kept, as an array ref under 'type_of_resource'.
269 $self->{master_doc}->{type_of_resource} =
270 [ $self->get_field_value( $mods, $resource_xpath ) ];
272 ($self->{master_doc}->{tcn}) =
273 $self->get_field_value( $mods, $tcn_xpath );
275 ($self->{master_doc}->{pubdate}) =
276 $self->get_field_value( $mods, $pub_xpath );
278 ($self->{master_doc}->{publisher}) =
279 $self->get_field_value( $mods, $publisher_xpath );
283 # ---------------------------------------------------------------------------
284 # Takes a MARCXML string and adds it to the growing MODS doc
285 # ---------------------------------------------------------------------------
# Merges one more record into the batch started by start_mods_batch: parses
# and transforms $marcxml, then folds selected values into $self->{master_doc}.
#   $marcxml - MARCXML string for the additional record
# Only subjects, types of resource and (if still unset) the ISBN are merged;
# title, author, series, tcn, pubdate and publisher of the master record are
# not touched by the visible code.
# NOTE(review): the closing braces and any return statement are not present
# in this listing.
286 sub push_mods_batch {
287 my( $self, $marcxml ) = @_;
289 my $xmldoc = $parser->parse_string($marcxml);
290 my $mods = $mods_sheet->transform($xmldoc);
292 my $xmlperl = $self->modsdoc_to_values( $mods );
293 $xmlperl = $self->mods_values_to_mods_slim( $xmlperl );
# Append this record's subjects to the master list (no de-duplication).
295 for my $subject( @{$xmlperl->{subject}} ) {
296 push @{$self->{master_doc}->{subject}}, $subject;
# Append this record's types of resource to the master list.
299 push( @{$self->{master_doc}->{type_of_resource}},
300 $self->get_field_value( $mods, $resource_xpath ));
# Take this record's first ISBN only when the master has no true ISBN value yet.
302 if(!($self->{master_doc}->{isbn}) ) {
303 ($self->{master_doc}->{isbn}) =
304 $self->get_field_value( $mods, $isbn_xpath );
309 # ---------------------------------------------------------------------------
310 # Completes a MARC -> Unified MODS batch process and returns the perl hash
311 # ---------------------------------------------------------------------------
# Builds a fresh Fieldmapper::metabib::virtual_record whose subject,
# types_of_resource and call_numbers fields are initialised to empty array
# refs.  Called by finish_mods_batch as a plain function, without arguments.
# NOTE(review): `new Fieldmapper::metabib::virtual_record` is indirect-object
# syntax; Fieldmapper::metabib::virtual_record->new is the unambiguous form.
# NOTE(review): the return statement and closing brace are not present in
# this listing; finish_mods_batch uses the call's result as the record, so
# presumably $record is returned.
312 sub init_virtual_record {
313 my $record = new Fieldmapper::metabib::virtual_record;
314 $record->subject([]);
315 $record->types_of_resource([]);
316 $record->call_numbers([]);
# Ends the batch: copies the accumulated $self->{master_doc} hash into a
# Fieldmapper::metabib::virtual_record obtained from init_virtual_record, then
# resets $self->{master_doc} to undef.
# NOTE(review): the lines declaring $self and @series, the return statement
# and the closing braces are not present in this listing.
320 sub finish_mods_batch {
322 my $perl = $self->{master_doc};
323 my $record = init_virtual_record();
325 # turn the hash into a fieldmapper object
# Work on copies: remove every [...] span from the title and every (...)
# span from the author (non-greedy, all occurrences).
326 (my $title = $perl->{title}) =~ s/\[.*?\]//og;
327 (my $author = $perl->{author}) =~ s/\(.*?\)//og;
# Keep only the part of each series string that precedes the first ';'
# (whitespace directly before the ';' is dropped as well).
330 for my $s (@{$perl->{series}}) {
331 push @series, (split( /\s*;/, $s ))[0];
334 $record->title($title);
335 $record->author($author);
# NOTE(review): nothing visible in this file stores a 'doc_id' key in
# master_doc -- confirm where it is populated, otherwise this passes undef.
337 $record->doc_id($perl->{doc_id});
338 $record->isbn($perl->{isbn});
339 $record->pubdate($perl->{pubdate});
340 $record->publisher($perl->{publisher});
341 $record->tcn($perl->{tcn});
343 $record->subject($perl->{subject});
# NOTE(review): start_mods_batch and push_mods_batch store this list under
# the key 'type_of_resource', but the key read below is 'types_of_resource'.
# Unless something else sets that key, the record receives undef here and
# the collected values are lost.  Likely should read
# $perl->{type_of_resource} -- confirm and fix.
344 $record->types_of_resource($perl->{types_of_resource});
345 $record->series(\@series);
347 $self->{master_doc} = undef;