1 package OpenILS::Utils::ModsParser;
2 use strict; use warnings;
4 use OpenSRF::EX qw/:try/;
7 use Time::HiRes qw(time);
8 use OpenILS::Utils::Fieldmapper;
# Shared XML machinery, built once when this module is loaded.  $parser is
# reused for every MARCXML string handed to start_mods_batch/push_mods_batch
# and $mods_sheet is the compiled MARC21slim2MODS stylesheet they apply.
my $parser = XML::LibXML->new();
my $xslt = XML::LibXSLT->new();
my $xslt_doc =
    $parser->parse_file( "/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl" );
my $mods_sheet = $xslt->parse_stylesheet( $xslt_doc );

# ----------------------------------------------------------------------------------------
# XXX get me from the database and cache me ...
#
# XPath expressions for the fields that are read directly off the MODS
# document by the batch subs.  They all use the "mods" prefix.
my $isbn_xpath      = "//mods:mods/mods:identifier[\@type='isbn']";
my $resource_xpath  = "//mods:mods/mods:typeOfResource";

# Union of two paths: dateIssued elements flagged encoding='marc' plus the
# first dateIssued under originInfo.
my $pub_xpath       =
    "//mods:mods/mods:originInfo//mods:dateIssued[\@encoding='marc']|" .
    "//mods:mods/mods:originInfo//mods:dateIssued[1]";

my $tcn_xpath       = "//mods:mods/mods:recordInfo/mods:recordIdentifier";
my $publisher_xpath = "//mods:mods/mods:originInfo//mods:publisher[1]";
# XPath expressions for the grouped record data, laid out as
#   class => { type => xpath }
# modsdoc_to_values walks every class/type pair in this table.
#
# NOTE(review): the hash keys were not visible in the reviewed listing.  The
# title and author keys are taken from the lookups in
# mods_values_to_mods_slim (proper/translated/abbreviated/uniform and
# personal/other/corporate/conference); the subject keys are assumed to
# mirror the element each path selects -- confirm against the original file.
my $xpathset = {

    title => {
        # was \@type='abreviated' (typo), which can never match the
        # type="abbreviated" attribute MODS uses
        abbreviated =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='abbreviated')]",
        translated =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='translated')]",
        uniform =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='uniform')]",
        proper =>
            "//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
    },

    author => {
        # the first three only match names carrying a 'creator' role
        corporate =>
            "//mods:mods/mods:name[\@type='corporate']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        personal =>
            "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        conference =>
            "//mods:mods/mods:name[\@type='conference']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        # any personal name part, regardless of role
        other =>
            "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']",
    },

    subject => {
        geographic =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='geographic']",
        name =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='name']",
        temporal =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='temporal']",
        topic =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='topic']",
    },

    keyword => { keyword => "//mods:mods/*[not(local-name()='originInfo')]", },
};
# ----------------------------------------------------------------------------------------
# Constructor: returns an empty hash blessed into the invoking class.
sub new {
    my $class = shift;
    return bless {}, $class;
}
# ---------------------------------------------------------------------------
# Returns the list of text values an XPath selects from a MODS document.
#
#   $mods  - document object; must provide documentElement
#   $xpath - XPath expression using the "mods" prefix
#
# For every node the XPath matches, the text content of each of its child
# nodes is returned as a separate value; a matching node without children
# contributes its own text content instead.  Returns an empty list when
# nothing matches.
#
# NOTE(review): the sub header, the @string declaration, the
# "no children" guard and the return were not visible in the reviewed
# listing and are reconstructed from the call sites -- confirm.
# ---------------------------------------------------------------------------
sub get_field_value {
    my( $self, $mods, $xpath ) = @_;

    my @string;
    my $root = $mods->documentElement;

    # presumably needed so the "mods:" prefix in the XPaths resolves -- confirm
    $root->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

    # walk the set of matching nodes
    for my $value ( $root->findnodes( $xpath ) ) {

        my @children = $value->childNodes();

        if( @children ) {
            # one value per child; the unused quotemeta() copy of each
            # child's text that used to be computed here has been dropped
            push( @string, map { $_->textContent } @children );
        }
        else {
            push( @string, $value->textContent );
        }
    }

    return @string;
}
# Runs every XPath in $xpathset against the MODS document and returns the
# results as { class => { type => value } }.  Types in the "subject" class
# hold an array ref with every match; all other types hold only the first
# match (undef when there is none).
#
# NOTE(review): the $data declaration, the else branch and the return were
# not visible in the reviewed listing and are reconstructed -- confirm.
sub modsdoc_to_values {
    my( $self, $mods ) = @_;

    my $data = {};

    for my $class ( keys %$xpathset ) {

        my $paths       = $xpathset->{$class};
        my $keep_all    = ( $class eq "subject" );
        my %value_for;

        for my $type ( keys %{$paths} ) {
            my @value = $self->get_field_value( $mods, $paths->{$type} );
            $value_for{$type} = $keep_all ? [ @value ] : $value[0];
        }

        $data->{$class} = \%value_for;
    }

    return $data;
}
# ---------------------------------------------------------------------------
# Reduces the class/type hash built by modsdoc_to_values to its 'slim' form:
#   { title => $string, author => $string, subject => \@strings }
#
# title   - first usable value of proper, translated, abbreviated, uniform
# author  - first usable value of personal, other, corporate, conference
# subject - the values of every subject type, flattened into one list
#           (types are visited in sorted key order so the result is stable)
#
# title and author are always defined: they are "" when the section is
# missing or holds no usable value.
# ---------------------------------------------------------------------------
sub mods_values_to_mods_slim {
    my( $self, $modsperl ) = @_;

    my $title = _first_value(
        $modsperl->{title}, qw/proper translated abbreviated uniform/ );

    my $author = _first_value(
        $modsperl->{author}, qw/personal other corporate conference/ );

    my $subject = [];
    my $tmp = $modsperl->{subject};
    if( $tmp ) {
        for my $key ( sort keys %{$tmp} ) {
            my $values = $tmp->{$key};
            # anything that is not an array ref is ignored rather than
            # dereferenced, which would die under strict
            push( @$subject, @$values ) if ref($values) eq 'ARRAY';
        }
    }

    return { title => $title, author => $author, subject => $subject };
}

# Returns the value stored under the first of @types that is defined and
# non-empty in the hash ref $values, or "" when there is none.  A length
# test is used instead of plain truth so that a literal "0" is kept.
sub _first_value {
    my( $values, @types ) = @_;

    return "" unless $values;

    for my $type ( @types ) {
        my $candidate = $values->{$type};
        return $candidate if defined $candidate and length $candidate;
    }

    return "";
}
# ---------------------------------------------------------------------------
# Initializes a MARC -> Unified MODS batch process
#
# Parses the MARCXML string $master_doc, transforms it with the MODS
# stylesheet and stores the slim title/author/subject hash in
# $self->{master_doc}, then adds isbn, type_of_resource (array ref), tcn,
# pubdate and publisher read straight from the MODS document.
# ---------------------------------------------------------------------------
sub start_mods_batch {
    my( $self, $master_doc ) = @_;

    my $mods = $mods_sheet->transform( $parser->parse_string( $master_doc ) );

    $self->{master_doc} = $self->modsdoc_to_values( $mods );
    my $doc = $self->{master_doc} =
        $self->mods_values_to_mods_slim( $self->{master_doc} );

    # the list assignments keep only the first match of each xpath
    my( $isbn ) = $self->get_field_value( $mods, $isbn_xpath );
    $doc->{isbn} = $isbn;

    my @resource_types = $self->get_field_value( $mods, $resource_xpath );
    $doc->{type_of_resource} = [ @resource_types ];

    my( $tcn ) = $self->get_field_value( $mods, $tcn_xpath );
    $doc->{tcn} = $tcn;

    my( $pubdate ) = $self->get_field_value( $mods, $pub_xpath );
    $doc->{pubdate} = $pubdate;

    ( $doc->{publisher} ) =
        $self->get_field_value( $mods, $publisher_xpath );
}
# ---------------------------------------------------------------------------
# Takes a MARCXML string and adds it to the growing MODS doc
#
# The record's subjects and resource types are appended to the ones already
# held in $self->{master_doc}; its isbn is used only when the master doc
# does not have one yet.
# ---------------------------------------------------------------------------
sub push_mods_batch {
    my( $self, $marcxml ) = @_;

    my $mods = $mods_sheet->transform( $parser->parse_string( $marcxml ) );

    my $slim = $self->mods_values_to_mods_slim(
        $self->modsdoc_to_values( $mods ) );

    my $master = ( $self->{master_doc} ||= {} );

    my @new_subjects = @{$slim->{subject}};
    push( @{$master->{subject}}, @new_subjects ) if @new_subjects;

    my @new_types = $self->get_field_value( $mods, $resource_xpath );
    push( @{$master->{type_of_resource}}, @new_types );

    if( ! $master->{isbn} ) {
        ( $master->{isbn} ) =
            $self->get_field_value( $mods, $isbn_xpath );
    }
}
# ---------------------------------------------------------------------------
# Builds an empty Fieldmapper::metabib::virtual_record whose list-valued
# fields (subject, types_of_resource, call_numbers) start as empty arrays
#
# NOTE(review): the return statement was not visible in the reviewed
# listing; returning the record is what finish_mods_batch relies on.
# ---------------------------------------------------------------------------
sub init_virtual_record {
    # direct method call instead of the indirect-object "new Class" form
    my $record = Fieldmapper::metabib::virtual_record->new;
    $record->subject([]);
    $record->types_of_resource([]);
    $record->call_numbers([]);
    return $record;
}

# ---------------------------------------------------------------------------
# Completes a MARC -> Unified MODS batch process: copies the accumulated
# master doc into a virtual_record, clears $self->{master_doc} and returns
# the record
#
# NOTE(review): "my $self = shift" and the return were not visible in the
# reviewed listing and are reconstructed -- confirm.
# ---------------------------------------------------------------------------
sub finish_mods_batch {
    my $self = shift;

    my $perl = $self->{master_doc} || {};
    my $record = init_virtual_record();

    # turn the hash into a fieldmapper object
    $record->title($perl->{title});
    $record->author($perl->{author});
    # doc_id is not set by any sub visible here -- presumably stored on
    # master_doc by the caller; confirm
    $record->doc_id($perl->{doc_id});
    $record->isbn($perl->{isbn});
    $record->pubdate($perl->{pubdate});
    $record->publisher($perl->{publisher});
    $record->tcn($perl->{tcn});

    # list fields keep their [] default when the batch collected nothing
    $record->subject($perl->{subject}) if $perl->{subject};

    # start_mods_batch/push_mods_batch store the list under
    # 'type_of_resource'; the plural key this sub used to read was never
    # written, so it is kept only as a fallback
    my $types = $perl->{type_of_resource} || $perl->{types_of_resource};
    $record->types_of_resource($types) if $types;

    $self->{master_doc} = undef;

    return $record;
}