1 package OpenILS::Utils::ModsParser;
2 use strict; use warnings;
4 use OpenSRF::EX qw/:try/;
7 use Time::HiRes qw(time);
# Shared XML tooling, created once when this file is loaded: a LibXML parser,
# a LibXSLT engine, and the compiled stylesheet that the batch subs apply to
# incoming records via $mods_sheet->transform().
my $parser = XML::LibXML->new;
my $xslt   = XML::LibXSLT->new;

# The stylesheet is read from a fixed path on disk, parsed as an XML document,
# and then compiled by the XSLT engine.
my $xslt_doc   = $parser->parse_file("/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl");
my $mods_sheet = $xslt->parse_stylesheet($xslt_doc);
15 # ----------------------------------------------------------------------------------------
16 # XXX get me from the database and cache me ...
# Stand-alone XPath expressions for the single-valued fields that the batch
# subs read directly.  Written with non-interpolating quotes so "@" needs no
# escaping; the resulting strings are unchanged.
my $isbn_xpath     = q{//mods:mods/mods:identifier[@type='isbn']};
my $resource_xpath = q{//mods:mods/mods:typeOfResource};

# Publication date: a union ("|") of the dateIssued element flagged
# encoding='marc' and the first dateIssued under originInfo.
my $pub_xpath = join(
    '|',
    q{//mods:mods/mods:originInfo//mods:dateIssued[@encoding='marc']},
    q{//mods:mods/mods:originInfo//mods:dateIssued[1]},
);

my $tcn_xpath       = q{//mods:mods/mods:recordInfo/mods:recordIdentifier};
my $publisher_xpath = q{//mods:mods/mods:originInfo//mods:publisher[1]};
# XPath expressions that pull indexable values out of a MODS document, keyed by
# search class and then by field type.  modsdoc_to_values() evaluates every
# class/type pair in this table.
# NOTE(review): the title and author key names follow the keys that
# mods_values_to_mods_slim() looks up; the subject key names are assumed from
# the element each expression selects -- confirm against the repository copy.
my $xpathset = {

    title => {
        # MODS names this titleInfo type "abbreviated".  The earlier spelling
        # 'abreviated' is not a MODS type value, so it could never match.
        abbreviated =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='abbreviated')]",
        translated =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='translated')]",
        uniform =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='uniform')]",
        # a titleInfo with no type attribute at all
        proper =>
            "//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
    },

    author => {
        # the first three only accept names whose role text is 'creator'
        corporate =>
            "//mods:mods/mods:name[\@type='corporate']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        personal =>
            "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        conference =>
            "//mods:mods/mods:name[\@type='conference']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        # any personal name, regardless of role
        other =>
            "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']",
    },

    subject => {
        geographic =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='geographic']",
        name =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='name']",
        temporal =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='temporal']",
        topic =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='topic']",
    },

    # every direct child of mods except originInfo
    keyword => { keyword => "//mods:mods/*[not(local-name()='originInfo')]", },
};
61 # ----------------------------------------------------------------------------------------
# Constructor: returns a new, empty hash-based object blessed into the class
# it was invoked on.
sub new {
    my $class = shift;
    return bless {}, $class;
}
# ---------------------------------------------------------------------------
# Returns the text found at $xpath in the MODS document $mods, as a single
# string in which every collected piece is followed by one space ("" when
# nothing matches).
#   $mods  - document object; its documentElement is the search root
#   $xpath - XPath expression using the "mods" namespace prefix
# For each matching node, the text of each child node is appended unless that
# text already occurs somewhere in the string built so far.
# ---------------------------------------------------------------------------
sub get_field_value {
    my( $self, $mods, $xpath ) = @_;

    my $string = "";
    my $root = $mods->documentElement;

    # make the "mods" prefix used by the XPath expressions known on the root
    $root->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

    # grab the set of matching nodes
    for my $value ( $root->findnodes( $xpath ) ) {

        # grab all children of the node
        my @children = $value->childNodes();
        for my $child (@children) {

            # add the child's content to the growing buffer, uniquifying the
            # values (original note: "don't de-dup for the WORM!").
            # index() is used instead of a regex match: an interpolated pattern
            # that turns out empty makes Perl reuse the last successfully
            # matched regex, so empty child text gave unpredictable results.
            my $content = $child->textContent;
            next if index( $string, $content ) >= 0;
            $string .= $content . " ";
        }

        # NOTE(review): the condition guarding this fallback was not legible
        # in the reviewed listing; it is assumed to apply only to nodes that
        # have no children -- confirm against the repository copy.
        if( !@children ) {
            $string .= $value->textContent . " ";
        }
    }

    return $string;
}
# ---------------------------------------------------------------------------
# Evaluates every expression in $xpathset against the MODS document $mods and
# returns the results as a hash ref shaped { class => { type => value } },
# where each value is whatever get_field_value() returned for that XPath.
# ---------------------------------------------------------------------------
sub modsdoc_to_values {
    my( $self, $mods ) = @_;

    my %values_of;
    for my $class ( keys %{$xpathset} ) {
        my $xpath_for = $xpathset->{$class};

        # scalar() keeps the lookup in scalar context, as a plain assignment would
        $values_of{$class} = {
            map { ( $_ => scalar( $self->get_field_value( $mods, $xpath_for->{$_} ) ) ) }
                keys %{$xpath_for}
        };
    }

    return \%values_of;
}
# ---------------------------------------------------------------------------
# Reduces a per-class values hash (the shape modsdoc_to_values builds) to a
# "slim" hash ref: { title => string, author => string, subject => array ref }.
#   title   - first true value among proper, translated, abbreviated, uniform
#   author  - first true value among personal, other, corporate, conference
#   subject - every true value in the subject class, in sorted key order
# A missing class, or one holding no true value, yields "" for title/author
# and [] for subject, so callers never receive undef for these fields.
# ---------------------------------------------------------------------------
sub mods_values_to_mods_slim {
    my( $self, $modsperl ) = @_;

    my $title = _first_true_value(
        $modsperl->{title}, qw(proper translated abbreviated uniform) );

    my $author = _first_true_value(
        $modsperl->{author}, qw(personal other corporate conference) );

    my $subject = [];
    if( my $by_type = $modsperl->{subject} ) {

        # sorted so the subject list comes out in the same order on every
        # run; plain hash order is not stable between runs
        for my $key ( sort keys %{$by_type} ) {
            push( @$subject, $by_type->{$key} ) if $by_type->{$key};
        }
    }

    return { title => $title, author => $author, subject => $subject };
}

# Returns the first true value stored in $values under @keys (tried in the
# order given), or "" when $values is false or none of the keys is true.
sub _first_true_value {
    my( $values, @keys ) = @_;

    return "" if !$values;
    for my $key (@keys) {
        return $values->{$key} if $values->{$key};
    }
    return "";
}
# ---------------------------------------------------------------------------
# Begins a MARC -> unified MODS batch.  $master_doc (an XML string) is parsed,
# run through the MODS stylesheet, and reduced to the slim title / author /
# subject hash, which is stored in $self->{master_doc}.  The isbn,
# type_of_resource (as a one-entry array ref), tcn, pubdate and publisher
# fields are then read from the same MODS document and added to it.
# ---------------------------------------------------------------------------
sub start_mods_batch {
    my( $self, $master_doc ) = @_;

    my $marc_dom = $parser->parse_string($master_doc);
    my $mods_dom = $mods_sheet->transform($marc_dom);

    my $values = $self->modsdoc_to_values($mods_dom);
    $self->{master_doc} = $self->mods_values_to_mods_slim($values);

    my $master = $self->{master_doc};

    $master->{isbn} = $self->get_field_value( $mods_dom, $isbn_xpath );

    # kept as a list because push_mods_batch appends one entry per record
    $master->{type_of_resource} = [ $self->get_field_value( $mods_dom, $resource_xpath ) ];

    $master->{tcn}       = $self->get_field_value( $mods_dom, $tcn_xpath );
    $master->{pubdate}   = $self->get_field_value( $mods_dom, $pub_xpath );
    $master->{publisher} = $self->get_field_value( $mods_dom, $publisher_xpath );
}
# ---------------------------------------------------------------------------
# Folds one more MARCXML record (an XML string) into the batch held in
# $self->{master_doc}: its subjects and its type-of-resource value are
# appended, and its ISBN is used only if the batch has no true isbn yet.
# The pushed record's title and author are not merged.
# ---------------------------------------------------------------------------
sub push_mods_batch {
    my( $self, $marcxml ) = @_;

    my $marc_dom = $parser->parse_string($marcxml);
    my $mods_dom = $mods_sheet->transform($marc_dom);

    my $slim = $self->mods_values_to_mods_slim(
        scalar( $self->modsdoc_to_values($mods_dom) ) );

    # one push per subject, so nothing is touched when the record has none
    push @{ $self->{master_doc}->{subject} }, $_ for @{ $slim->{subject} };

    push @{ $self->{master_doc}->{type_of_resource} },
        $self->get_field_value( $mods_dom, $resource_xpath );

    if( !$self->{master_doc}->{isbn} ) {
        $self->{master_doc}->{isbn} = $self->get_field_value( $mods_dom, $isbn_xpath );
    }
}
211 # ---------------------------------------------------------------------------
212 # Completes a MARC -> Unified MODS batch process and returns the perl hash
213 # ---------------------------------------------------------------------------
214 sub finish_mods_batch {
216 my $perl = $self->{master_doc};
217 $self->{master_doc} = undef;