]> git.evergreen-ils.org Git - Evergreen.git/blob - Open-ILS/src/perlmods/OpenILS/Utils/ModsParser.pm
we now return a virtual_record fieldmapper object
[Evergreen.git] / Open-ILS / src / perlmods / OpenILS / Utils / ModsParser.pm
1 package OpenILS::Utils::ModsParser;
2 use strict; use warnings;
3
4 use OpenSRF::EX qw/:try/;
5 use XML::LibXML;
6 use XML::LibXSLT;
7 use Time::HiRes qw(time);
8 use OpenILS::Utils::Fieldmapper;
9
# Module-wide XML tooling, built once when this file is loaded and shared by
# every ModsParser instance.
my $parser = XML::LibXML->new();
my $xslt   = XML::LibXSLT->new();

# The MARC21slim -> MODS stylesheet is parsed and compiled a single time here
# so that each transform() call can reuse the compiled sheet.
my $xslt_doc = $parser->parse_file(
    "/pines/cvs/ILS/Open-ILS/xsl/MARC21slim2MODS.xsl"
);
my $mods_sheet = $xslt->parse_stylesheet($xslt_doc);
15
16 # ----------------------------------------------------------------------------------------
17 # XXX get me from the database and cache me ...
# XPath expressions for the single-valued fields pulled out of a MODS document.
# Written with q{} so the '@' in attribute tests needs no escaping.
my $isbn_xpath     = q{//mods:mods/mods:identifier[@type='isbn']};
my $resource_xpath = q{//mods:mods/mods:typeOfResource};

# Publication date: the union of the MARC-encoded dateIssued nodes and the
# first dateIssued node under originInfo.
my $pub_xpath = join(
    '|',
    q{//mods:mods/mods:originInfo//mods:dateIssued[@encoding='marc']},
    q{//mods:mods/mods:originInfo//mods:dateIssued[1]},
);

my $tcn_xpath       = q{//mods:mods/mods:recordInfo/mods:recordIdentifier};
my $publisher_xpath = q{//mods:mods/mods:originInfo//mods:publisher[1]};
24
25
# Table of XPath expressions, keyed first by field class (title, author,
# subject, keyword) and then by the type of value within that class.
# modsdoc_to_values() walks this table and evaluates every expression against
# a MODS document.
my $xpathset = {

    # titleInfo elements, selected by their "type" attribute.  "proper" is the
    # titleInfo that carries no type attribute at all.
    title => {
        # MODS spells this attribute value "abbreviated"; the previous
        # misspelling ("abreviated") could never match a titleInfo element.
        abbreviated =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='abbreviated')]",
        translated =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='translated')]",
        uniform =>
            "//mods:mods/mods:titleInfo[mods:title and (\@type='uniform')]",
        proper =>
            "//mods:mods/mods:titleInfo[mods:title and not (\@type)]",
    },

    # First namePart of each kind of name whose role text is 'creator'.
    # "other" is any personal namePart, with no role restriction.
    author => {
        corporate =>
            "//mods:mods/mods:name[\@type='corporate']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        personal =>
            "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        conference =>
            "//mods:mods/mods:name[\@type='conference']/*[local-name()='namePart']".
                "[../mods:role/mods:text[text()='creator']][1]",
        other =>
            "//mods:mods/mods:name[\@type='personal']/*[local-name()='namePart']",
    },

    # Children of subject elements, one expression per subject kind.
    subject => {
        geographic =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='geographic']",
        name =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='name']",
        temporal =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='temporal']",
        topic =>
            "//mods:mods/*[local-name()='subject']/*[local-name()='topic']",
    },

    # Every direct child of the mods element except originInfo.
    keyword => { keyword => "//mods:mods/*[not(local-name()='originInfo')]", },
};
62 # ----------------------------------------------------------------------------------------
63
64
65
# Constructor: returns an empty hash blessed into the invoking class.
sub new {
    my ($class) = @_;
    return bless {}, $class;
}
67
# Evaluates $xpath against the MODS document $mods and returns the matching
# text as a list of strings.
#
#   $mods  - document object providing documentElement()
#   $xpath - XPath expression using the "mods" namespace prefix
#
# For each matching node, the text content of every child node is returned
# as a separate string; a node with no children contributes its own text
# content instead.  Returns an empty list when nothing matches or when the
# document has no root element.
sub get_field_value {

    my( $self, $mods, $xpath ) = @_;

    my $root = $mods->documentElement;

    # A document without a root element has nothing to match; treat it the
    # same as "no matching nodes" rather than calling a method on undef.
    return () unless defined $root;

    # Bind the "mods" prefix on the root so the prefixed XPath expressions
    # resolve.
    $root->setNamespace( "http://www.loc.gov/mods/", "mods", 1 );

    my @strings;
    for my $node ( $root->findnodes($xpath) ) {

        my @children = $node->childNodes();

        if (@children) {
            # one string per child node
            push @strings, map { $_->textContent } @children;
        }
        else {
            # leaf node: use its own text
            push @strings, $node->textContent;
        }
    }

    return @strings;
}
94
95
# Runs every XPath expression in $xpathset against the MODS document $mods.
#
# Returns a hash reference of the form { class => { type => value } }.  For
# the "subject" class each value is an array reference holding every match;
# for all other classes only the first match is kept.
sub modsdoc_to_values {
    my( $self, $mods ) = @_;

    my %values;
    for my $class (keys %$xpathset) {

        my $xpath_for = $xpathset->{$class};
        my %by_type;

        for my $type (keys %$xpath_for) {
            my @found = $self->get_field_value( $mods, $xpath_for->{$type} );

            # subjects keep the full list, everything else the first hit
            $by_type{$type} = ( $class eq "subject" ) ? [ @found ] : $found[0];
        }

        $values{$class} = \%by_type;
    }

    return \%values;
}
112
113
114
115 # ---------------------------------------------------------------------------
116 # Grabs the data 'we want' from the MODS doc and returns it in hash form
117 # ---------------------------------------------------------------------------
# Reduces the class/type hash built by modsdoc_to_values() to a flat hash:
#
#   title   - first true value among proper, translated, abbreviated, uniform
#   author  - first true value among personal, other, corporate, conference
#   subject - array reference with the values of every subject type combined
#
# When a class is present but none of its candidates is true, the field ends
# up holding the value of the last candidate tried.  When a class is missing
# altogether, title/author are "" and subject is an empty array reference.
sub mods_values_to_mods_slim {
    my( $self, $modsperl ) = @_;

    my $title = "";
    if ( my $titles = $modsperl->{title} ) {
        for my $kind (qw/proper translated abbreviated uniform/) {
            $title = $titles->{$kind};
            last if $title;
        }
    }

    my $author = "";
    if ( my $authors = $modsperl->{author} ) {
        for my $kind (qw/personal other corporate conference/) {
            $author = $authors->{$kind};
            last if $author;
        }
    }

    my @subjects;
    if ( my $subjects_by_type = $modsperl->{subject} ) {
        for my $list (values %$subjects_by_type) {
            push( @subjects, @$list ) if $list;
        }
    }

    return { title => $title, author => $author, subject => \@subjects };
}
155
156
157
158 # ---------------------------------------------------------------------------
159 # Initializes a MARC -> Unified MODS batch process
160 # ---------------------------------------------------------------------------
161
# Begins a batch from the MARCXML string $master_doc.
#
# The record is transformed to MODS, reduced to the slim title/author/subject
# hash, and stored in $self->{master_doc}.  The isbn, tcn, pubdate and
# publisher fields each take the first matching value; type_of_resource keeps
# every match in an array reference.
sub start_mods_batch {

    my( $self, $master_doc ) = @_;

    my $mods = $mods_sheet->transform( $parser->parse_string($master_doc) );

    $self->{master_doc} = $self->modsdoc_to_values( $mods );
    $self->{master_doc} = $self->mods_values_to_mods_slim( $self->{master_doc} );

    # same hash as $self->{master_doc}; just a shorter handle
    my $doc = $self->{master_doc};

    ($doc->{isbn}) = $self->get_field_value( $mods, $isbn_xpath );

    $doc->{type_of_resource} = [ $self->get_field_value( $mods, $resource_xpath ) ];

    ($doc->{tcn}) = $self->get_field_value( $mods, $tcn_xpath );

    ($doc->{pubdate}) = $self->get_field_value( $mods, $pub_xpath );

    ($doc->{publisher}) = $self->get_field_value( $mods, $publisher_xpath );

}
188
189 # ---------------------------------------------------------------------------
190 # Takes a MARCXML string and adds it to the growing MODS doc
191 # ---------------------------------------------------------------------------
# Folds one more MARCXML record into the batch held in $self->{master_doc}.
#
# The record's subjects and types of resource are appended to the batch
# lists.  Its ISBN is used only when the batch does not have a true ISBN yet.
sub push_mods_batch {
    my( $self, $marcxml ) = @_;

    my $mods = $mods_sheet->transform( $parser->parse_string($marcxml) );

    my $slim = $self->mods_values_to_mods_slim( $self->modsdoc_to_values($mods) );

    # append this record's subjects, leaving the list untouched when there
    # are none
    my @new_subjects = @{ $slim->{subject} };
    push( @{ $self->{master_doc}->{subject} }, @new_subjects ) if @new_subjects;

    my @new_types = $self->get_field_value( $mods, $resource_xpath );
    push( @{ $self->{master_doc}->{type_of_resource} }, @new_types );

    if( ! $self->{master_doc}->{isbn} ) {
        ($self->{master_doc}->{isbn}) =
            $self->get_field_value( $mods, $isbn_xpath );
    }
}
213
214
# ---------------------------------------------------------------------------
# Completing a MARC -> Unified MODS batch process: the collected values are
# returned as a virtual_record fieldmapper object
# ---------------------------------------------------------------------------
# Builds a new Fieldmapper::metabib::virtual_record whose list-valued fields
# (subject, types_of_resource, call_numbers) start out as empty array
# references, and returns it.
sub init_virtual_record {
    # Explicit class-method call; the indirect-object form ("new Class") is
    # ambiguous to the parser and is avoided.
    my $record = Fieldmapper::metabib::virtual_record->new;
    $record->subject([]);
    $record->types_of_resource([]);
    $record->call_numbers([]);
    return $record;
}
225
# Ends the current batch.
#
# Copies the values accumulated in $self->{master_doc} into a new
# virtual_record object (created by init_virtual_record), resets
# $self->{master_doc} to undef, and returns the record.
sub finish_mods_batch {
    my $self = shift;
    my $perl = $self->{master_doc} || {};
    my $record = init_virtual_record();

    # turn the hash into a fieldmapper object
    $record->title($perl->{title});
    $record->author($perl->{author});
    $record->doc_id($perl->{doc_id});
    $record->isbn($perl->{isbn});
    $record->pubdate($perl->{pubdate});
    $record->publisher($perl->{publisher});
    $record->tcn($perl->{tcn});

    # List-valued fields stay array references even when the batch collected
    # nothing for them.
    $record->subject($perl->{subject} || []);

    # start_mods_batch/push_mods_batch store the resource types under the
    # singular key 'type_of_resource'; reading 'types_of_resource' here always
    # produced undef and discarded the collected values.
    $record->types_of_resource($perl->{type_of_resource} || []);

    $self->{master_doc} = undef;
    return $record;
}
246
247
248