6 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenILS::Application::AppUtils;
10 use OpenILS::Utils::Fieldmapper;
11 use OpenILS::Utils::Normalize qw/naco_normalize/;
12 use OpenSRF::Utils::JSON;
13 use Unicode::Normalize;
15 use Time::HiRes qw/time/;
18 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
22 MARC::Charset->ignore_errors(1);
24 # Command line options, with applicable defaults
25 my ($idsubfield, $prefix, $bibfield, $bibsubfield, @files, $libmap, $quiet, $help);
29 my $config = '@sysconfdir@/opensrf_core.xml';
30 my $marctype = 'USMARC';
32 my $parse_options = GetOptions(
33 'idfield=s' => \$idfield,
34 'idsubfield=s' => \$idsubfield,
35 'prefix=s'=> \$prefix,
36 'bibfield=s'=> \$bibfield,
37 'bibsubfield=s'=> \$bibsubfield,
38 'startid=i'=> \$count,
40 'config=s' => \$config,
41 'marctype=s' => \$marctype,
43 'libmap=s' => \$libmap,
48 if (!$parse_options or $help) {
52 @files = @ARGV if (!@files);
54 my $U = 'OpenILS::Application::AppUtils';
60 $lib_id_map = map_libraries_to_ID($libmap);
63 OpenSRF::System->bootstrap_client( config_file => $config );
64 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
66 my ($result, $evt) = get_user_id($user);
67 if ($evt || !$result->id) {
68 print("Could not retrieve user with user name '$user'\n");
74 select STDERR; $| = 1;
75 select STDOUT; $| = 1;
77 my $batch = new MARC::Batch ( $marctype, @files );
79 $batch->warnings_off();
83 while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
88 $record_field = $rec->field($idfield, $idsubfield);
90 $record_field = $rec->field($idfield);
93 # Start by just using the counter as the record ID
96 # If we have identified a location for the bib record ID, grab that value
98 $record = $record_field->data;
101 # If we didn't get a bib record ID, skip and move on to the next MFHD record
103 print STDERR "Could not find a bibliographic record ID link for record $count\n";
107 # If we have been given bibfield / bibsubfield values, use those to find
108 # a matching bib record for $record and use _that_ as our record instead
110 my ($result, $evt) = map_id_to_bib($record);
111 if ($evt || !$result || !$result->record) {
112 print STDERR "Could not find matching bibliographic record for record $count\n";
115 $record = $result->record;
117 # Strip the identifier down to a usable integer
118 $record =~ s/^.*?(\d+).*?$/$1/o;
121 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
122 $xml =~ s/^<\?xml.+\?\s*>//go;
123 $xml =~ s/>\s+</></go;
124 $xml =~ s/\p{Cc}//go;
125 $xml = OpenILS::Application::AppUtils->entityize($xml);
126 $xml =~ s/[\x00-\x1f]//go;
128 my $bib = new Fieldmapper::serial::record_entry;
130 $bib->record($record);
134 $bib->creator($user);
135 $bib->create_date('now');
137 $bib->edit_date('now');
138 $bib->last_xact_id('IMPORT-'.$starttime);
141 my $lib_id = get_library_id($rec);
143 $bib->owning_lib($lib_id);
147 print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
151 if (!$quiet && !($count % 20)) {
152 print STDERR "\r$count\t". $count / (time - $starttime);
156 # Generate a hash of library names (as found in the 852b in the MFHD record) to
157 # integers representing actor.org_unit ID values
158 sub map_libraries_to_ID {
159 my $map_filename = shift;
163 open(MAP_FH, '<', $map_filename) or die "Could not load [$map_filename] $!";
165 my ($lib, $id) = $_ =~ /^(.*?)\t(.*?)$/;
166 $lib_id_map{$lib} = $id;
172 # Look up the actor.org_unit.id value for this library name
176 my $lib_name = $record->field('852')->subfield('b');
177 my $lib_id = $lib_id_map->{$lib_name};
182 # Get the actor.usr.id value for the given username
184 my $username = shift;
188 $result = $U->cstorereq(
189 'open-ils.cstore.direct.actor.user.search',
190 { usrname => $username, deleted => 'f' }
192 $evt = OpenILS::Event->new('ACTOR_USR_NOT_FOUND') unless $result;
194 return ($result, $evt);
197 # Get the biblio.record_entry.id value for the given identifier
203 $record = naco_normalize($record);
205 $record = "$prefix $record";
210 value => naco_normalize($record)
214 $search{'subfield'} = $bibsubfield;
217 $result = $U->cstorereq(
218 'open-ils.cstore.direct.metabib.full_rec.search', \%search
220 $evt = OpenILS::Event->new('METABIB_FULL_REC_NOT_FOUND') unless $record;
222 return ($result, $evt);
229 marc2sre.pl - Convert MARC Format for Holdings Data (MFHD) records to SRE
230 (serial.record_entry) JSON objects
234 C<marc2sre.pl> [B<--config>=I<opensrf_core.xml>]
235 [B<--idfield>=I<MARC-tag> [B<--idsubfield>=I<MARC-code>]] [B<--startid>=I<start-ID>]
236 [B<--user>=I<db-username>] [B<--marctype>=I<fileformat>]
237 [B<--file>=I<MARC-filename>[, ...]] [B<--libmap>=I<map-file>] [B<--quiet>]
238 [B<--bibfield>=I<MARC-tag> [B<--bibsubfield>=I<MARC-code>]]
242 For one or more files containing MFHD records, iterate through the records
243 and generate SRE (serial.record_entry) JSON objects.
249 =item * B<-c> I<config-file>, B<--config>=I<config-file>
251 Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
252 Defaults to F<@sysconfdir@/opensrf_core.xml>
254 =item * B<--idfield> I<MARC-field>
256 Specifies the MFHD field where the identifier of the corresponding
257 bibliographic record is found. Defaults to '004'.
259 =item * B<--idsubfield> I<MARC-code>
261 Specifies the MFHD subfield, if any, where the identifier of the corresponding
262 bibliographic record is found. This option is ignored unless it is accompanied
263 by the B<--idfield> option. Defaults to null.
265 =item * B<-p> I<prefix>, B<--prefix>=I<prefix>
267 Specifies the MARC code for the organization that should be prefixed to the
268 bibliographic record identifier. This option is ignored unless it is accompanied
269 by the B<--bibfield> option. Defaults to null.
271 =item * B<--bibfield> I<MARC-field>
273 Specifies the field in the bibliographic record that holds the identifier
274 value. Defaults to null.
276 =item * B<--bibsubfield> I<MARC-code>
278 Specifies the subfield in the bibliographic record, if any, that holds the
279 identifier value. This option is ignored unless it is accompanied by the
280 B<--bibfield> option. Defaults to null.
282 =item * B<-u> I<username>, B<--user>=I<username>
284 Specifies the Evergreen user that will own these serial records.
286 =item * B<-m> I<file-format>, B<--marctype>=I<file-format>
288 Specifies whether the files containing the MFHD records are in MARC21 ('USMARC')
289 or MARC21XML ('XML') format. Defaults to USMARC.
291 =item * B<-l> I<map-file>, B<--libmap>=I<map-file>
293 Points to a file containing a mapping of library names to integers.
294 The integer represents the actor.org_unit.id value of the library. This enables
295 us to generate an ingest file that does not subsequently need to manually
298 The library name must correspond to the 'b' subfield of the 852 field.
299 Well, it does not have to, but you will have to modify this script
302 The format of the map file should be the name of the library, followed
303 by a tab, followed by the desired numeric ID of the library. For example:
308 =item * B<-q>, B<--quiet>
310 Suppresses the record counter output.
316 marc2sre.pl --user admin --marctype XML --libmap library.map --file serial_holding.xml
318 Processes MFHD records in the B<serial_holding.xml> file as a MARC21XML file,
319 using the default 004 control field for the source of the bibliographic record
320 ID and converting the ID to a plain integer for matching directly against the
321 B<biblio.record_entry.id> column. The file B<library.map> contains the mappings
322 of library names to integers, and the "admin" user will own the processed MFHD
325 marc2sre.pl --idfield 004 --prefix ocolc --bibfield 035 --bibsubfield a --user cat1 serial_holding.mrc
327 B<WARNING>: The B<--bibfield> / B<--bibsubfield> options require one database
328 lookup per MFHD record and will greatly slow down your import. Avoid if at all
331 Processes MFHD records in the B<serial_holding.mrc> file. The script pulls the
332 bibliographic record identifier from the 004 control field of the MFHD record
333 and searches for a matching value in the bibliographic record in data field
334 035, subfield a. The prefix "ocolc" will be prepended to the bibliographic
335 record identifier to provide exact matchings against the
336 B<metabib.full_rec.value> column. The "cat1" user will own the processed MFHD
341 Dan Scott <dscott@laurentian.ca>
343 =head1 COPYRIGHT AND LICENSE
345 Copyright 2010-2011 by Dan Scott
347 This program is free software; you can redistribute it and/or
348 modify it under the terms of the GNU General Public License
349 as published by the Free Software Foundation; either version 2
350 of the License, or (at your option) any later version.
352 This program is distributed in the hope that it will be useful,
353 but WITHOUT ANY WARRANTY; without even the implied warranty of
354 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
355 GNU General Public License for more details.
357 You should have received a copy of the GNU General Public License
358 along with this program; if not, write to the Free Software
359 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.