6 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenILS::Application::AppUtils;
10 use OpenILS::Utils::Fieldmapper;
11 use OpenSRF::Utils::JSON;
12 use Unicode::Normalize;
14 use Time::HiRes qw/time/;
17 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
21 MARC::Charset->ignore_errors(1);
23 # Command line options, with applicable defaults
24 my ($idsubfield, $bibfield, $bibsubfield, @files, $libmap, $quiet, $help);
28 my $config = '@sysconfdir@/opensrf_core.xml';
29 my $marctype = 'USMARC';
31 my $parse_options = GetOptions(
32 'idfield=s' => \$idfield,
33 'idsubfield=s' => \$idsubfield,
34 'bibfield=s'=> \$bibfield,
35 'bibsubfield=s'=> \$bibsubfield,
36 'startid=i'=> \$count,
38 'config=s' => \$config,
39 'marctype=s' => \$marctype,
41 'libmap=s' => \$libmap,
46 if (!$parse_options or $help) {
50 @files = @ARGV if (!@files);
52 my $U = 'OpenILS::Application::AppUtils';
58 $lib_id_map = map_libraries_to_ID($libmap);
61 OpenSRF::System->bootstrap_client( config_file => $config );
62 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
64 my ($result, $evt) = get_user_id($user);
65 if ($evt || !$result->id) {
66 print("Could not retrieve user with user name '$user'\n");
72 select STDERR; $| = 1;
73 select STDOUT; $| = 1;
75 my $batch = new MARC::Batch ( $marctype, @files );
77 $batch->warnings_off();
81 while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
86 $record_field = $rec->field($idfield, $idsubfield);
88 $record_field = $rec->field($idfield);
93 $record = $record_field->data;
96 # If we have been given bibfield / bibsubfield values, use those to find
97 # a matching bib record for $record and use _that_ as our record instead
99 my ($result, $evt) = map_id_to_bib($record);
100 if ($evt || !$result->record) {
101 print("Could not find matching bibliographic record for $record\n");
103 $record = $result->record;
105 # Strip the identifier down to a usable integer
106 $record =~ s/^.*?(\d+).*?$/$1/o;
109 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
110 $xml =~ s/^<\?xml.+\?\s*>//go;
111 $xml =~ s/>\s+</></go;
112 $xml =~ s/\p{Cc}//go;
113 $xml = OpenILS::Application::AppUtils->entityize($xml);
114 $xml =~ s/[\x00-\x1f]//go;
116 my $bib = new Fieldmapper::serial::record_entry;
118 $bib->record($record);
122 $bib->creator($user);
123 $bib->create_date('now');
125 $bib->edit_date('now');
126 $bib->last_xact_id('IMPORT-'.$starttime);
129 my $lib_id = get_library_id($rec);
131 $bib->owning_lib($lib_id);
135 print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
139 if (!$quiet && !($count % 20)) {
140 print STDERR "\r$count\t". $count / (time - $starttime);
144 # Generate a hash of library names (as found in the 852b in the MFHD record) to
145 # integers representing actor.org_unit ID values
146 sub map_libraries_to_ID {
147 my $map_filename = shift;
151 open(MAP_FH, '<', $map_filename) or die "Could not load [$map_filename] $!";
153 my ($lib, $id) = $_ =~ /^(.*?)\t(.*?)$/;
154 $lib_id_map{$lib} = $id;
160 # Look up the actor.org_unit.id value for this library name
164 my $lib_name = $record->field('852')->subfield('b');
165 my $lib_id = $lib_id_map->{$lib_name};
170 # Get the actor.usr.id value for the given username
172 my $username = shift;
176 $result = $U->cstorereq(
177 'open-ils.cstore.direct.actor.user.search',
178 { usrname => $username, deleted => 'f' }
180 $evt = OpenILS::Event->new('ACTOR_USR_NOT_FOUND') unless $result;
182 return ($result, $evt);
185 # Get the biblio.record_entry.id value for the given identifier; note that this
186 # approach uses a wildcard to match anything that precedes the identifier value
194 value => { ilike => '%' . $record }
198 $search{'subfield'} = $bibsubfield;
201 $result = $U->cstorereq(
202 'open-ils.cstore.direct.metabib.full_rec.search', \%search
# Raise the not-found event when the search returned nothing. The check must
# be on $result (the search outcome), not on $record (the identifier that was
# searched for, which is always set here), so callers can rely on $evt.
204 $evt = OpenILS::Event->new('METABIB_FULL_REC_NOT_FOUND') unless $result;
206 return ($result, $evt);
213 marc2sre.pl - Convert MARC Format for Holdings Data (MFHD) records to SRE
214 (serial.record_entry) JSON objects
218 C<marc2sre.pl> [B<--config>=I<opensrf_core.xml>]
219 [B<--idfield>=I<MARC-tag> [B<--idsubfield>=I<MARC-code>]] [B<--startid>=I<start-ID>]
220 [B<--user>=I<db-username>] [B<--marctype>=I<fileformat>]
221 [B<--file>=I<MARC-filename>[, ...]] [B<--libmap>=I<map-file>] [B<--quiet>]
222 [B<--bibfield>=I<MARC-tag> [B<--bibsubfield>=I<MARC-code>]]
226 For one or more files containing MFHD records, iterate through the records
227 and generate SRE (serial.record_entry) JSON objects.
233 =item * B<-c> I<config-file>, B<--config>=I<config-file>
235 Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
236 Defaults to F<@sysconfdir@/opensrf_core.xml>
238 =item * B<--idfield> I<MARC-field>
240 Specifies the MFHD field where the identifier of the corresponding
241 bibliographic record is found. Defaults to '004'.
243 =item * B<--idsubfield> I<MARC-code>
245 Specifies the MFHD subfield, if any, where the identifier of the corresponding
246 bibliographic record is found. This option is ignored unless it is accompanied
247 by the B<--idfield> option. Defaults to null.
249 =item * B<--bibfield> I<MARC-field>
251 Specifies the field in the bibliographic record that holds the identifier
252 value. Defaults to null.
254 =item * B<--bibsubfield> I<MARC-code>
256 Specifies the subfield in the bibliographic record, if any, that holds the
257 identifier value. This option is ignored unless it is accompanied by the
258 B<--bibfield> option. Defaults to null.
260 =item * B<-u> I<username>, B<--user>=I<username>
262 Specifies the Evergreen user that will own these serial records.
264 =item * B<-m> I<file-format>, B<--marctype>=I<file-format>
266 Specifies whether the files containing the MFHD records are in MARC21 ('USMARC')
267 or MARC21XML ('XML') format. Defaults to USMARC.
269 =item * B<-l> I<map-file>, B<--libmap>=I<map-file>
271 Points to a file containing a mapping of library names to integers.
272 The integer represents the actor.org_unit.id value of the library. This enables
273 us to generate an ingest file that does not subsequently need to manually
276 The library name must correspond to the 'b' subfield of the 852 field.
277 Well, it does not have to, but you will have to modify this script
280 The format of the map file should be the name of the library, followed
281 by a tab, followed by the desired numeric ID of the library. For example:
286 =item * B<-q>, B<--quiet>
288 Suppresses the record counter output.
294 marc2sre.pl --idfield 004 --bibfield 035 --bibsubfield a --user cat1 serial_holding.xml
296 Processes MFHD records in the B<serial_holding.xml> file. The script pulls the
297 bibliographic record identifier from the 004 control field of the MFHD record
298 and searches for a matching value in the bibliographic record in data field
299 035, subfield a. The "cat1" user will own the processed MFHD records.
303 Dan Scott <dscott@laurentian.ca>
305 =head1 COPYRIGHT AND LICENSE
307 Copyright 2010-2011 by Dan Scott
309 This program is free software; you can redistribute it and/or
310 modify it under the terms of the GNU General Public License
311 as published by the Free Software Foundation; either version 2
312 of the License, or (at your option) any later version.
314 This program is distributed in the hope that it will be useful,
315 but WITHOUT ANY WARRANTY; without even the implied warranty of
316 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
317 GNU General Public License for more details.
319 You should have received a copy of the GNU General Public License
320 along with this program; if not, write to the Free Software
321 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.