6 use OpenSRF::EX qw/:try/;
7 use OpenSRF::Utils::SettingsClient;
8 use OpenILS::Application::AppUtils;
10 use OpenILS::Utils::Fieldmapper;
11 use OpenSRF::Utils::JSON;
12 use Unicode::Normalize;
14 use Time::HiRes qw/time/;
17 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
21 MARC::Charset->ignore_errors(1);
23 # Command line options, with applicable defaults
24 my ($idsubfield, $bibfield, $bibsubfield, @files, $libmap, $quiet, $help);
28 my $config = '@sysconfdir@/opensrf_core.xml';
29 my $marctype = 'USMARC';
31 my $parse_options = GetOptions(
32 'idfield=s' => \$idfield,
33 'idsubfield=s' => \$idsubfield,
34 'bibfield=s'=> \$bibfield,
35 'bibsubfield=s'=> \$bibsubfield,
36 'startid=i'=> \$count,
38 'config=s' => \$config,
39 'marctype=s' => \$marctype,
41 'libmap=s' => \$libmap,
46 if (!$parse_options or $help) {
50 @files = @ARGV if (!@files);
52 my $U = 'OpenILS::Application::AppUtils';
58 $lib_id_map = map_libraries_to_ID($libmap);
61 OpenSRF::System->bootstrap_client( config_file => $config );
62 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
64 my ($result, $evt) = get_user_id($user);
65 if ($evt || !$result->id) {
66 print("Could not retrieve user with user name '$user'\n");
72 select STDERR; $| = 1;
73 select STDOUT; $| = 1;
75 my $batch = new MARC::Batch ( $marctype, @files );
77 $batch->warnings_off();
81 while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
86 $record_field = $rec->field($idfield, $idsubfield);
88 $record_field = $rec->field($idfield);
91 # Start by just using the counter as the record ID
94 # If we have identified a location for the bib record ID, grab that value
96 $record = $record_field->data;
99 # If we didn't get a bib record ID, skip and move on to the next MFHD record
101 print STDERR "Could not find a bibliographic record ID link for record $count\n";
105 # If we have been given bibfield / bibsubfield values, use those to find
106 # a matching bib record for $record and use _that_ as our record instead
108 my ($result, $evt) = map_id_to_bib($record);
109 if ($evt || !$result || !$result->record) {
110 print STDERR "Could not find matching bibliographic record for record $count\n";
113 $record = $result->record;
115 # Strip the identifier down to a usable integer
116 $record =~ s/^.*?(\d+).*?$/$1/o;
119 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
120 $xml =~ s/^<\?xml.+\?\s*>//go;
121 $xml =~ s/>\s+</></go;
122 $xml =~ s/\p{Cc}//go;
123 $xml = OpenILS::Application::AppUtils->entityize($xml);
124 $xml =~ s/[\x00-\x1f]//go;
126 my $bib = new Fieldmapper::serial::record_entry;
128 $bib->record($record);
132 $bib->creator($user);
133 $bib->create_date('now');
135 $bib->edit_date('now');
136 $bib->last_xact_id('IMPORT-'.$starttime);
139 my $lib_id = get_library_id($rec);
141 $bib->owning_lib($lib_id);
145 print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
149 if (!$quiet && !($count % 20)) {
150 print STDERR "\r$count\t". $count / (time - $starttime);
154 # Generate a hash of library names (as found in the 852b in the MFHD record) to
155 # integers representing actor.org_unit ID values
156 sub map_libraries_to_ID {
157 my $map_filename = shift;
161 open(MAP_FH, '<', $map_filename) or die "Could not load [$map_filename] $!";
163 my ($lib, $id) = $_ =~ /^(.*?)\t(.*?)$/;
164 $lib_id_map{$lib} = $id;
170 # Look up the actor.org_unit.id value for this library name
174 my $lib_name = $record->field('852')->subfield('b');
175 my $lib_id = $lib_id_map->{$lib_name};
180 # Get the actor.usr.id value for the given username
182 my $username = shift;
186 $result = $U->cstorereq(
187 'open-ils.cstore.direct.actor.user.search',
188 { usrname => $username, deleted => 'f' }
190 $evt = OpenILS::Event->new('ACTOR_USR_NOT_FOUND') unless $result;
192 return ($result, $evt);
195 # Get the biblio.record_entry.id value for the given identifier; note that this
196 # approach uses a wildcard to match anything that precedes the identifier value
204 value => { ilike => '%' . $record }
208 $search{'subfield'} = $bibsubfield;
# Search metabib.full_rec through cstore using the criteria built in %search,
# then return the (result, event) pair. The not-found event must be keyed on
# the search result: $record is the caller-supplied identifier and is always
# set here, so testing it would never raise the event on a failed lookup.
211 $result = $U->cstorereq(
212 'open-ils.cstore.direct.metabib.full_rec.search', \%search
214 $evt = OpenILS::Event->new('METABIB_FULL_REC_NOT_FOUND') unless $result;
216 return ($result, $evt);
223 marc2sre.pl - Convert MARC Format for Holdings Data (MFHD) records to SRE
224 (serial.record_entry) JSON objects
228 C<marc2sre.pl> [B<--config>=I<opensrf_core.xml>]
229 [B<--idfield>=I<MARC-tag> [B<--idsubfield>=I<MARC-code>]] [B<--startid>=I<start-ID>]
230 [B<--user>=I<db-username>] [B<--marctype>=I<fileformat>]
231 [B<--file>=I<MARC-filename>[, ...]] [B<--libmap>=I<map-file>] [B<--quiet>]
232 [B<--bibfield>=I<MARC-tag> [B<--bibsubfield>=I<MARC-code>]]
236 For one or more files containing MFHD records, iterate through the records
237 and generate SRE (serial.record_entry) JSON objects.
243 =item * B<-c> I<config-file>, B<--config>=I<config-file>
245 Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
246 Defaults to F<@sysconfdir@/opensrf_core.xml>
248 =item * B<--idfield> I<MARC-field>
250 Specifies the MFHD field where the identifier of the corresponding
251 bibliographic record is found. Defaults to '004'.
253 =item * B<--idsubfield> I<MARC-code>
255 Specifies the MFHD subfield, if any, where the identifier of the corresponding
256 bibliographic record is found. This option is ignored unless it is accompanied
257 by the B<--idfield> option. Defaults to null.
259 =item * B<--bibfield> I<MARC-field>
261 Specifies the field in the bibliographic record that holds the identifier
262 value. Defaults to null.
264 =item * B<--bibsubfield> I<MARC-code>
266 Specifies the subfield in the bibliographic record, if any, that holds the
267 identifier value. This option is ignored unless it is accompanied by the
268 B<--bibfield> option. Defaults to null.
270 =item * B<-u> I<username>, B<--user>=I<username>
272 Specifies the Evergreen user that will own these serial records.
274 =item * B<-m> I<file-format>, B<--marctype>=I<file-format>
276 Specifies whether the files containing the MFHD records are in MARC21 ('USMARC')
277 or MARC21XML ('XML') format. Defaults to 'USMARC'.
279 =item * B<-l> I<map-file>, B<--libmap>=I<map-file>
281 Points to a file containing a mapping of library names to integers.
282 The integer represents the actor.org_unit.id value of the library. This enables
283 us to generate an ingest file that does not subsequently need to manually
286 The library name must correspond to the 'b' subfield of the 852 field.
287 Well, it does not have to, but you will have to modify this script
290 The format of the map file should be the name of the library, followed
291 by a tab, followed by the desired numeric ID of the library. For example:
296 =item * B<-q>, B<--quiet>
298 Suppresses the record counter output.
304 marc2sre.pl --idfield 004 --bibfield 035 --bibsubfield a --user cat1 serial_holding.xml
306 Processes MFHD records in the B<serial_holding.xml> file. The script pulls the
307 bibliographic record identifier from the 004 control field of the MFHD record
308 and searches for a matching value in the bibliographic record in data field
309 035, subfield a. The "cat1" user will own the processed MFHD records.
313 Dan Scott <dscott@laurentian.ca>
315 =head1 COPYRIGHT AND LICENSE
317 Copyright 2010-2011 by Dan Scott
319 This program is free software; you can redistribute it and/or
320 modify it under the terms of the GNU General Public License
321 as published by the Free Software Foundation; either version 2
322 of the License, or (at your option) any later version.
324 This program is distributed in the hope that it will be useful,
325 but WITHOUT ANY WARRANTY; without even the implied warranty of
326 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
327 GNU General Public License for more details.
329 You should have received a copy of the GNU General Public License
330 along with this program; if not, write to the Free Software
331 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.