8 use OpenSRF::EX qw/:try/;
9 use OpenSRF::AppSession;
10 use OpenSRF::Utils::JSON;
11 use OpenSRF::Utils::SettingsClient;
12 use OpenILS::Application::AppUtils;
13 use OpenILS::Utils::Fieldmapper;
14 use OpenILS::Utils::CStoreEditor;
17 use MARC::File::XML ( BinaryEncoding => 'UTF-8' );
18 use UNIVERSAL::require;
20 use Time::HiRes qw/time/;
24 my @formats = qw/USMARC UNIMARC XML BRE ARE/;
26 my $config = '@sysconfdir@/opensrf_core.xml';
27 my $format = 'USMARC';
28 my $encoding = 'MARC8';
35 my $export_mfhd = undef;
37 my $all_records = undef;
38 my $replace_001 = undef;
43 'items' => \$holdings,
44 'mfhd' => \$export_mfhd,
45 'all' => \$all_records,
46 'replace_001'=> \$replace_001,
47 'location=s' => \$location,
48 'money=s' => \$dollarsign,
49 'config=s' => \$config,
50 'format=s' => \$format,
53 'encoding=s' => \$encoding,
54 'timeout=i' => \$timeout,
55 'library=s' => \@library,
60 This script exports MARC authority, bibliographic, and serial holdings
61 records from an Evergreen database.
63 Input to this script can consist of a list of record IDs, with one record ID
64 per line, corresponding to the record ID in the Evergreen database table of
65 your requested record type.
67 Alternately, passing the --all option will attempt to export all records of
68 the specified type from the Evergreen database. The --all option starts at
69 record ID 1 and increments the ID by 1 until the largest ID in the database
70 is retrieved. This may not be very efficient for databases with large gaps
71 in their ID sequences.
74 --help or -h This screen.
75 --config or -c Configuration file [@sysconfdir@/opensrf_core.xml]
76 --format or -f Output format (USMARC, UNIMARC, XML, BRE, ARE) [USMARC]
77 --encoding or -e Output encoding (UTF-8, ISO-8859-?, MARC8) [MARC8]
78 --xml-idl or -x Location of the IDL XML
79 --timeout Timeout for exporting a single record; increase if you
80 are using --items and are exporting records that
81 have a lot of items attached to them.
82 --type or -t Record type (BIBLIO, AUTHORITY) [BIBLIO]
83 --all or -a Export all records; ignores input list
84 --library Export the bibliographic records that have attached
85 holdings for the listed library or libraries as
86 identified by shortname
87 --replace_001 Replace the 001 field value with the record ID
89 Additional options for type = 'BIBLIO':
90 --items or -i Include items (holdings) in the output
91 --money Currency symbol to use in item price field [\$]
92 --mfhd Export serial MFHD records for associated bib records
93 Not compatible with --format=BRE
94 --location or -l MARC Location Code for holdings from
95 http://www.loc.gov/marc/organizations/orgshome.html
99 To export a set of USMARC records in a file named "output_file" based on the
100 IDs contained in a file named "list_of_ids":
101 cat list_of_ids | $0 > output_file
103 To export a set of MARC21XML authority records in a file named "output.xml"
104 for all authority records in the database:
105 $0 --format XML --type AUTHORITY --all > output.xml
107 To export a set of USMARC bibliographic records encoded in UTF-8 in a file
108 named "sys1_bibs.mrc" based on records which have attached callnumbers for the
109 libraries with the short names "BR1" and "BR2":
111 $0 --library BR1 --library BR2 --encoding UTF-8 > sys1_bibs.mrc
117 if ($all_records && @library) {
118 die('Incompatible arguments: you cannot combine a request for all ' .
119 'records with a request for records by library');
123 $format = uc($format);
124 $encoding = uc($encoding);
126 binmode(STDOUT, ':raw') if ($encoding ne 'UTF-8');
127 binmode(STDOUT, ':utf8') if ($encoding eq 'UTF-8');
129 if (!grep { $format eq $_ } @formats) {
130 die "Please select a supported format. ".
131 "Right now that means one of [".
132 join('|',@formats). "]\n";
135 if ($format ne 'XML') {
136 my $type = 'MARC::File::' . $format;
141 # Set the default timeout and/or correct a user who supplied a
142 # negative timeout. The 300-second default used when exporting
143 # items was determined empirically; otherwise the default is 1 second.
144 $timeout = $holdings ? 300 : 1;
147 OpenSRF::System->bootstrap_client( config_file => $config );
150 $idl = OpenSRF::Utils::SettingsClient->new->config_value("IDL");
153 Fieldmapper->import(IDL => $idl);
155 my $ses = OpenSRF::AppSession->create('open-ils.cstore');
156 OpenILS::Utils::CStoreEditor::init();
157 my $editor = OpenILS::Utils::CStoreEditor->new();
159 print <<HEADER if ($format eq 'XML');
160 <?xml version="1.0" encoding="$encoding"?>
161 <collection xmlns='http://www.loc.gov/MARC21/slim'>
174 my $last_time = time;
175 my %count = ('bib' => 0, 'did' => 0);
180 if ($type eq 'biblio') {
181 $top_record = $editor->search_biblio_record_entry([
183 {order_by => { 'bre' => 'id DESC' }, limit => 1}
185 } elsif ($type eq 'authority') {
186 $top_record = $editor->search_authority_record_entry([
188 {order_by => { 'are' => 'id DESC' }, limit => 1}
191 for (my $i = 0; $i++ < $top_record;) {
195 my $recids = $editor->json_query({
196 select => { bre => ['id'] },
197 from => { bre => 'acn' },
199 '+bre' => { deleted => 'f' },
204 select => {'aou' => ['id'] },
206 where => { shortname => { in => \@library } }
219 foreach my $record (@$recids) {
220 export_record($record->{id});
223 while ( my $i = <> ) {
227 print "</collection>\n" if ($format eq 'XML');
229 $speed = $count{did} / (time - $start);
230 my $time = time - $start;
233 Exports Attempted : $count{bib}
234 Exports Completed : $count{did}
235 Overall Speed : $speed
236 Total Time Elapsed: $time seconds
245 my $r = $ses->request( "open-ils.cstore.direct.$type.record_entry.retrieve", $id, $flesh );
246 my $s = $r->recv(timeout => $timeout);
248 warn "\n!!!!! Failed trying to read record $id\n";
252 warn "\n!!!!!! Failed trying to read record $id: " . $r->failed->stringify . "\n";
256 warn "\n!!!!!! Timed out trying to read record $id\n";
265 if ($format eq 'ARE' or $format eq 'BRE') {
266 print OpenSRF::Utils::JSON->perl2JSON($bib);
274 my $r = MARC::Record->new_from_xml( $bib->marc, $encoding, $format );
275 if ($type eq 'biblio') {
276 add_bib_holdings($bib, $r);
280 my $tcn = $r->field('001');
284 my $new_001 = MARC::Field->new('001', $id);
285 $r->insert_fields_ordered($new_001);
289 if ($format eq 'XML') {
290 my $xml = $r->as_xml_record;
291 $xml =~ s/^<\?.+?\?>$//mo;
293 } elsif ($format eq 'UNIMARC') {
295 } elsif ($format eq 'USMARC') {
304 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
307 if ($export_mfhd and $type eq 'biblio') {
308 my $mfhds = $editor->search_serial_record_entry({record => $id, deleted => 'f'});
309 foreach my $mfhd (@$mfhds) {
311 my $r = MARC::Record->new_from_xml( $mfhd->marc, $encoding, $format );
313 if ($format eq 'XML') {
314 my $xml = $r->as_xml_record;
315 $xml =~ s/^<\?.+?\?>$//mo;
317 } elsif ($format eq 'UNIMARC') {
319 } elsif ($format eq 'USMARC') {
325 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire export
330 stats() if (! ($count{bib} % 50 ));
337 $speed = $count{did} / (time - $start);
339 my $speed_now = ($count{did} - $count{did_last}) / (time - $count{time_last});
340 my $cn_speed = $count{cn} / (time - $start);
341 my $cp_speed = $count{cp} / (time - $start);
343 printf STDERR "\r $count{did} of $count{bib} @ \%0.4f/s ttl / \%0.4f/s rt ".
344 "($count{cn} CNs @ \%0.4f/s :: $count{cp} CPs @ \%0.4f/s)\r",
350 $count{did_last} = $count{did};
351 $count{time_last} = time;
354 sub get_bib_locations {
355 print STDERR "Retrieving Org Units ... ";
356 my $r = $ses->request( 'open-ils.cstore.direct.actor.org_unit.search', { id => { '!=' => undef } } );
358 while (my $o = $r->recv) {
359 die $r->failed->stringify if ($r->failed);
367 print STDERR "Retrieving Shelving locations ... ";
368 $r = $ses->request( 'open-ils.cstore.direct.asset.copy_location.search', { id => { '!=' => undef } } );
370 while (my $s = $r->recv) {
371 die $r->failed->stringify if ($r->failed);
374 $shelves{$s->id} = $s;
379 $flesh = { flesh => 2, flesh_fields => { bre => [ 'call_numbers' ], acn => [ 'copies' ] } };
382 sub add_bib_holdings {
386 my $cn_list = $bib->call_numbers;
387 if ($cn_list && @$cn_list) {
389 $count{cn} += @$cn_list;
391 my $cp_list = [ map { @{ $_->copies } } @$cn_list ];
392 if ($cp_list && @$cp_list) {
395 push @{$cn_map{$_->call_number}}, $_ for (@$cp_list);
397 for my $cn ( @$cn_list ) {
398 my $cn_map_list = $cn_map{$cn->id};
400 for my $cp ( @$cn_map_list ) {
407 b => $orgs{$cn->owning_lib}->shortname,
408 b => $orgs{$cp->circ_lib}->shortname,
409 c => $shelves{$cp->location}->name,
411 ($cp->circ_modifier ? ( g => $cp->circ_modifier ) : ()),
413 ($cp->price ? ( y => $dollarsign.$cp->price ) : ()),
414 ($cp->copy_number ? ( t => $cp->copy_number ) : ()),
415 ($cp->ref eq 't' ? ( x => 'reference' ) : ()),
416 ($cp->holdable eq 'f' ? ( x => 'unholdable' ) : ()),
417 ($cp->circulate eq 'f' ? ( x => 'noncirculating' ) : ()),
418 ($cp->opac_visible eq 'f' ? ( x => 'hidden' ) : ()),
422 stats() if (! ($count{cp} % 100 ));