adding support for a --marctype flag, useful for supplying a MARCXML stream instead...
[Evergreen.git] / Open-ILS / src / extras / import / marc2are.pl
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4
5 use lib '/openils/lib/perl5/';
6
7 use OpenSRF::System;
8 use OpenSRF::Application;
9 use OpenSRF::EX qw/:try/;
10 use OpenSRF::AppSession;
11 use OpenSRF::MultiSession;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenILS::Application::AppUtils;
14 use OpenILS::Utils::Fieldmapper;
15 use Digest::MD5 qw/md5_hex/;
16 use JSON;
17 use Data::Dumper;
18 use Unicode::Normalize;
19
20 use Time::HiRes qw/time/;
21 use Getopt::Long;
22 use MARC::Batch;
23 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
24 use MARC::Charset;
25
26 MARC::Charset->ignore_errors(1);
27
28 my ($utf8, $id_field, $count, $user, $password, $config, $marctype, $keyfile,  @files, @trash_fields) =
29         (0, '998', 1, 'admin', 'open-ils', '/openils/conf/bootstrap.conf', 'USMARC');
30
31 GetOptions(
32         'startid=i'     => \$count,
33         'user=s'        => \$user,
34         'marctype=s'    => \$marctype,
35         'password=s'    => \$password,
36         'config=s'      => \$config,
37         'file=s'        => \@files,
38 );
39
40 @files = @ARGV if (!@files);
41
42 my @ses;
43 my @req;
44 my %processing_cache;
45
46 OpenSRF::System->bootstrap_client( config_file => $config );
47 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
48
49 $user = OpenILS::Application::AppUtils->check_user_session( login($user,$password) )->id;
50
51 select STDERR; $| = 1;
52 select STDOUT; $| = 1;
53
54 my $batch = new MARC::Batch ( $marctype, @files );
55 $batch->strict_off();
56 $batch->warnings_off();
57
58 my $starttime = time;
59 my $rec;
60 while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
61         next if ($rec == -1);
62         my $id = $count;
63
64         (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
65         $xml =~ s/^<\?xml.+\?\s*>//go;
66         $xml =~ s/>\s+</></go;
67         $xml =~ s/\p{Cc}//go;
68         $xml = entityize($xml);
69
70         my $bib = new Fieldmapper::authority::record_entry;
71         $bib->id($id);
72         $bib->active('t');
73         $bib->deleted('f');
74         $bib->marc($xml);
75         $bib->creator($user);
76         $bib->create_date('now');
77         $bib->editor($user);
78         $bib->edit_date('now');
79         $bib->arn_source('LEGACY');
80         $bib->arn_value($count);
81         $bib->last_xact_id('IMPORT-'.$starttime);
82
83         print JSON->perl2JSON($bib)."\n";
84
85         $count++;
86
87         if (!($count % 20)) {
88                 print STDERR "\r$count\t". $count / (time - $starttime);
89         }
90 }
91
92 sub login {        
93         my( $username, $password, $type ) = @_;
94
95         $type |= "staff"; 
96
97         my $seed = OpenILS::Application::AppUtils->simplereq(
98                 'open-ils.auth',
99                 'open-ils.auth.authenticate.init',
100                 $username
101         );
102
103         die("No auth seed. Couldn't talk to the auth server") unless $seed;
104
105         my $response = OpenILS::Application::AppUtils->simplereq(
106                 'open-ils.auth',
107                 'open-ils.auth.authenticate.complete',
108                 {       username => $username,
109                         password => md5_hex($seed . md5_hex($password)),
110                         type => $type });
111
112         die("No auth response returned on login.") unless $response;
113
114         my $authtime = $response->{payload}->{authtime};
115         my $authtoken = $response->{payload}->{authtoken};
116
117         die("Login failed for user $username!") unless $authtoken;
118
119         return $authtoken;
120 }       
121
122 sub entityize {
123         my $stuff = shift;
124         my $form = shift;
125
126         if ($form and $form eq 'D') {
127                 $stuff = NFD($stuff);
128         } else {
129                 $stuff = NFC($stuff);
130         }
131
132         $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
133         return $stuff;
134 }
135