use 001 as the authority record number value
[Evergreen.git] / Open-ILS / src / extras / import / marc2are.pl
1 #!/usr/bin/perl
2 use strict;
3 use warnings;
4
5 use lib '/openils/lib/perl5/';
6
7 use OpenSRF::System;
8 use OpenSRF::Application;
9 use OpenSRF::EX qw/:try/;
10 use OpenSRF::AppSession;
11 use OpenSRF::MultiSession;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenILS::Application::AppUtils;
14 use OpenILS::Utils::Fieldmapper;
15 use Digest::MD5 qw/md5_hex/;
16 use OpenSRF::Utils::JSON;
17 use Data::Dumper;
18 use Unicode::Normalize;
19
20 use Time::HiRes qw/time/;
21 use Getopt::Long;
22 use MARC::Batch;
23 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
24 use MARC::Charset;
25
26 MARC::Charset->ignore_errors(1);
27
# ---------------------------------------------------------------------------
# Main script.
#
# Reads MARC authority records from the given files and prints one JSON
# serialized Fieldmapper::authority::record_entry object per line on STDOUT.
# Record ids are assigned sequentially starting at --startid; the authority
# record number (arn_value) is taken from the record's 001 field when there
# is one, and falls back to the assigned id otherwise.
#
# Options:
#   --startid=N     id given to the first record (default 1)
#   --user=NAME     login used to obtain the creator/editor id (default admin)
#   --password=PW   password for that login
#   --marctype=T    input type handed to MARC::Batch (default USMARC)
#   --config=FILE   OpenSRF bootstrap configuration file
#   --file=FILE     input file, may be repeated; remaining command-line
#                   arguments are used as input files when no --file is given
#   --quiet         suppress the progress counter on STDERR
# ---------------------------------------------------------------------------

# Command-line defaults.
my $count    = 1;
my $user     = 'admin';
my $password = 'open-ils';
my $config   = '/openils/conf/opensrf_core.xml';
my $marctype = 'USMARC';
my @files;
my $quiet;

GetOptions(
        'startid=i'     => \$count,
        'user=s'        => \$user,
        'marctype=s'    => \$marctype,
        'password=s'    => \$password,
        'config=s'      => \$config,
        'file=s'        => \@files,
        'quiet'         => \$quiet,
);

# Without any --file option, whatever is left on the command line is the input.
@files = @ARGV if (!@files);

OpenSRF::System->bootstrap_client( config_file => $config );
Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));

# Log in and, from here on, keep the id of the session's user object in $user;
# it is stamped on every record as creator and editor.
$user = OpenILS::Application::AppUtils->check_user_session( login($user,$password) )->id;

# Turn on autoflush for both output handles, leaving STDOUT selected.
select STDERR; $| = 1;
select STDOUT; $| = 1;

my $batch = MARC::Batch->new( $marctype, @files );
$batch->strict_off();
$batch->warnings_off();

my $starttime = time;
my $processed = 0;      # records emitted so far, used for the progress rate
my $rec;

# When fetching the next record throws, $rec is set to the (true) sentinel -1
# so the loop keeps going instead of ending at the first bad record.
while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
        # Anything that is not an object is the failure sentinel: skip it.
        next if (!ref $rec);

        my $id = $count;

        # Prefer the record's own 001 as the authority record number.
        my $_001 = $rec->field('001');
        my $arn = $count;
        $arn = $_001->data if ($_001);

        # Flatten the MARCXML to a single line ...
        (my $xml = $rec->as_xml_record()) =~ s/\n//sg;
        # ... drop the leading XML declaration (non-greedy, so only the
        # declaration itself is removed now that everything is on one line) ...
        $xml =~ s/^<\?xml.+?\?\s*>//;
        # ... remove whitespace between tags and any control characters ...
        $xml =~ s/>\s+</></g;
        $xml =~ s/\p{Cc}//g;
        # ... and replace non-ASCII characters with numeric entities.
        $xml = entityize($xml);
        $xml =~ s/[\x00-\x1f]//g;

        my $are = Fieldmapper::authority::record_entry->new;
        $are->id($id);
        $are->active('t');
        $are->deleted('f');
        $are->marc($xml);
        $are->creator($user);
        $are->create_date('now');
        $are->editor($user);
        $are->edit_date('now');
        $are->arn_source('LEGACY');
        $are->arn_value($arn);
        # Every record of one run shares the same transaction marker.
        $are->last_xact_id('IMPORT-'.$starttime);

        print OpenSRF::Utils::JSON->perl2JSON($are)."\n";

        $count++;
        $processed++;

        # Progress: next id and records per second. The rate is based on the
        # number of records actually processed, not on the id counter, which
        # includes the --startid offset.
        if (!$quiet && !($count % 20)) {
                my $elapsed = time - $starttime;
                my $rate = $elapsed > 0 ? $processed / $elapsed : 0;
                print STDERR "\r$count\t". $rate;
        }
}
96
# login($username, $password [, $type])
#
# Authenticates against the open-ils.auth service and returns the auth token.
#
#   $username  login name
#   $password  clear-text password; it is sent as
#              md5_hex(seed . md5_hex(password)), using the seed returned by
#              the init call
#   $type      login type, defaults to "staff" when empty or omitted
#
# Dies when no seed or no response comes back, or when the response carries
# no auth token.
sub login {
        my( $username, $password, $type ) = @_;

        # Logical default. A bitwise "|=" here would OR the characters of an
        # explicitly supplied type with those of "staff" and corrupt it.
        $type ||= "staff";

        my $seed = OpenILS::Application::AppUtils->simplereq(
                'open-ils.auth',
                'open-ils.auth.authenticate.init',
                $username
        );

        die("No auth seed. Couldn't talk to the auth server") unless $seed;

        my $response = OpenILS::Application::AppUtils->simplereq(
                'open-ils.auth',
                'open-ils.auth.authenticate.complete',
                {       username => $username,
                        password => md5_hex($seed . md5_hex($password)),
                        type => $type });

        die("No auth response returned on login.") unless $response;

        my $authtoken = $response->{payload}->{authtoken};

        die("Login failed for user $username!") unless $authtoken;

        return $authtoken;
}
126
# entityize($text [, $form])
#
# Unicode-normalizes $text (NFD when $form is 'D', NFC in every other case)
# and returns it with each character in the range U+0080..U+FFFD replaced by
# an upper-case hexadecimal numeric character reference such as &#xE9;.
sub entityize {
        my ($text, $norm_form) = @_;

        my $wants_decomposed = ($norm_form and $norm_form eq 'D');
        my $normalized = $wants_decomposed ? NFD($text) : NFC($text);

        $normalized =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;

        return $normalized;
}
140