]> git.evergreen-ils.org Git - working/Evergreen.git/blob - Open-ILS/src/extras/import/pg_loader.pl
Always use BinaryEncoding => "UTF-8" with MARC::File::XML
[working/Evergreen.git] / Open-ILS / src / extras / import / pg_loader.pl
1 #!/usr/bin/perl
2 use strict;
3
4 use lib '/openils/lib/perl5/';
5
6 use OpenSRF::System;
7 use OpenSRF::EX qw/:try/;
8 use OpenSRF::Utils::SettingsClient;
9 use OpenILS::Utils::Fieldmapper;
10 use OpenSRF::Utils::JSON;
11 use FileHandle;
12
13 use Time::HiRes qw/time/;
14 use Getopt::Long;
15
# Command-line state.
# NOTE(review): @files is not referenced anywhere in the visible script;
# input is read through <> (files named in @ARGV, or STDIN).
my @files;

# --config: OpenSRF configuration file used to bootstrap the client.
my $config = '/openils/conf/opensrf_core.xml';

# --output: where the generated SQL is written; STDOUT is used when unset.
my $output;

# Repeatable options, each taking a class hint (e.g. 'bre', 'mfr'):
#   --autoprimary  the primary key column is left out of the COPY column
#                  list and no setval() is generated for the sequence
#   --order        hints to write out, in this order; hints that are not
#                  listed here are not written at all
#   --wipe         emit DELETE FROM <table> before the COPY
my @auto;
my @order;
my @wipe;

# --quiet: suppress the progress output on STDERR.
my $quiet;

# --nocommit / -n: do not emit the final COMMIT.
my $nocommit = 0;

# GetOptions returns false when it met an unknown or malformed option.
# Stop here rather than running a long load with unintended settings.
GetOptions(
    'config=s'      => \$config,
    'output=s'      => \$output,
    'wipe=s'        => \@wipe,
    'autoprimary=s' => \@auto,
    'order=s'       => \@order,
    'nocommit|n'    => \$nocommit,
    'quiet'         => \$quiet,
) or die "Usage: $0 [--config FILE] [--output FILE] [--order HINT]... "
       . "[--autoprimary HINT]... [--wipe HINT]... [--nocommit] [--quiet] "
       . "[FILE...]\n";
29
# Rows collected from the input, grouped by class hint:
#   hint => [ [ column values in $fieldcache{hint}{fields} order ], ... ]
my %lineset;

# Per-hint metadata captured the first time a hint is seen:
#   hint => { table, sequence, pkey, fields => [ column names ] }
my %fieldcache;

# Bootstrap the OpenSRF client from $config, then import Fieldmapper with
# the IDL location taken from the OpenSRF settings.
OpenSRF::System->bootstrap_client( config_file => $config );
Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));

my $count = 0;          # number of records successfully collected so far
my $starttime = time;   # Time::HiRes::time, used for the progress rate

# Read one JSON-encoded object per input line.
while ( my $rec = <> ) {
    # Capture the input line number right away: $. tracks the most recently
    # read filehandle and could change if parsing reads from another one.
    my $lineno = $.;

    next unless ($rec);

    my $row;
    try {
        $row = OpenSRF::Utils::JSON->JSON2perl($rec);
    } catch Error with {
        my $e = shift;
        # Report the real input line; $count only counts records that were
        # parsed successfully and drifts after the first skipped line.
        warn "\n\n !!! Error : $e \n\n at or around line $lineno\n";
    };
    next unless ($row);

    my $class = $row->class_name;
    my $hint = $row->json_hint;

    # First record of this class: remember its table layout.
    if (!$lineset{$hint}) {
        $lineset{$hint} = [];
        my @cols = $row->real_fields;

        # For --autoprimary classes the primary key column is not loaded.
        if (grep { $_ eq $hint } @auto) {
            @cols = grep { $_ ne $class->Identity } @cols;
        }

        $fieldcache{$hint} =
            { table => $class->Table,
              sequence => $class->Sequence,
              pkey => $class->Identity,
              fields => \@cols,
            };

        #XXX it burnnnsssessss
        # For 'mfr' the target table name is rewritten from *.full_rec to
        # *.real_full_rec.
        # NOTE(review): presumably full_rec is not directly loadable - confirm.
        $fieldcache{$hint}{table} =~ s/\.full_rec/.real_full_rec/o if ($hint eq 'mfr');
    }

    # Store the column values in the cached field order.
    push @{ $lineset{$hint} }, [map { $row->$_ } @{ $fieldcache{$hint}{fields} }];

    # Progress: record count and records per second, every 500 records.
    if (!$quiet && !($count % 500)) {
        my $elapsed = time - $starttime;
        # Guard against a zero interval on coarse clocks.
        my $rate = $elapsed > 0 ? $count / $elapsed : 0;
        print STDERR "\r$count\t" . $rate;
    }

    $count++;
}
79
print STDERR "\nWriting file ...\n" if (!$quiet);

# Open the destination.  Without --output the handle is a duplicate of
# STDOUT (">&STDOUT"); otherwise the named file is created or truncated.
# From here on $output holds the FileHandle object instead of the file name.
{
    my $target = $output || '&STDOUT';
    $output = FileHandle->new(">$target")
        or die "Unable to open output '$target' for writing: $!\n";
}

# The generated SQL is written as UTF-8.
binmode($output, ':utf8')
    or die "Unable to set UTF-8 mode on output: $!\n";

# Preamble: declare the client encoding and open the transaction that
# wraps all of the generated statements.
$output->print("SET CLIENT_ENCODING TO 'UNICODE';\n\n");
$output->print("BEGIN;\n\n");
89
# Statements to emit after the COMMIT (sequence resets), collected per table.
my $after_commit = '';

# Escape a single column value for PostgreSQL's COPY text format.
#   undef          -> \N (the NULL marker)
#   backslash      -> \\
#   FF, LF, CR, TAB -> \f, \n, \r, \t
# Works on a copy, so the caller's value is left untouched.
sub _copy_escape {
    my ($value) = @_;
    return '\N' unless defined $value;

    # Backslashes have to be doubled FIRST.  Doing it after the other
    # substitutions would also double the backslashes they introduce, so a
    # newline would be loaded as a literal backslash followed by "n".
    $value =~ s/\\/\\\\/g;
    $value =~ s/\f/\\f/g;
    $value =~ s/\n/\\n/g;
    $value =~ s/\r/\\r/g;
    $value =~ s/\t/\\t/g;

    return $value;
}

for my $h (@order) {
    # continue if there was no data for this table
    next unless ($fieldcache{$h});

    my $table  = $fieldcache{$h}{table};
    my @fields = @{ $fieldcache{$h}{fields} };
    my $fields = join(',', @fields);

    $output->print( "DELETE FROM $table;\n" ) if (grep { $_ eq $h } @wipe);

    # Bulk-load the rows with COPY (speeds up loading of bib records).
    $output->print( "COPY $table ($fields) FROM STDIN;\n" );

    for my $line (@{ $lineset{$h} }) {
        my @data;
        for my $x (0 .. $#$line) {
            my $d = _copy_escape($line->[$x]);

            # bre.quality is written as an integer; NULLs are left alone.
            if ($h eq 'bre' and $fields[$x] eq 'quality' and defined $line->[$x]) {
                $d = int($d);
            }
            push @data, $d;
        }
        $output->print( join("\t", @data)."\n" );
    }

    # End-of-data marker for COPY ... FROM STDIN.
    $output->print('\.'."\n\n");

    if ($h eq 'mfr') {
        # NOTE(review): enable immediately followed by disable is kept exactly
        # as found; presumably the enable call has a wanted side effect before
        # the trigger is switched off again - confirm against the reporter
        # schema.
        $output->print("SELECT reporter.enable_materialized_simple_record_trigger();\n");
        $output->print("SELECT reporter.disable_materialized_simple_record_trigger();\n");
    }

    # Primary keys were loaded explicitly (class not in --autoprimary), so
    # move the sequence to the highest loaded key once the data is in.
    $after_commit .= "SELECT setval('$fieldcache{$h}{sequence}'::TEXT, (SELECT MAX($fieldcache{$h}{pkey}) FROM $table), TRUE);\n"
        if (!grep { $_ eq $h } @auto);
}
132
# Close the transaction unless --nocommit was given, then emit the sequence
# resets collected in $after_commit.
$output->print("COMMIT;\n\n") unless $nocommit;
$output->print($after_commit);

# Buffered write errors (for example a full disk) only surface at close.
# Fail loudly so a truncated SQL file is not mistaken for a good one.
$output->close
    or die "Error closing output: $!\n";