5 use lib '/openils/lib/perl5/';
8 use OpenSRF::Application;
9 use OpenSRF::EX qw/:try/;
10 use OpenSRF::AppSession;
11 use OpenSRF::MultiSession;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenILS::Application::AppUtils;
14 use OpenILS::Utils::Fieldmapper;
15 use Digest::MD5 qw/md5_hex/;
18 use Unicode::Normalize;
24 use Time::HiRes qw/time/;
27 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
30 MARC::Charset->ignore_errors(1);
32 my ($id_field, $recid, $user, $config, $marctype, $keyfile, $dontuse_file, $enc, @files, @trash_fields) =
33 ('', 1, 1, '/openils/conf/bootstrap.conf', 'USMARC');
36 'marctype=i' => \$marctype,
37 'startid=i' => \$recid,
38 'idfield=s' => \$id_field,
40 'encoding=s' => \$enc,
41 'keyfile=s' => \$keyfile,
42 'config=s' => \$config,
44 'trash=s' => \@trash_fields,
45 'dontuse=s' => \$dontuse_file
49 MARC::Charset->ignore_errors(1);
50 MARC::Charset->assume_encoding($enc);
53 @files = @ARGV if (!@files);
70 OpenSRF::System->bootstrap_client( config_file => $config );
71 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
75 open F, $keyfile or die "Couldn't open key file $keyfile";
77 if ( /^(\d+)\|(\S+)/o ) {
86 open F, $dontuse_file or die "Couldn't open used-id file $dontuse_file";
96 select STDERR; $| = 1;
97 select STDOUT; $| = 1;
99 my $batch = new MARC::Batch ( $marctype, @files );
100 $batch->strict_off();
101 $batch->warnings_off();
104 my $starttime = time;
107 while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
108 next if ($rec == -1);
112 while ($used_ids{$recid}) {
115 $used_ids{$recid} = 1;
118 my $field = $rec->field($id_field);
120 if ($field->is_control_field) {
123 $id = $field->subfield('a');
135 if (my $tcn = $keymap{$id}) {
136 $rec->delete_field( $_ ) for ($rec->field($id_field));
137 $rec->append_fields( MARC::Field->new( $id_field, '', '', 'a', $tcn ) );
144 $rec = preprocess($rec);
145 $rec->delete_field( $_ ) for ($rec->field($id_field));
151 my $tcn_value = $rec->subfield('901' => 'a') || "SYS$id";
152 my $tcn_source = $rec->subfield('901' => 'b') || 'System';
154 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
155 $xml =~ s/^<\?xml.+\?\s*>//go;
156 $xml =~ s/>\s+</></go;
157 $xml =~ s/\p{Cc}//go;
158 $xml = entityize($xml);
160 my $bib = new Fieldmapper::biblio::record_entry;
165 $bib->creator($user);
166 $bib->create_date('now');
168 $bib->edit_date('now');
169 $bib->tcn_source($tcn_source);
170 $bib->tcn_value($tcn_value);
171 $bib->last_xact_id('IMPORT-'.$starttime);
173 print JSON->perl2JSON($bib)."\n";
174 $dontuse_id{$tcn_value} = 1;
178 if (!($count % 50)) {
179 print STDERR "\r$count\t". $count / (time - $starttime);
186 my ($id, $source, $value) = ('','','');
189 my $f = $rec->field('001');
190 $id = $f->data if ($f);
193 if (!$id || $dontuse_id{$source.$id}) {
194 my $f = $rec->field('000');
195 $id = $f->data if ($f);
196 $source = 'g' if ($f); # only PG seems to use this
199 if (!$id || $dontuse_id{$source.$id}) {
200 my $f = $rec->field('020');
201 $id = $f->subfield('a') if ($f);
202 $source = 'i' if ($f);
205 if (!$id || $dontuse_id{$source.$id}) {
206 my $f = $rec->field('022');
207 $id = $f->subfield('a') if ($f);
208 $source = 'i' if ($f);
211 if (!$id || $dontuse_id{$source.$id}) {
212 my $f = $rec->field('010');
213 $id = $f->subfield('a') if ($f);
214 $source = 'l' if ($f);
218 # my $f = $rec->field($id_field);
219 # $id = $f->subfield('a') if ($f);
222 $rec->delete_field($_) for ($rec->field($id_field, @trash_fields));
227 $id =~ s/^(\S+).*$/$1/o;
229 $id = $source.$id if ($source);
231 ($source, $value) = $id =~ /^(.)(.+)$/o;
232 if ($id =~ /^o(\d+)$/o) {
238 if ($id && $dontuse_id{$id}) {
239 warn "\n!!! ID $id is already in use\n";
248 my $tcn = MARC::Field->new(
251 b => do { $source_map{$source} || 'System' },
254 $rec->append_fields($tcn);
263 if ($form and $form eq 'D') {
264 $stuff = NFD($stuff);
266 $stuff = NFC($stuff);
269 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;