5 use lib '/openils/lib/perl5/';
8 use OpenILS::Utils::Fieldmapper;
9 use Digest::MD5 qw/md5_hex/;
10 use OpenSRF::Utils::JSON;
12 use Unicode::Normalize;
16 use Time::HiRes qw/time/;
19 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
22 #MARC::Charset->ignore_errors(1);
24 my ($id_field, $recid, $user, $config, $idlfile, $marctype, $keyfile, $dontuse_file, $enc, $force_enc, @files, @trash_fields, $quiet) =
25 ('', 1, 1, '/openils/conf/opensrf_core.xml', '/openils/conf/fm_IDL.xml', 'USMARC');
28 'marctype=s' => \$marctype,
29 'startid=i' => \$recid,
30 'idfield=s' => \$id_field,
32 'encoding=s' => \$enc,
33 'hard_encoding' => \$force_enc,
34 'keyfile=s' => \$keyfile,
35 'config=s' => \$config,
37 'trash=s' => \@trash_fields,
38 'xml_idl=s' => \$idlfile,
39 'dontuse=s' => \$dontuse_file,
44 MARC::Charset->ignore_errors(1);
45 MARC::Charset->assume_encoding($enc);
48 if (uc($marctype) eq 'XML') {
54 @files = @ARGV if (!@files);
72 Fieldmapper->import(IDL => $idlfile);
76 open F, $keyfile or die "Couldn't open key file $keyfile";
78 if ( /^(\d+)\|(\S+)/o ) {
87 open F, $dontuse_file or die "Couldn't open used-id file $dontuse_file";
97 select STDERR; $| = 1;
98 select STDOUT; $| = 1;
100 my $batch = new MARC::Batch ( $marctype, @files );
101 $batch->strict_off();
102 $batch->warnings_off();
105 my $starttime = time;
108 while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
109 next if ($rec == -1);
113 while ($used_ids{$recid}) {
116 $used_ids{$recid} = 1;
119 my $field = $rec->field($id_field);
121 if ($field->is_control_field) {
124 $id = $field->subfield('a');
136 if (my $tcn = $keymap{$id}) {
137 $rec->delete_field( $_ ) for ($rec->field($id_field));
138 $rec->append_fields( MARC::Field->new( $id_field, '', '', 'a', $tcn ) );
146 ($rec, $tcn) = preprocess($rec);
148 $tcn->add_subfields(c => $id);
150 $rec->delete_field( $_ ) for ($rec->field($id_field));
151 $rec->append_fields( $tcn );
157 my $tcn_value = $rec->subfield('901' => 'a') || "SYS$id";
158 my $tcn_source = $rec->subfield('901' => 'b') || 'System';
160 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
161 $xml =~ s/^<\?xml.+\?\s*>//go;
162 $xml =~ s/>\s+</></go;
163 $xml =~ s/\p{Cc}//go;
164 $xml = entityize($xml);
165 $xml =~ s/[\x00-\x1f]//go;
167 my $bib = new Fieldmapper::biblio::record_entry;
172 $bib->creator($user);
173 $bib->create_date('now');
175 $bib->edit_date('now');
176 $bib->tcn_source($tcn_source);
177 $bib->tcn_value($tcn_value);
178 $bib->last_xact_id('IMPORT-'.$starttime);
180 print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
181 $dontuse_id{$tcn_value} = 1;
185 if (!$quiet && !($count % 50)) {
186 print STDERR "\r$count\t". $count / (time - $starttime);
193 my ($id, $source, $value) = ('','','');
196 my $f = $rec->field('001');
197 $id = $f->data if ($f);
198 $id = '' if ($dontuse_id{$id});
201 if (!$id || $dontuse_id{$source.$id}) {
202 my $f = $rec->field('000');
203 $id = $f->data if ($f);
204 $source = 'g' if ($f); # only PG seems to use this
207 if (!$id || $dontuse_id{$source.$id}) {
208 my $f = $rec->field('020');
209 $id = $f->subfield('a') if ($f);
210 $source = 'i' if ($f);
213 if (!$id || $dontuse_id{$source.$id}) {
214 my $f = $rec->field('022');
215 $id = $f->subfield('a') if ($f);
216 $source = 'i' if ($f);
219 if (!$id || $dontuse_id{$source.$id}) {
220 my $f = $rec->field('010');
221 $id = $f->subfield('a') if ($f);
222 $source = 'l' if ($f);
226 # my $f = $rec->field($id_field);
227 # $id = $f->subfield('a') if ($f);
230 $rec->delete_field($_) for ($rec->field($id_field, @trash_fields));
235 $id =~ s/^(\S+).*$/$1/o;
237 $id = $source.$id if ($source);
239 ($source, $value) = $id =~ /^(.)(.+)$/o;
240 if ($id =~ /^o(\d+)$/o) {
246 if ($id && $dontuse_id{$id}) {
247 warn "\n!!! ID $id is already in use\n";
256 my $tcn = MARC::Field->new(
259 b => do { $source_map{$source} || 'System' },
269 if ($form and $form eq 'D') {
270 $stuff = NFD($stuff);
272 $stuff = NFC($stuff);
275 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;