5 use lib '/openils/lib/perl5/';
8 use OpenILS::Utils::Fieldmapper;
9 use Digest::MD5 qw/md5_hex/;
10 use OpenSRF::Utils::JSON;
12 use Unicode::Normalize;
16 use Time::HiRes qw/time/;
19 use MARC::File::XML ( BinaryEncoding => 'utf-8' );
23 #MARC::Charset->ignore_errors(1);
25 my ($id_field, $id_subfield, $recid, $user, $config, $idlfile, $marctype, $keyfile, $dontuse_file, $enc, $force_enc, @files, @trash_fields, @req_fields, $quiet) =
26 ('', 'a', 0, 1, '/openils/conf/opensrf_core.xml', '/openils/conf/fm_IDL.xml', 'USMARC');
28 my ($db_driver,$db_host,$db_name,$db_user,$db_pw) =
29 ('Pg','localhost','evergreen','postgres','postgres');
31 GetOptions( 'marctype=s' => \$marctype,
32 'startid=i' => \$recid,
33 'idfield=s' => \$id_field,
34 'idsubfield=s' => \$id_subfield,
36 'encoding=s' => \$enc,
37 'hard_encoding' => \$force_enc,
38 'keyfile=s' => \$keyfile,
39 'config=s' => \$config,
41 'required_field=s' => \@req_fields,
42 'trash=s' => \@trash_fields,
43 'xml_idl=s' => \$idlfile,
44 'dontuse=s' => \$dontuse_file,
45 "db_driver=s" => \$db_driver,
46 "db_host=s" => \$db_host,
47 "db_name=s" => \$db_name,
48 "db_user=s" => \$db_user,
54 @trash_fields = split(/,/,join(',',@trash_fields));
57 MARC::Charset->ignore_errors(1);
58 MARC::Charset->assume_encoding($enc);
61 if (uc($marctype) eq 'XML') {
67 @files = @ARGV if (!@files);
73 my $dsn = "dbi:$db_driver:host=$db_host;dbname=$db_name";
76 my $table = 'biblio_record_entry';
77 $table = 'biblio.record_entry' if ($db_driver eq 'Pg');
79 my $dbh = DBI->connect($dsn,$db_user,$db_pw);
80 my $sth = $dbh->prepare("SELECT MAX(id) + 1 FROM $table");
83 $sth->bind_col(1, \$recid);
98 Fieldmapper->import(IDL => $idlfile);
102 open F, $keyfile or die "Couldn't open key file $keyfile";
104 if ( /^(\d+)\|(\S+)/o ) {
113 open F, $dontuse_file or die "Couldn't open used-id file $dontuse_file";
123 select STDERR; $| = 1;
124 select STDOUT; $| = 1;
126 my $batch = new MARC::Batch ( $marctype, @files );
127 $batch->strict_off();
128 $batch->warnings_off();
131 my $starttime = time;
134 PROCESS: while ( try { $rec = $batch->next } otherwise { $rec = -1 } ) {
135 next if ($rec == -1);
137 # Skip records that don't contain a required field (like '245', for example)
138 foreach my $req_field(@req_fields) {
139 next PROCESS if !$rec->field("$req_field");
144 while (exists $used_ids{$recid}) {
147 $used_ids{$recid} = 1;
150 my $field = $rec->field($id_field);
152 if ($field->is_control_field) {
155 $id = $field->subfield($id_subfield);
160 $id = '' if (exists $dontuse_id{$id});
168 if (my $tcn = $keymap{$id}) {
169 $rec->delete_field( $_ ) for ($rec->field($id_field));
170 $rec->append_fields( MARC::Field->new( $id_field, '', '', $id_subfield, $tcn ) );
179 $id = $rec->subfield('901' => 'c')
181 ($rec, $tcn) = preprocess($rec, $id);
182 $tcn->add_subfields(c => $id);
184 $rec->delete_field( $_ ) for ($rec->field($id_field));
185 $rec->append_fields( $tcn );
190 my $tcn_value = $rec->subfield('901' => 'a') || "SYS$id";
191 my $tcn_source = $rec->subfield('901' => 'b') || 'System';
193 (my $xml = $rec->as_xml_record()) =~ s/\n//sog;
194 $xml =~ s/^<\?xml.+\?\s*>//go;
195 $xml =~ s/>\s+</></go;
196 $xml =~ s/\p{Cc}//go;
197 $xml = entityize($xml,'D');
198 $xml =~ s/[\x00-\x1f]//go;
200 my $bib = new Fieldmapper::biblio::record_entry;
205 $bib->creator($user);
206 $bib->create_date('now');
208 $bib->edit_date('now');
209 $bib->tcn_source($tcn_source);
210 $bib->tcn_value($tcn_value);
211 $bib->last_xact_id('IMPORT-'.$starttime);
213 print OpenSRF::Utils::JSON->perl2JSON($bib)."\n";
214 $dontuse_id{$tcn_value} = 1;
218 if (!$quiet && !($count % 50)) {
219 print STDERR "\r$count\t". $count / (time - $starttime);
227 my ($source, $value) = ('','');
229 $id = '' if (exists $dontuse_id{$id});
232 my $f = $rec->field('001');
233 $id = $f->data if ($f);
234 $id = '' if (exists $dontuse_id{$id});
237 if (!$id || exists $dontuse_id{$source.$id}) {
238 my $f = $rec->field('000');
239 $id = $f->data if ($f);
240 $source = 'g' if ($f); # only PG seems to use this
243 if (!$id || exists $dontuse_id{$source.$id}) {
244 my $f = $rec->field('020');
245 $id = $f->subfield('a') if ($f);
246 $source = 'i' if ($f);
249 if (!$id || exists $dontuse_id{$source.$id}) {
250 my $f = $rec->field('022');
251 $id = $f->subfield('a') if ($f);
252 $source = 'i' if ($f);
255 if (!$id || exists $dontuse_id{$source.$id}) {
256 my $f = $rec->field('010');
257 $id = $f->subfield('a') if ($f);
258 $source = 'l' if ($f);
261 $rec->delete_field($_) for ($rec->field('901', $id_field, @trash_fields));
266 $id =~ s/^(\S+).*$/$1/o;
268 $id = $source.$id if ($source);
270 ($source, $value) = $id =~ /^(.)(.+)$/o;
271 if ($id =~ /^o(\d+)$/o) {
277 if ($id && exists $dontuse_id{$id}) {
278 warn "\n!!! TCN $id is already in use. Using the record ID ($recid) as a system-generated TCN.\n";
287 my $tcn = MARC::Field->new(
290 b => do { $source_map{$source} || 'System' },
300 if ($form and $form eq 'D') {
301 $stuff = NFD($stuff);
303 $stuff = NFC($stuff);
306 $stuff =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;