2 # Copyright (C) 2010-2011 Laurentian University
3 # Author: Dan Scott <dscott@laurentian.ca>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 # ---------------------------------------------------------------
21 use MARC::File::XML (BinaryEncoding => 'UTF-8');
24 use OpenILS::Utils::Fieldmapper;
25 use OpenSRF::Utils::SettingsClient;
26 use OpenSRF::EX qw/:try/;
28 use Unicode::Normalize;
29 use OpenILS::Application::AppUtils;
31 use Pod::Usage qw/ pod2usage /;
33 MARC::Charset->assume_unicode(1);
35 my ($start_id, $end_id, $refresh);
37 my $bootstrap = '@sysconfdir@/opensrf_core.xml';
41 my $result = GetOptions(
43 'configuration=s' => \$bootstrap,
44 'record=i' => \@records,
45 'refresh' => \$refresh,
47 'start_id=i' => \$start_id,
48 'end_id=i' => \$end_id,
49 'days_back=i' => \$days_back,
52 if (!$result or $options{help}) {
56 if ($start_id && $days_back) {
57 print "Can't use both start ID and days back!\n";
61 OpenSRF::System->bootstrap_client(config_file => $bootstrap);
62 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
64 # must be loaded and initialized after the IDL is parsed
65 use OpenILS::Utils::CStoreEditor;
66 OpenILS::Utils::CStoreEditor::init();
68 my $e = OpenILS::Utils::CStoreEditor->new;
71 # get a list of all non-deleted records from Evergreen
72 # open-ils.cstore open-ils.cstore.direct.biblio.record_entry.id_list.atomic {"deleted":"f"}
73 $undeleted = $e->request(
74 'open-ils.cstore.direct.biblio.record_entry.id_list.atomic',
75 [{deleted => 'f'}, {id => { '>' => 0}}]
77 @records = @$undeleted;
80 if ($start_id and $end_id) {
81 @records = ($start_id .. $end_id);
84 if (defined $days_back) {
87 # Grab DB information from local settings
88 my $sc = OpenSRF::Utils::SettingsClient->new;
89 my $db_driver = $sc->config_value( reporter => setup => database => 'driver' );
90 my $db_host = $sc->config_value( reporter => setup => database => 'host' );
91 my $db_port = $sc->config_value( reporter => setup => database => 'port' );
92 my $db_name = $sc->config_value( reporter => setup => database => 'db' );
94 $db_name = $sc->config_value( reporter => setup => database => 'name' );
# Terminate the warning with a newline so it does not run into later STDERR output.
95 print STDERR "WARN: <database><name> is a deprecated setting for database name. For future compatibility, you should use <database><db> instead.\n" if $db_name;
97 my $db_user = $sc->config_value( reporter => setup => database => 'user' );
98 my $db_pw = $sc->config_value( reporter => setup => database => 'pw' );
100 die "Unable to retrieve database connection information from the settings server" unless ($db_driver && $db_host && $db_port && $db_name && $db_user);
102 my $dsn = "dbi:" . $db_driver . ":dbname=" . $db_name .';host=' . $db_host . ';port=' . $db_port;
103 my $dbh = DBI->connect($dsn,$db_user,$db_pw, {AutoCommit => 1, pg_enable_utf8 => 1, RaiseError => 1}) or die "database connection error";
105 # SQL used to gather a list of IDs
106 my $idstatement = $dbh->prepare("SELECT DISTINCT(id) AS id FROM biblio.record_entry where (date(create_date) = date(now()) or date(edit_date) = date((NOW() - '$days_back day'::interval)))");
108 # Load the list of IDs into the records array
109 $idstatement->execute();
110 while (my $ref = $idstatement->fetchrow_hashref()) {
111 my $id_ref = $ref->{"id"}; # the column name in our sql query is "id"
112 push(@records, $id_ref);
116 # print Dumper($undeleted, \@records);
118 # Hash of controlled fields & subfields in bibliographic records, and their
119 # corresponding controlling fields & subfields in the authority record
121 # So, if the bib 650$a can be controlled by an auth 150$a, that maps to:
122 # 650 => { a => { 150 => 'a'}}
124 100 => { a => { 100 => 'a' },
141 110 => { a => { 110 => 'a' },
156 111 => { a => { 111 => 'a' },
172 130 => { a => { 130 => 'a' },
187 600 => { a => { 100 => 'a' },
212 610 => { a => { 110 => 'a' },
234 611 => { a => { 111 => 'a' },
254 630 => { a => { 130 => 'a' },
273 648 => { a => { 148 => 'a' },
279 650 => { a => { 150 => 'a' },
286 651 => { a => { 151 => 'a' },
292 655 => { a => { 155 => 'a' },
298 700 => { a => { 100 => 'a' },
315 710 => { a => { 110 => 'a' },
330 711 => { a => { 111 => 'a' },
346 730 => { a => { 130 => 'a' },
361 751 => { a => { 151 => 'a' },
367 830 => { a => { 130 => 'a' },
384 foreach my $rec_id (@records) {
387 # State variable; was the record changed?
391 my $record = $e->retrieve_biblio_record_entry($rec_id);
393 # print Dumper($record);
396 my $marc = MARC::Record->new_from_xml($record->marc());
398 # get the list of controlled fields
399 my @c_fields = keys %controllees;
401 foreach my $c_tag (@c_fields) {
402 my @c_subfields = keys %{$controllees{"$c_tag"}};
403 # print "Field: $field subfields: ";
404 # foreach (@subfields) { print "$_ "; }
406 # Get the MARCXML from the record and check for controlled fields/subfields
407 my @bib_fields = ($marc->field($c_tag));
408 foreach my $bib_field (@bib_fields) {
409 # print $_->as_formatted();
411 if ($refresh and defined(scalar($bib_field->subfield('0')))) {
412 $bib_field->delete_subfield(code => '0');
419 foreach my $c_subfield (@c_subfields) {
420 my @sf_values = $bib_field->subfield($c_subfield);
422 # Give me the first element of the list of authority controlling tags for this subfield
423 # XXX Will we need to support more than one controlling tag per subfield? Probably. That
424 # will suck. Oh well, leave that up to Ole to implement.
425 $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0];
426 $match_tag = $match_subfields{$c_subfield};
427 push @searches, map {{term => $_, subfield => $c_subfield}} @sf_values;
430 # print Dumper(\%match_subfields);
433 my @tags = ($match_tag);
435 # print "Controlling tag: $c_tag and match tag $match_tag\n";
436 # print Dumper(\@tags, \@searches);
438 # Now we've built up a complete set of matching controlled
439 # subfields for this particular field; let's check to see if
440 # we have a matching authority record
441 my $session = OpenSRF::AppSession->create("open-ils.search");
442 my $validates = $session->request("open-ils.search.authority.validate.tag.id_list",
443 "tags", \@tags, "searches", \@searches
445 $session->disconnect();
447 # print Dumper($validates);
449 # Protect against failed (error condition) search request
451 print STDERR "Search for matching authority failed; record # $rec_id\n";
455 # Only add linking if one or more was found, but we may have changed
456 # the record already if in --refresh mode.
457 if (scalar(@$validates) > 0) {
459 # Iterate through the returned authority record IDs to delete any
460 # matching $0 subfields already in the bib record
461 foreach my $auth_zero (@$validates) {
462 $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/);
465 # Okay, we have a matching authority control; time to
466 # add the magical subfield 0. Use the first returned auth
# Take the first returned authority record ID (element access rather than a one-element slice).
468 my $auth_id = $validates->[0];
469 my $auth_rec = $e->retrieve_authority_record_entry($auth_id);
470 my $auth_marc = MARC::Record->new_from_xml($auth_rec->marc());
471 my $cni = $auth_marc->field('003')->data();
473 $bib_field->add_subfields('0' => "($cni)$auth_id");
479 my $editor = OpenILS::Utils::CStoreEditor->new(xact=>1);
480 # print $marc->as_formatted();
481 my $xml = $marc->as_xml_record();
483 $xml =~ s/^<\?xml.+\?\s*>//go;
484 $xml =~ s/>\s+</></go;
485 $xml =~ s/\p{Cc}//go;
486 $xml = OpenILS::Application::AppUtils->entityize($xml);
489 $editor->update_biblio_record_entry($record);
494 print STDERR "\nRecord # $rec_id : $err\n";
495 import MARC::File::XML; # reset SAX parser so that one bad record doesn't kill the entire process
503 authority_control_fields.pl - Controls fields in bibliographic records with authorities in Evergreen
507 C<authority_control_fields.pl> [B<--configuration>=I<opensrf_core.xml>] [B<--refresh>]
508 [[B<--record>=I<record>[ B<--record>=I<record>]]] | [B<--all>] | [B<--start_id>=I<start-ID> B<--end_id>=I<end-ID>] | [B<--days_back>=I<days>]
512 For a given set of records:
516 =item * Iterate through the list of fields that are controlled fields
518 =item * Iterate through the list of subfields that are controlled for
521 =item * Search for a matching authority record for that combination of
526 =item * If we find a match, then add a $0 subfield to that field identifying
527 the controlling authority record
529 =item * If we do not find a match, then insert a row into an "uncontrolled"
530 table identifying the record ID, field, and subfield(s) that were not controlled
534 =item * Iterate through the list of floating subdivisions
538 =item * If we find a match, then add a $0 subfield to that field identifying
539 the controlling authority record
541 =item * If we do not find a match, then insert a row into an "uncontrolled"
542 table identifying the record ID, field, and subfield(s) that were not controlled
546 =item * If we changed the record, update it in the database
554 =item * B<-c> I<config-file>, B<--configuration>=I<config-file>
556 Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
557 Defaults to F<@sysconfdir@/opensrf_core.xml>
559 =item * B<-r> I<record-ID>, B<--record>=I<record-ID>
561 Specifies the bibliographic record ID (found in the C<biblio.record_entry.id>
562 column) of the record to process. This option may be specified more than once
563 to process multiple records in a single run.
565 =item * B<-a>, B<--all>
567 Specifies that all bibliographic records should be processed. For large
568 databases, this may take an extraordinarily long amount of time.
570 =item * B<-r>, B<--refresh>
572 Specifies that all authority links should be removed from the target
573 bibliographic record(s). This will effectively rewrite all authority
576 =item * B<-s> I<start-ID>, B<--start_id>=I<start-ID>
578 Specifies the starting ID of the range of bibliographic records to process.
579 This option is ignored unless it is accompanied by the B<-e> or B<--end_id>
582 =item * B<-e> I<end-ID>, B<--end_id>=I<end-ID>
584 Specifies the ending ID of the range of bibliographic records to process.
585 This option is ignored unless it is accompanied by the B<-s> or B<--start_id>
592 authority_control_fields.pl --start_id 1 --end_id 50000
594 Processes the bibliographic records with IDs between 1 and 50,000 using the
595 default OpenSRF configuration file for connection information.
599 Dan Scott <dscott@laurentian.ca>
601 =head1 COPYRIGHT AND LICENSE
603 Copyright 2010-2011 by Dan Scott
605 This program is free software; you can redistribute it and/or
606 modify it under the terms of the GNU General Public License
607 as published by the Free Software Foundation; either version 2
608 of the License, or (at your option) any later version.
610 This program is distributed in the hope that it will be useful,
611 but WITHOUT ANY WARRANTY; without even the implied warranty of
612 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
613 GNU General Public License for more details.
615 You should have received a copy of the GNU General Public License
616 along with this program; if not, write to the Free Software
617 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.