2 # Copyright (C) 2010 Laurentian University
3 # Author: Dan Scott <dscott@laurentian.ca>
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 # ---------------------------------------------------------------
21 use MARC::File::XML (BinaryEncoding => 'UTF-8');
23 use OpenILS::Utils::Fieldmapper;
24 use OpenSRF::Utils::SettingsClient;
26 use Unicode::Normalize;
27 use OpenILS::Application::AppUtils;
29 use Pod::Usage qw/ pod2usage /;
# Command-line state. $start_id/$end_id bound an optional range of record
# IDs; $bootstrap is the OpenSRF client configuration file and can be
# overridden with --configuration.
31 my ($start_id, $end_id);
32 my $bootstrap = '/openils/conf/opensrf_core.xml';
# Parse the command line. 'record=i' targets an array, so the option may be
# given more than once to collect several record IDs.
# NOTE(review): the declarations of @records and %options, and the option
# specs for help/all, are not visible in this excerpt -- confirm they exist
# before relying on $options{help} below.
36 my $result = GetOptions(
38 'configuration=s' => \$bootstrap,
39 'record=i' => \@records,
41 'start_id=i' => \$start_id,
42 'end_id=i' => \$end_id,
# Taken when option parsing failed or help was requested. The body is not
# shown here; presumably it prints usage via pod2usage -- TODO confirm.
45 if (!$result or $options{help}) {
# Connect to OpenSRF as a client using the selected configuration file.
49 OpenSRF::System->bootstrap_client(config_file => $bootstrap);
# Initialise Fieldmapper from the IDL named by the "IDL" settings value.
50 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
52 # must be loaded and initialized after the IDL is parsed
# NOTE(review): 'use' is processed at compile time, so the module is loaded
# before the bootstrap calls above run; only the init() call below is
# actually ordered after the IDL import.
53 use OpenILS::Utils::CStoreEditor;
54 OpenILS::Utils::CStoreEditor::init();
# File-level editor; the record ID list request is issued through it.
56 my $editor = OpenILS::Utils::CStoreEditor->new;
# Decide which bibliographic record IDs to process.
# NOTE(review): the condition guarding this "all records" request is not
# visible in this excerpt (presumably the --all option), nor is a 'my'
# declaration for $undeleted -- confirm both.
59 # get a list of all non-deleted records from Evergreen
60 # open-ils.cstore open-ils.cstore.direct.biblio.record_entry.id_list.atomic {"deleted":"f"}
61 $undeleted = $editor->request(
62 'open-ils.cstore.direct.biblio.record_entry.id_list.atomic',
# Filter: not deleted and id > 0.
63 [{deleted => 'f'}, {id => { '>' => 0}}]
# The result is dereferenced as an array of record IDs.
65 @records = @$undeleted;
# An explicit ID range replaces whatever @records held so far.
# NOTE(review): this is a truthiness test, so a start_id or end_id of 0 is
# treated as "not supplied"; a defined() check may have been intended.
68 if ($start_id and $end_id) {
# Every integer in the range is queued; nothing here checks that a record
# with that ID exists or is undeleted.
69 @records = ($start_id .. $end_id);
71 # print Dumper($undeleted, \@records);
73 # Hash of controlled fields & subfields in bibliographic records, and their
74 # corresponding controlling fields & subfields in the authority record
76 # So, if the bib 650$a can be controlled by an auth 150$a, that maps to:
77 # 650 => { a => { 150 => 'a'}}
# Shape: bib tag => { bib subfield => { authority tag => authority subfield } }.
# The main loop reads this structure as %controllees: its keys are the
# controlled bib tags and each value's keys are the controlled subfields.
# NOTE(review): only the first ($a) row of each tag is visible in this
# excerpt; the hash declaration line and the remaining rows are not shown.
# 1XX main entries
79 100 => { a => { 100 => 'a' },
96 110 => { a => { 110 => 'a' },
111 111 => { a => { 111 => 'a' },
127 130 => { a => { 130 => 'a' },
# 6XX subject entries
142 600 => { a => { 100 => 'a' },
167 610 => { a => { 110 => 'a' },
189 611 => { a => { 111 => 'a' },
209 630 => { a => { 130 => 'a' },
228 648 => { a => { 148 => 'a' },
234 650 => { a => { 150 => 'a' },
241 651 => { a => { 151 => 'a' },
247 655 => { a => { 155 => 'a' },
# 7XX added entries
253 700 => { a => { 100 => 'a' },
270 710 => { a => { 110 => 'a' },
285 711 => { a => { 111 => 'a' },
301 730 => { a => { 130 => 'a' },
316 751 => { a => { 151 => 'a' },
# NOTE(review): every other entry points at a 1XX authority tag; 830 => 830
# breaks that pattern -- confirm whether 130 was intended.
322 830 => { a => { 830 => 'a' },
# Main pass: for each selected bib record, search for a matching authority
# record for every controlled field and add a $0 subfield pointing at it.
# NOTE(review): this excerpt omits lines (closing braces, declarations of
# %match_subfields/@searches/$match_tag, loop-control statements); the
# comments below describe only what the visible lines do.
339 foreach my $rec_id (@records) {
# Per-record editor used for the bib and authority retrievals below.
342 my $e = OpenILS::Utils::CStoreEditor->new();
343 # State variable; was the record changed?
# NOTE(review): no visible check that the retrieval succeeded before
# ->marc() is called on the result; IDs produced by the start/end range are
# not known to exist.
347 my $record = $e->retrieve_biblio_record_entry($rec_id);
349 # print Dumper($record);
351 my $marc = MARC::Record->new_from_xml($record->marc());
353 # get the list of controlled fields
354 my @c_fields = keys %controllees;
356 foreach my $c_tag (@c_fields) {
357 my @c_subfields = keys %{$controllees{"$c_tag"}};
358 # print "Field: $field subfields: ";
359 # foreach (@subfields) { print "$_ "; }
361 # Get the MARCXML from the record and check for controlled fields/subfields
362 my @bib_fields = ($marc->field($c_tag));
363 foreach my $bib_field (@bib_fields) {
364 # print $_->as_formatted();
# Collect one search term per controlled subfield of this field.
368 foreach my $c_subfield (@c_subfields) {
# NOTE(review): a guard for a subfield that is absent from this field is
# presumably on a line not shown -- confirm.
369 my $sf = $bib_field->subfield($c_subfield);
371 # Give me the first element of the list of authority controlling tags for this subfield
372 # XXX Will we need to support more than one controlling tag per subfield? Probably. That
373 # will suck. Oh well, leave that up to Ole to implement.
374 $match_subfields{$c_subfield} = (keys %{$controllees{$c_tag}{$c_subfield}})[0];
# $match_tag is overwritten on every pass; the value left after the loop is
# the one placed in @tags below.
375 $match_tag = $match_subfields{$c_subfield};
376 push @searches, {term => $sf, subfield => $c_subfield};
379 # print Dumper(\%match_subfields);
# A single authority tag is searched for the whole field.
382 my @tags = ($match_tag);
384 # print "Controlling tag: $c_tag and match tag $match_tag\n";
385 # print Dumper(\@tags, \@searches);
387 # Now we've built up a complete set of matching controlled
388 # subfields for this particular field; let's check to see if
389 # we have a matching authority record
# NOTE(review): $validates is dereferenced as an array of authority record
# IDs further down, so the call presumably ends with a result-gathering
# step on a line not shown -- confirm.
390 my $session = OpenSRF::AppSession->create("open-ils.search");
391 my $validates = $session->request("open-ils.search.authority.validate.tag.id_list",
392 "tags", \@tags, "searches", \@searches
394 $session->disconnect();
396 # print Dumper($validates);
398 # Protect against failed (error condition) search request
400 print STDERR "Search for matching authority failed; record # $rec_id\n";
# No authority matched this field (the branch body is not shown).
404 if (scalar(@$validates) == 0) {
408 # Iterate through the returned authority record IDs to delete any
409 # matching $0 subfields already in the bib record
410 foreach my $auth_zero (@$validates) {
# Matches an existing $0 value that ends in ")<authority id>".
411 $bib_field->delete_subfield(code => '0', match => qr/\)$auth_zero$/);
414 # Okay, we have a matching authority control; time to
415 # add the magical subfield 0. Use the first returned auth
# NOTE(review): @$validates[0] is a one-element array slice;
# $validates->[0] is the conventional way to read a single element.
417 my $auth_id = @$validates[0];
418 my $auth_rec = $e->retrieve_authority_record_entry($auth_id);
419 my $auth_marc = MARC::Record->new_from_xml($auth_rec->marc());
# The authority record's 003 value becomes the parenthesised prefix of $0.
# NOTE(review): there is no visible guard for an authority record without
# an 003 field; ->data() on a missing field would be a fatal error.
420 my $cni = $auth_marc->field('003')->data();
422 $bib_field->add_subfields('0' => "($cni)$auth_id");
# Write-back: a transactional editor is created for the update.
# NOTE(review): this 'my $editor' shadows the file-level $editor; the step
# that stores $xml on $record and any commit are not visible in this
# excerpt -- confirm.
427 my $editor = OpenILS::Utils::CStoreEditor->new(xact=>1);
428 # print $marc->as_formatted();
429 my $xml = $marc->as_xml_record();
# Normalise the serialised MARCXML: drop the XML declaration, remove
# whitespace between tags, strip control characters, then entityize.
431 $xml =~ s/^<\?xml.+\?\s*>//go;
432 $xml =~ s/>\s+</></go;
433 $xml =~ s/\p{Cc}//go;
434 $xml = OpenILS::Application::AppUtils->entityize($xml);
437 $editor->update_biblio_record_entry($record);
446 authority_control_fields.pl - Controls fields in bibliographic records with authorities in Evergreen
450 C<authority_control_fields.pl> [B<--configuration>=I<opensrf_core.xml>]
451 [[B<--record>=I<record>[ B<--record>=I<record>]]] | [B<--all>] | [B<--start_id>=I<start-ID> B<--end_id>=I<end-ID>]
455 For a given set of records:
459 =item * Iterate through the list of fields that are controlled fields
461 =item * Iterate through the list of subfields that are controlled for
464 =item * Search for a matching authority record for that combination of
469 =item * If we find a match, then add a $0 subfield to that field identifying
470 the controlling authority record
472 =item * If we do not find a match, then insert a row into an "uncontrolled"
473 table identifying the record ID, field, and subfield(s) that were not controlled
477 =item * Iterate through the list of floating subdivisions
481 =item * If we find a match, then add a $0 subfield to that field identifying
482 the controlling authority record
484 =item * If we do not find a match, then insert a row into an "uncontrolled"
485 table identifying the record ID, field, and subfield(s) that were not controlled
489 =item * If we changed the record, update it in the database
497 =item * B<-c> I<config-file>, B<--configuration>=I<config-file>
499 Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
500 Defaults to F</openils/conf/opensrf_core.xml>
502 =item * B<-r> I<record-ID>, B<--record>=I<record-ID>
504 Specifies the bibliographic record ID (found in the C<biblio.record_entry.id>
505 column) of the record to process. This option may be specified more than once
506 to process multiple records in a single run.
508 =item * B<-a>, B<--all>
510 Specifies that all bibliographic records should be processed. For large
511 databases, this may take an extraordinarily long amount of time.
513 =item * B<-s> I<start-ID>, B<--start_id>=I<start-ID>
515 Specifies the starting ID of the range of bibliographic records to process.
516 This option is ignored unless it is accompanied by the B<-e> or B<--end_id>
519 =item * B<-e> I<end-ID>, B<--end_id>=I<end-ID>
521 Specifies the ending ID of the range of bibliographic records to process.
522 This option is ignored unless it is accompanied by the B<-s> or B<--start_id>
529 authority_control_fields.pl --start_id 1 --end_id 50000
531 Processes the bibliographic records with IDs between 1 and 50,000 using the
532 default OpenSRF configuration file for connection information.
536 Dan Scott <dscott@laurentian.ca>
538 =head1 COPYRIGHT AND LICENSE
540 Copyright 2010-2011 by Dan Scott
542 This program is free software; you can redistribute it and/or
543 modify it under the terms of the GNU General Public License
544 as published by the Free Software Foundation; either version 2
545 of the License, or (at your option) any later version.
547 This program is distributed in the hope that it will be useful,
548 but WITHOUT ANY WARRANTY; without even the implied warranty of
549 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
550 GNU General Public License for more details.
552 You should have received a copy of the GNU General Public License
553 along with this program; if not, write to the Free Software
554 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.