8 use MARC::File::XML (BinaryEncoding => 'UTF-8');
11 use OpenILS::Utils::Fieldmapper;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenSRF::EX qw/:try/;
15 use Unicode::Normalize;
16 use OpenILS::Application::AppUtils;
18 use Pod::Usage qw/ pod2usage /;
20 MARC::Charset->assume_unicode(1);
27 $acsaf_cache->{$id} ||=
28 $e->retrieve_authority_control_set_authority_field([
30 {flesh => 1, flesh_fields => {acsaf => ["main_entry"]}}
32 return $acsaf_cache->{$id};
35 # Grab DB information from local settings. Return connected db handle (or die)
37 my $sc = OpenSRF::Utils::SettingsClient->new;
38 my $db_driver = $sc->config_value( reporter => setup => database => 'driver' );
39 my $db_host = $sc->config_value( reporter => setup => database => 'host' );
40 my $db_port = $sc->config_value( reporter => setup => database => 'port' );
41 my $db_name = $sc->config_value( reporter => setup => database => 'db' );
43 $db_name = $sc->config_value( reporter => setup => database => 'name' );
44 print STDERR "WARN: <database><name> is a deprecated setting for database name. For future compatibility, you should use <database><db> instead." if $db_name;
46 my $db_user = $sc->config_value( reporter => setup => database => 'user' );
47 my $db_pw = $sc->config_value( reporter => setup => database => 'pw' );
49 die "Unable to retrieve database connection information from the settings server" unless ($db_driver && $db_host && $db_port && $db_name && $db_user);
51 my $dsn = "dbi:" . $db_driver . ":dbname=" . $db_name .';host=' . $db_host . ';port=' . $db_port;
54 $dsn,$db_user,$db_pw, {
55 AutoCommit => 1, pg_enable_utf8 => 1, RaiseError => 1
57 ); # shouldn't need 'or die...' with RaiseError=>1
60 # I can't believe this isn't already in a sub somewhere? We seem to repeat
61 # these steps all over the place, which is very much "bad code smell."
63 my ($xml) = @_; # a string, not an object, to be clear
66 $xml =~ s/^<\?xml.+\?\s*>//go;
67 $xml =~ s/>\s+</></go;
70 return OpenILS::Application::AppUtils->entityize($xml);
73 sub matchable_string {
74 my ($field, $sf_list, $joiner) = @_;
77 return join($joiner, map { $field->subfield($_) } split "", $sf_list);
81 my ($start_id, $end_id);
82 my $bootstrap = '@sysconfdir@/opensrf_core.xml';
86 my $result = GetOptions(
88 'configuration=s' => \$bootstrap,
89 'record=i' => \@records,
90 'all', 'help', 'debug',
91 'start_id=i' => \$start_id,
92 'end_id=i' => \$end_id
95 pod2usage(0) if not $result or $options{help};
97 print "OpenSRF bootstrap and fieldmapper import...\n" if $options{debug};
98 OpenSRF::System->bootstrap_client(config_file => $bootstrap);
99 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
101 # must be loaded and initialized after the IDL is parsed
103 print "Loading CStoreEditor ...\n" if $options{debug};
105 use OpenILS::Utils::CStoreEditor;
106 OpenILS::Utils::CStoreEditor::init();
108 my $e = OpenILS::Utils::CStoreEditor->new;
113 ARRAY_TO_STRING(ARRAY_AGG(target || ',' || field), ';') AS links
115 SELECT sh1.record AS target,
116 sh2.record AS source,
118 FROM authority.simple_heading sh1
119 JOIN authority.simple_heading sh2 USING (sort_value)
120 JOIN authority.control_set_authority_field af1 ON (sh1.atag = af1.id AND af1.main_entry IS NULL)
121 JOIN authority.control_set_authority_field af2 ON (sh2.atag = af2.id AND af2.main_entry IS NOT NULL AND af2.linking_subfield IS NOT NULL)
122 %s -- where clause here
123 EXCEPT SELECT target, source, field FROM authority.authority_linking
129 $query = sprintf($query, "WHERE sh2.record = ?");
130 @bind_params = @records; # should be just one scalar in this array.
131 } elsif ($options{all}) {
132 $query = sprintf($query, ""); # no where clause
133 } elsif ($start_id and $end_id) {
134 $query = sprintf($query, "WHERE sh2.record BETWEEN ? AND ?");
135 @bind_params = ($start_id, $end_id);
140 print "SQL, params: ", Dumper($query, \@bind_params), "\n" if $options{debug};
141 my $dbh = connect_to_db; # dies if any problem
142 my $sth = $dbh->prepare($query);
144 print "Executing query ...\n" if $options{debug};
145 $sth->execute(@bind_params);
149 while (my ($src, $links) = $sth->fetchrow_array) {
150 print "src: $src\n" if $options{debug};
153 my $src_rec = $e->retrieve_authority_record_entry($src) or
155 my $src_marc = MARC::Record->new_from_xml($src_rec->marc);
157 for my $link (split ';', $links) {
158 my ($target, $field_id) = split ',', $link;
160 print "target: $target, field_id: $field_id\n" if $options{debug};
162 my $target_rec = $e->retrieve_authority_record_entry($target) or
164 my $target_marc = MARC::Record->new_from_xml($target_rec->marc);
165 my $cni = $target_marc->field('003')->data;
167 my $acsaf = get_acsaf($e, $field_id) or die $e->die_event;
169 for my $field ($src_marc->field($acsaf->tag)) {
170 my $src_string = matchable_string(
171 $field, $acsaf->main_entry->display_sf_list, $acsaf->main_entry->joiner
174 print("at field ", $acsaf->id, " (", $acsaf->tag,
175 "), trying to match '$src_string'...\n") if $options{debug};
177 my ($tfield) = $target_marc->field($acsaf->main_entry->tag);
178 my $target_string = matchable_string(
179 $tfield, $acsaf->main_entry->display_sf_list, $acsaf->main_entry->joiner
182 if ($target_string eq $src_string) {
183 print "got a match ...\n" if $options{debug};
184 $field->update('0' => "($cni)$target");
189 $src_rec->marc(marcxml_eg($src_marc->as_xml_record));
192 $e->update_authority_record_entry($src_rec) or
198 print STDERR "\nRecord # $src : ",
199 (ref $err eq "HASH" ? Dumper($err) : $err), "\n";
201 # Reset SAX parser so that one bad record doesn't
202 # kill the entire process.
204 import MARC::File::XML;
209 exit ($problems > 0);
215 authority_authority_linker.pl - Link reference headings in authority records to main entry headings in other authority records
219 authority_authority_linker.pl [B<--configuration>=I<opensrf_core.conf>]
220 [[B<--record>=I<record>[ B<--record>=I<record>]]] | [B<--all>] | [B<--start_id>=I<start-ID> B<--end_id>=I<end-ID>]
224 For a given set of records, find authority reference headings that also
225 appear as main entry headings in any other authority record. In the
226 specific MARC field of the authority record (source) containing the reference
227 heading with such a match in another authority record (target), add a subfield
228 0 (zero) referring to the target record by ID.
234 =item * B<-c> I<config-file>, B<--configuration>=I<config-file>
236 Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
237 Defaults to F<@sysconfdir@/opensrf_core.xml>
239 =item * B<-r> I<record-ID>, B<--record>=I<record-ID>
241 Specifies the authority record ID (found in the C<authority.record_entry.id>
242 column) of the B<source> record to process. This option may be specified more
243 than once to process multiple records in a single run.
245 =item * B<-a>, B<--all>
247 Specifies that all authority records should be processed. For large
248 databases, this may take an extraordinarily long amount of time.
250 =item * B<-s> I<start-ID>, B<--start_id>=I<start-ID>
252 Specifies the starting ID of the range of authority records to process.
253 This option is ignored unless it is accompanied by the B<-e> or B<--end_id>
256 =item * B<-e> I<end-ID>, B<--end_id>=I<end-ID>
258 Specifies the ending ID of the range of authority records to process.
259 This option is ignored unless it is accompanied by the B<-s> or B<--start>
266 authority_authority_linker.pl --start_id 1 --end_id 50000
268 Processes the authority records with IDs between 1 and 50,000 using the
269 default OpenSRF configuration file for connection information.
273 Lebbeous Fogle-Weekley <lebbeous@esilibrary.com>
275 =head1 COPYRIGHT AND LICENSE
277 Copyright (C) 2013 Equinox Software, Inc.
279 This program is free software; you can redistribute it and/or
280 modify it under the terms of the GNU General Public License
281 as published by the Free Software Foundation; either version 2
282 of the License, or (at your option) any later version.
284 This program is distributed in the hope that it will be useful,
285 but WITHOUT ANY WARRANTY; without even the implied warranty of
286 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
287 GNU General Public License for more details.
289 You should have received a copy of the GNU General Public License
290 along with this program; if not, write to the Free Software
291 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA