8 use MARC::File::XML (BinaryEncoding => 'UTF-8');
11 use OpenILS::Utils::Fieldmapper;
12 use OpenSRF::Utils::SettingsClient;
13 use OpenSRF::EX qw/:try/;
15 use Unicode::Normalize;
16 use OpenILS::Application::AppUtils;
18 use Pod::Usage qw/ pod2usage /;
20 MARC::Charset->assume_unicode(1);
27 $acsaf_cache->{$id} ||=
28 $e->retrieve_authority_control_set_authority_field([
30 {flesh => 1, flesh_fields => {acsaf => ["main_entry"]}}
32 return $acsaf_cache->{$id};
35 # Grab DB information from local settings. Return connected db handle (or die)
37 my $sc = OpenSRF::Utils::SettingsClient->new;
38 my $db_driver = $sc->config_value( reporter => setup => database => 'driver' );
39 my $db_host = $sc->config_value( reporter => setup => database => 'host' );
40 my $db_port = $sc->config_value( reporter => setup => database => 'port' );
41 my $db_name = $sc->config_value( reporter => setup => database => 'db' );
43 $db_name = $sc->config_value( reporter => setup => database => 'name' );
44 print STDERR "WARN: <database><name> is a deprecated setting for database name. For future compatibility, you should use <database><db> instead." if $db_name;
46 my $db_user = $sc->config_value( reporter => setup => database => 'user' );
47 my $db_pw = $sc->config_value( reporter => setup => database => 'pw' );
49 die "Unable to retrieve database connection information from the settings server" unless ($db_driver && $db_host && $db_port && $db_name && $db_user);
51 my $dsn = "dbi:" . $db_driver . ":dbname=" . $db_name .';host=' . $db_host . ';port=' . $db_port;
54 $dsn,$db_user,$db_pw, {
55 AutoCommit => 1, pg_enable_utf8 => 1, RaiseError => 1
57 ); # shouldn't need 'or die...' with RaiseError=>1
60 # I can't believe this isn't already in a sub somewhere? We seem to repeat
61 # these steps all over the place, which is very much "bad code smell."
63 my ($xml) = @_; # a string, not an object, to be clear
66 $xml =~ s/^<\?xml.+\?\s*>//go;
67 $xml =~ s/>\s+</></go;
70 return OpenILS::Application::AppUtils->entityize($xml);
73 sub matchable_string {
74 my ($field, $sf_list) = @_;
76 return join("", map { $field->subfield($_) } split "", $sf_list);
80 my ($start_id, $end_id);
81 my $bootstrap = '@sysconfdir@/opensrf_core.xml';
85 my $result = GetOptions(
87 'configuration=s' => \$bootstrap,
88 'record=i' => \@records,
89 'all', 'help', 'debug',
90 'start_id=i' => \$start_id,
91 'end_id=i' => \$end_id
94 pod2usage(0) if not $result or $options{help};
96 print "OpenSRF bootstrap and fieldmapper import...\n" if $options{debug};
97 OpenSRF::System->bootstrap_client(config_file => $bootstrap);
98 Fieldmapper->import(IDL => OpenSRF::Utils::SettingsClient->new->config_value("IDL"));
100 # must be loaded and initialized after the IDL is parsed
102 print "Loading CStoreEditor ...\n" if $options{debug};
104 use OpenILS::Utils::CStoreEditor;
105 OpenILS::Utils::CStoreEditor::init();
107 my $e = OpenILS::Utils::CStoreEditor->new;
112 ARRAY_TO_STRING(ARRAY_AGG(target || ',' || field), ';') AS links
114 SELECT sh1.record AS target,
115 sh2.record AS source,
117 FROM authority.simple_heading sh1
118 JOIN authority.simple_heading sh2 USING (sort_value)
119 JOIN authority.control_set_authority_field af1 ON (sh1.atag = af1.id AND af1.main_entry IS NULL)
120 JOIN authority.control_set_authority_field af2 ON (sh2.atag = af2.id AND af2.main_entry IS NOT NULL AND af2.linking_subfield IS NOT NULL)
121 %s -- where clause here
122 EXCEPT SELECT target, source, field FROM authority.authority_linking
128 $query = sprintf($query, "WHERE sh2.record = ?");
129 @bind_params = @records; # should be just one scalar in this array.
130 } elsif ($options{all}) {
131 $query = sprintf($query, ""); # no where clause
132 } elsif ($start_id and $end_id) {
133 $query = sprintf($query, "WHERE sh2.record BETWEEN ? AND ?");
134 @bind_params = ($start_id, $end_id);
139 print "SQL, params: ", Dumper($query, \@bind_params), "\n" if $options{debug};
140 my $dbh = connect_to_db; # dies if any problem
141 my $sth = $dbh->prepare($query);
143 print "Executing query ...\n" if $options{debug};
144 $sth->execute(@bind_params);
148 while (my ($src, $links) = $sth->fetchrow_array) {
149 print "src: $src\n" if $options{debug};
151 my $per_src_target_cache = {};
153 my $src_rec = $e->retrieve_authority_record_entry($src) or
155 my $src_marc = MARC::Record->new_from_xml($src_rec->marc);
157 LINK: for my $link (split ';', $links) {
158 my ($target, $field_id) = split ',', $link;
160 print "target: $target, field_id: $field_id\n" if $options{debug};
162 my $target_rec = ($per_src_target_cache->{$src} ||=
163 $e->retrieve_authority_record_entry($target)) or
165 my $target_marc = MARC::Record->new_from_xml($target_rec->marc);
166 my $cni = $target_marc->field('003')->data;
168 my $acsaf = get_acsaf($e, $field_id) or die $e->die_event;
170 for my $field ($src_marc->field($acsaf->tag)) {
171 my $src_string = matchable_string(
172 $field, $acsaf->main_entry->display_sf_list
175 print("at field ", $acsaf->id, " (", $acsaf->tag,
176 "), trying to match '$src_string'...\n") if $options{debug};
178 for my $tfield ($target_marc->field($acsaf->main_entry->tag)) {
179 my $target_string = matchable_string(
180 $tfield, $acsaf->main_entry->display_sf_list
183 if ($target_string eq $src_string) {
184 print "got a match ...\n" if $options{debug};
185 $field->update('0' => "($cni)$target");
186 $src_rec->marc(marcxml_eg($src_marc->as_xml_record));
189 $e->update_authority_record_entry($src_rec) or
199 print STDERR "\nRecord # $src : ",
200 (ref $err eq "HASH" ? Dumper($err) : $err), "\n";
202 # Reset SAX parser so that one bad record doesn't
203 # kill the entire process.
205 import MARC::File::XML;
210 exit ($problems > 0);
216 authority_authority_linker.pl - Link reference headings in authority records to main entry headings in other authority records
220 authority_authority_linker.pl [B<--configuration>=I<opensrf_core.conf>]
221 [[B<--record>=I<record>[ B<--record>=I<record>]]] | [B<--all>] | [B<--start_id>=I<start-ID> B<--end_id>=I<end-ID>]
225 For a given set of records, find authority reference headings that also
226 appear as main entry headings in any other authority record. In the
227 specific MARC field of the authority record (source) containing the reference
228 heading with such a match in another authority record (target), add a subfield
229 0 (zero) referring to the target record by ID.
235 =item * B<-c> I<config-file>, B<--configuration>=I<config-file>
237 Specifies the OpenSRF configuration file used to connect to the OpenSRF router.
238 Defaults to F<@sysconfdir@/opensrf_core.xml>
240 =item * B<-r> I<record-ID>, B<--record>=I<record-ID>
242 Specifies the authority record ID (found in the C<authority.record_entry.id>
243 column) of the B<source> record to process. This option may be specified more
244 than once to process multiple records in a single run.
246 =item * B<-a>, B<--all>
248 Specifies that all authority records should be processed. For large
249 databases, this may take an extraordinarily long amount of time.
251 =item * B<-s> I<start-ID>, B<--start_id>=I<start-ID>
253 Specifies the starting ID of the range of authority records to process.
254 This option is ignored unless it is accompanied by the B<-e> or B<--end_id>
257 =item * B<-e> I<end-ID>, B<--end_id>=I<end-ID>
259 Specifies the ending ID of the range of authority records to process.
260 This option is ignored unless it is accompanied by the B<-s> or B<--start>
267 authority_authority_linker.pl --start_id 1 --end_id 50000
269 Processes the authority records with IDs between 1 and 50,000 using the
270 default OpenSRF configuration file for connection information.
274 Lebbeous Fogle-Weekley <lebbeous@esilibrary.com>
276 =head1 COPYRIGHT AND LICENSE
278 Copyright (C) 2013 Equinox Software, Inc.
280 This program is free software; you can redistribute it and/or
281 modify it under the terms of the GNU General Public License
282 as published by the Free Software Foundation; either version 2
283 of the License, or (at your option) any later version.
285 This program is distributed in the hope that it will be useful,
286 but WITHOUT ANY WARRANTY; without even the implied warranty of
287 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
288 GNU General Public License for more details.
290 You should have received a copy of the GNU General Public License
291 along with this program; if not, write to the Free Software
292 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA