1 package OpenILS::Application::Search::Z3950;
2 use strict; use warnings;
3 use base qw/OpenILS::Application/;
5 use OpenILS::Utils::ZClient;
7 use MARC::File::XML (BinaryEncoding => 'UTF-8');
9 use Unicode::Normalize;
13 use OpenSRF::EX qw(:try);
14 use OpenSRF::MultiSession;
15 use OpenILS::Utils::ModsParser;
16 use OpenSRF::Utils::SettingsClient;
17 use OpenSRF::Utils::JSON;
18 use OpenILS::Application::AppUtils;
19 use OpenSRF::Utils::Logger qw/$logger/;
20 use OpenILS::Utils::CStoreEditor q/:funcs/;
21 use OpenILS::Utils::Normalize qw/clean_marc/;
23 MARC::Charset->assume_unicode(1);
24 MARC::Charset->ignore_errors(1);
26 my $output = "usmarc";
27 my $U = 'OpenILS::Application::AppUtils';
33 __PACKAGE__->register_method(
34 method => 'apply_credentials',
35 api_name => 'open-ils.search.z3950.apply_credentials',
37 desc => "Apply credentials for a Z39.50 server",
39 {desc => 'Authtoken', type => 'string'},
40 {desc => 'Z39.50 Source (server) name', type => 'string'},
41 {desc => 'Context org unit', type => 'number'},
42 {desc => 'Username', type => 'string'},
43 {desc => 'Password', type => 'string'}
46 desc => 'Event; SUCCESS on success, other event type on error'
51 sub apply_credentials {
52 my ($self, $client, $auth, $source, $ctx_ou, $username, $password) = @_;
54 my $e = new_editor(authtoken => $auth, xact => 1);
56 return $e->die_event unless
58 $e->allowed('ADMIN_Z3950_SOURCE', $ctx_ou);
60 $e->json_query({from => [
61 'config.z3950_source_credentials_apply',
62 $source, $ctx_ou, $username, $password
63 ]}) or return $e->die_event;
67 return OpenILS::Event->new('SUCCESS');
72 __PACKAGE__->register_method(
73 method => 'do_class_search',
74 api_name => 'open-ils.search.z3950.search_class',
77 Performs a class based Z search. The classes available
78 are defined by the 'attr' fields in the config for the
80 @param auth The login session key
81 @param shash The search hash : { attr : value, attr2: value, ...}
82 @param service The service to connect to
83 @param username The username to use when connecting to the service
84 @param password The password to use when connecting to the service
88 __PACKAGE__->register_method(
89 method => 'do_service_search',
90 api_name => 'open-ils.search.z3950.search_service',
92 @param auth The login session key
93 @param query The Z3950 search string to use
94 @param service The service to connect to
95 @param username The username to use when connecting to the service
96 @param password The password to use when connecting to the service
101 __PACKAGE__->register_method(
102 method => 'do_service_search',
103 api_name => 'open-ils.search.z3950.search_raw',
105 @param auth The login session key
106 @param args An object of search params which must include:
107 host, port, db and query.
108 optional fields include username and password
113 __PACKAGE__->register_method(
114 method => "query_services",
115 api_name => "open-ils.search.z3950.retrieve_services",
117 @param auth The login session key
118 Returns a list of service names that we have config
125 # -------------------------------------------------------------------
126 # What services do we have config info for?
127 # -------------------------------------------------------------------
129 my( $self, $client, $auth ) = @_;
130 my $e = new_editor(authtoken=>$auth);
131 return $e->event unless $e->checkauth;
132 return $e->event unless $e->allowed('REMOTE_Z3950_QUERY');
134 return fetch_service_defs($e);
137 # -------------------------------------------------------------------
138 # What services do we have config info for?
139 # -------------------------------------------------------------------
140 sub fetch_service_defs {
142 my $editor_with_authtoken = shift;
144 my $hash = $sclient->config_value('z3950', 'services');
146 # overlay config file values with in-db values
147 my $e = $editor_with_authtoken || new_editor();
148 if($e->can('search_config_z3950_source')) {
150 my $sources = $e->search_config_z3950_source(
151 [ { name => { '!=' => undef } },
152 { flesh => 1, flesh_fields => { czs => ['attrs'] } } ]
155 for my $s ( @$sources ) {
156 $$hash{ $s->name } = {
162 record_format => $s->record_format,
163 transmission_format => $s->transmission_format,
165 use_perm => ($s->use_perm) ?
166 $e->retrieve_permission_perm_list($s->use_perm)->code : ''
169 for my $a ( @{ $s->attrs } ) {
170 $$hash{ $a->source }{attrs}{ $a->name } = {
174 format => $a->format,
175 source => $a->source,
176 truncation => $a->truncation,
182 # Define the set of native catalog services
183 # XXX There are i18n problems here, but let's get the staff client working first
184 # XXX Move into the DB?
185 $hash->{'native-evergreen-catalog'} = {
187 title => {code => 'title', label => 'Title'},
188 author => {code => 'author', label => 'Author'},
189 subject => {code => 'subject', label => 'Subject'},
190 keyword => {code => 'keyword', label => 'Keyword'},
191 tcn => {code => 'tcn', label => 'TCN'},
192 isbn => {code => 'isbn', label => 'ISBN'},
193 issn => {code => 'issn', label => 'ISSN'},
194 publisher => {code => 'publisher', label => 'Publisher'},
195 pubdate => {code => 'pubdate', label => 'Pub Date'},
196 item_type => {code => 'item_type', label => 'Item Type'},
200 # then filter out any services which the requestor lacks the perm for
201 if ($editor_with_authtoken) {
202 foreach my $s (keys %{ $hash }) {
203 if ($$hash{$s}{use_perm}) {
206 $e->requestor->ws_ou,
215 %services = %$hash; # cache these internally so we can actually use the db-configured sources
221 # -------------------------------------------------------------------
222 # Load the pre-defined Z server configs
223 # -------------------------------------------------------------------
225 $sclient = OpenSRF::Utils::SettingsClient->new();
226 $default_service = $sclient->config_value("z3950", "default" );
230 # -------------------------------------------------------------------
231 # High-level class based search.
232 # -------------------------------------------------------------------
233 sub do_class_search {
235 fetch_service_defs() unless (scalar(keys(%services)));
242 if (!ref($$args{service})) {
243 $$args{service} = [$$args{service}];
244 $$args{username} = [$$args{username}];
245 $$args{password} = [$$args{password}];
253 for (my $i = 0; $i < @{$$args{service}}; $i++) {
254 my %tmp_args = %$args;
255 $tmp_args{service} = $$args{service}[$i];
256 $tmp_args{username} = $$args{username}[$i];
257 $tmp_args{password} = $$args{password}[$i];
259 $logger->debug("z3950: service: $tmp_args{service}, async: $tmp_args{async}");
261 if ($tmp_args{service} eq 'native-evergreen-catalog') {
262 my $method = $self->method_lookup('open-ils.search.biblio.zstyle.staff');
264 $self->method_lookup('open-ils.search.biblio.zstyle.staff')->run($auth, \%tmp_args)
269 $tmp_args{query} = compile_query('and', $tmp_args{service}, $tmp_args{search});
271 my $res = do_service_search( $self, $conn, $auth, \%tmp_args );
273 if ($U->event_code($res)) {
274 $conn->respond($res) if $U->event_code($res);
277 push @services, $tmp_args{service};
278 push @results, $res->{result};
279 push @connections, $res->{connection};
281 $logger->debug("z3950: Result object: $results[$i], Connection object: $connections[$i]");
287 $logger->debug("z3950: Connections created");
289 return undef unless (@connections);
292 # local catalog search is not processed with other z39 results;
293 $$args{service} = [grep {$_ ne 'native-evergreen-catalog'} @{$$args{service}}];
295 @connections = grep {defined $_} @connections;
296 return undef unless @connections;
298 while ((my $index = OpenILS::Utils::ZClient::event( \@connections )) != 0) {
299 my $ev = $connections[$index - 1]->last_event();
300 $logger->debug("z3950: Received event $ev");
301 if ($ev == OpenILS::Utils::ZClient::EVENT_END()) {
302 my $munged = process_results( $results[$index - 1], $$args{limit}, $$args{offset}, $$args{service}[$index -1] );
303 $$munged{service} = $$args{service}[$index - 1];
304 $conn->respond($munged);
308 $logger->debug("z3950: Search Complete");
313 # -------------------------------------------------------------------
314 # This handles the host settings, but expects a fully formed z query
315 # -------------------------------------------------------------------
316 sub do_service_search {
318 fetch_service_defs() unless (scalar(keys(%services)));
325 my $info = $services{$$args{service}};
327 $$args{host} = $$info{host};
328 $$args{port} = $$info{port};
329 $$args{db} = $$info{db};
330 $logger->debug("z3950: do_search...");
332 return do_search( $self, $conn, $auth, $args );
337 # -------------------------------------------------------------------
338 # This is the low level search method. All config and query
339 # data must be provided to this method
340 # -------------------------------------------------------------------
343 fetch_service_defs() unless (scalar(keys(%services)));
350 my $host = $$args{host} or return undef;
351 my $port = $$args{port} or return undef;
352 my $db = $$args{db} or return undef;
353 my $query = $$args{query} or return undef;
354 my $async = $$args{async} || 0;
356 my $limit = $$args{limit} || 10;
357 my $offset = $$args{offset} || 0;
359 my $editor = new_editor(authtoken => $auth);
360 return $editor->event unless
361 $editor->checkauth and
362 $editor->allowed('REMOTE_Z3950_QUERY', $editor->requestor->ws_ou);
364 my $creds = $editor->json_query({from => [
365 'config.z3950_source_credentials_lookup',
366 $$args{service}, $editor->requestor->ws_ou
369 # use the caller-provided username/password if offered.
370 # otherwise, use the stored credentials.
371 my $username = $$args{username} || $creds->{username} || "";
372 my $password = $$args{password} || $creds->{password} || "";
374 my $tformat = $services{$args->{service}}->{transmission_format} || $output;
376 $logger->info("z3950: connecting to server $host:$port:$db as $username");
378 my $connection = OpenILS::Utils::ZClient->new(
382 password => $password,
384 preferredRecordSyntax => $tformat,
387 if( ! $connection ) {
388 $logger->error("z3950: Unable to connect to Z server: ".
389 "$host:$port:$db:$username:$password");
390 return OpenILS::Event->new('Z3950_LOGIN_FAILED') unless $connection;
397 $logger->info("z3950: query => $query");
400 $results = $connection->search_pqf( $query );
401 } catch Error with { $err = shift; };
403 return OpenILS::Event->new(
404 'Z3950_BAD_QUERY', payload => $query, debug => "$err") if $err;
406 return OpenILS::Event->new('Z3950_SEARCH_FAILED',
407 debug => $connection->errcode." => ".$connection->errmsg." : query = $query") unless $results;
409 $logger->info("z3950: search [$query] took ".(time - $start)." seconds");
411 return {result => $results, connection => $connection} if ($async);
413 my $munged = process_results($results, $limit, $offset, $$args{service});
414 $munged->{query} = $query;
420 # -------------------------------------------------------------------
421 # Takes a result batch and returns the hitcount and a list of xml
423 # -------------------------------------------------------------------
424 sub process_results {
426 fetch_service_defs() unless (scalar(keys(%services)));
429 my $limit = shift || 10;
430 my $offset = shift || 0;
433 my $rformat = $services{$service}->{record_format};
434 my $tformat = $services{$service}->{transmission_format} || $output;
436 $results->option(elementSetName => $rformat);
437 $results->option(preferredRecordSyntax => $tformat);
438 $logger->info("z3950: using record format '$rformat' and transmission format '$tformat'");
442 my $count = $$res{count} = $results->size;
444 $logger->info("z3950: '$service' search returned $count hits");
446 my $tend = $limit + $offset;
448 my $end = ($tend <= $count) ? $tend : $count;
450 for($offset..$end - 1) {
458 $logger->info("z3950: fetching record $_");
462 my $rec = $results->record($_);
464 if ($tformat eq 'usmarc') {
465 my $raw = $rec->raw();
466 if (length($raw) <= 99999) {
467 $marc = MARC::Record->new_from_usmarc($raw);
470 die "ISO2709 record is too large to process";
472 } elsif ($tformat eq 'xml') {
473 $marc = MARC::Record->new_from_xml($rec->raw());
475 die "Unsupported record transmission format $tformat"
478 $marcs = $U->entityize($marc->as_xml_record);
479 $marcs = $U->strip_ctrl_chars($marcs);
480 my $doc = XML::LibXML->new->parse_string($marcs);
481 $marcxml = $U->entityize($doc->documentElement->toString);
482 $marcxml = $U->strip_ctrl_chars($marcxml);
484 my $u = OpenILS::Utils::ModsParser->new();
485 $u->start_mods_batch( $marcxml );
486 $mods = $u->finish_mods_batch();
489 } catch Error with { $err = shift; };
491 push @records, { 'mvr' => $mods, 'marcxml' => $marcxml } unless $err;
492 $logger->error("z3950: bad XML : $err") if $err;
495 warn "\n\n$marcs\n\n";
499 $res->{records} = \@records;
505 # -------------------------------------------------------------------
506 # Compiles the class based search query
507 # -------------------------------------------------------------------
510 fetch_service_defs() unless (scalar(keys(%services)));
512 my $separator = shift;
516 my $count = scalar(keys %$hash);
519 $str .= "\@$separator " for (1..$count-1);
521 # -------------------------------------------------------------------
522 # "code" is the bib-1 "use attribute", "format" is the bib-1
523 # "structure attribute"
524 # -------------------------------------------------------------------
526 next unless ( exists $services{$service}->{attrs}->{$_} );
527 $str .= '@attr 1=' . $services{$service}->{attrs}->{$_}->{code} . # add the use attribute
528 ' @attr 4=' . $services{$service}->{attrs}->{$_}->{format}; # add the structure attribute
529 if (exists $services{$service}->{attrs}->{$_}->{truncation}
530 && $services{$service}->{attrs}->{$_}->{truncation} >= 0) {
531 $str .= ' @attr 5=' . $services{$service}->{attrs}->{$_}->{truncation};
533 $str .= " \"" . $$hash{$_} . "\" "; # add the search term
539 __PACKAGE__->register_method(
540 method => 'bucket_search_queue',
541 api_name => 'open-ils.search.z3950.bucket_search_queue',
543 # disable opensrf chunking so the caller can receive timely responses
547 Performs a Z39.50 search for every record in a bucket, using the
548 provided Z39.50 fields. Add all search results to the specified
549 Vandelay queue. If no source records or search results are found,
553 {desc => q/Authentication token/, type => 'string'},
554 {desc => q/Bucket ID/, type => 'number'},
555 {desc => q/Z39 Sources. List of czs.name/, type => 'array'},
556 {desc => q/Z39 Index Maps. List of czifm.id/, type => 'array'},
557 { desc => q/Vandelay arguments
558 queue_name -- required
566 desc => q/Object containing status information about the on-going search
569 bre_count : $num, -- number of bibs to search against
571 search_complete : $num,
575 This object will be streamed back with each milestone (search
577 Event object returned on failure
583 sub bucket_search_queue {
587 my $bucket_id = shift;
588 my $z_sources = shift;
589 my $z_indexes = shift;
590 my $vandelay = shift;
592 my $e = new_editor(authtoken => $auth);
593 return $e->event unless
595 $e->allowed('REMOTE_Z3950_QUERY') and
596 $e->allowed('CREATE_BIB_IMPORT_QUEUE');
598 # find the source bib records
600 my $bre_ids = $e->json_query({
601 select => {cbrebi => ['target_biblio_record_entry']},
603 where => {bucket => $bucket_id},
608 return {bre_count => 0} unless @$bre_ids;
610 $bre_ids = [ map {$_->{target_biblio_record_entry}} @$bre_ids ];
612 $z_indexes = $e->search_config_z3950_index_field_map({id => $z_indexes});
614 return OpenILS::Event->new('BAD_PARAMS',
615 note => q/No z_indexes/) unless @$z_indexes;
617 # build the Z39 queries for the source bib records
619 my $z_searches = compile_bucket_zsearch(
620 $e, $bre_ids, $z_sources, $z_indexes);
622 return $e->event unless $z_searches;
623 return {bre_count => 0} unless @$z_searches;
625 my $queue = create_z39_bucket_queue($e, $bucket_id, $vandelay);
626 return $e->event unless $queue;
628 send_and_queue_bucket_searches($conn, $e, $queue, $z_searches);
633 # create the queue for storing search results
634 sub create_z39_bucket_queue {
635 my ($e, $bucket_id, $vandelay) = @_;
637 my $existing = $e->search_vandelay_bib_queue({
638 name => $vandelay->{queue_name},
639 owner => $e->requestor->id
642 return $existing if $existing;
644 my $queue = Fieldmapper::vandelay::bib_queue->new;
645 $queue->match_bucket($bucket_id);
646 $queue->owner($e->requestor->id);
647 $queue->name($vandelay->{queue_name});
648 $queue->match_set($vandelay->{match_set});
651 unless ($e->create_vandelay_bib_queue($queue)) {
660 # sets the 901c value to the Z39 service and
661 # adds the record to the growing vandelay queue
662 # returns the number of successfully queued records
663 sub stamp_and_queue_results {
664 my ($e, $queue, $service, $bre_id, $result) = @_;
667 for my $rec (@{$result->{records}}) {
668 # insert z39 service as the 901z
669 my $marc = MARC::Record->new_from_xml(
670 $rec->{marcxml}, 'UTF-8', 'USMARC');
672 $marc->insert_fields_ordered(
673 MARC::Field->new('901', '', '', z => $service));
675 # put the record into the queue
676 my $qrec = Fieldmapper::vandelay::queued_bib_record->new;
677 $qrec->marc(clean_marc($marc));
678 $qrec->queue($queue->id);
681 if ($e->create_vandelay_queued_bib_record($qrec)) {
685 my $evt = $e->die_event;
686 $logger->error("z39: unable to queue record: $evt");
693 sub send_and_queue_bucket_searches {
694 my ($conn, $e, $queue, $z_searches) = @_;
696 my $max_parallel = $U->ou_ancestor_setting(
697 $e->requestor->ws_ou,
698 'cat.z3950.batch.max_parallel') || 5;
700 my $search_limit = $U->ou_ancestor_setting(
701 $e->requestor->ws_ou,
702 'cat.z3950.batch.max_results') || 5;
707 search_complete => 0,
711 # searches are about to be in flight
712 # let the caller know we're still alive
713 $conn->respond($response);
715 my $handle_search_result = sub {
716 my ($self, $req) = @_;
717 my $bre_id = $req->{req}->{_bre_id};
719 my @p = $req->{req}->payload->params;
720 $logger->debug("z39: multi-search response for request [$bre_id]".
721 OpenSRF::Utils::JSON->perl2JSON(\@p));
723 for my $resp (@{$req->{response}}) {
724 $response->{search_complete}++;
725 my $result = $resp->content or next;
726 my $service = $result->{service};
727 $response->{queue_count} +=
728 stamp_and_queue_results($e, $queue, $service, $bre_id, $result);
731 $conn->respond($response);
734 my $multi_ses = OpenSRF::MultiSession->new(
735 app => 'open-ils.search',
736 cap => $max_parallel,
738 success_handler => $handle_search_result
741 # note: mult-session blocks new requests when it hits max
742 # parallel, so we need to cacluate summary values up front.
744 $bre_uniq{$_->{bre_id}} = 1 for @$z_searches;
745 $response->{bre_count} = scalar(keys %bre_uniq);
746 $response->{search_count} += scalar(@$z_searches);
748 # let the caller know searches are on their way out
749 $conn->respond($response);
751 for my $search (@$z_searches) {
753 my $bre_id = delete $search->{bre_id};
754 $search->{limit} = $search_limit;
756 # toss it onto the multi-pile
757 my $req = $multi_ses->request(
758 'open-ils.search.z3950.search_class', $e->authtoken, $search);
760 $req->{_bre_id} = $bre_id;
763 $multi_ses->session_wait(1);
764 $response->{queue} = $queue;
765 $conn->respond($response);
769 # creates a series of Z39.50 searchs based on the
770 # in-bucket records and the selected sources and indexes
771 sub compile_bucket_zsearch {
772 my ($e, $bre_ids, $z_sources, $z_indexes) = @_;
774 # pre-load the metabib_field's we'll need for this batch
777 my @mb_fields = grep { $_->metabib_field } @$z_indexes;
779 @mb_fields = map { $_->metabib_field } @mb_fields;
780 my $field_objs = $e->search_config_metabib_field({id => \@mb_fields});
781 %mb_fields = map {$_->id => $_} @$field_objs;
784 # pre-load the z3950_attrs we'll need for this batch
787 my @z3950_attrs = grep { $_->z3950_attr } @$z_indexes;
789 @z3950_attrs = map { $_->z3950_attr } @z3950_attrs;
790 my $attr_objs = $e->search_config_z3950_attr({id => \@z3950_attrs});
791 %z3950_attrs = map {$_->id => $_} @$attr_objs;
794 # indexes with specific z3950_attr's take precedence
795 my @z_index_attrs = grep { $_->z3950_attr } @$z_indexes;
796 my @z_index_types = grep { !$_->z3950_attr } @$z_indexes;
798 # for each bib record, extract the indexed value for the selected indexes.
801 for my $bre_id (@$bre_ids) {
803 $z_searches{$bre_id} = {};
805 for my $z_index (@z_index_attrs, @z_index_types) {
808 if ($z_index->record_attr) {
810 my $attrs = $U->get_bre_attrs($bre_id, $e);
811 $bre_val = $attrs->{$bre_id}{$z_index->record_attr}{code};
813 } else { # metabib_field
814 my $fid = $z_index->metabib_field;
816 # the value for each field will be in the
817 # index class-specific table
818 my $entry_query = sprintf(
819 'search_metabib_%s_field_entry',
820 $mb_fields{$fid}->field_class);
822 my $entry = $e->$entry_query(
823 {field => $fid, source => $bre_id})->[0];
825 $bre_val = $entry->value if $entry;
828 # no value means no search
829 next unless $bre_val;
831 # determine which z3950 source to send this search field to
835 if ($z_index->z3950_attr) {
837 # a specific z3950_attr means this search index
838 # only applies to the z_source linked to the attr
840 $z_index_name = $z3950_attrs{$z_index->z3950_attr}->name;
841 my $src = $z3950_attrs{$z_index->z3950_attr}->source;
843 if (grep { $_ eq $src } @$z_sources) {
844 $z_searches{$bre_id}{$src} ||= {
848 $z_searches{$bre_id}{$src}{search}{$z_index_name} = $bre_val;
851 $logger->warn("z39: z3950_attr '$z_index_name' for '$src'".
852 " selected, but $src is not in the search list. Skipping...");
857 # when a generic attr type is used, it applies to all
858 # z-sources, except those for which a more specific
859 # z3950_attr has already been applied
861 $z_index_name = $z_index->z3950_attr_type;
864 for my $attr (values %z3950_attrs) {
865 push(@excluded, $attr->source)
866 if $attr->name eq $z_index_name;
869 for my $src (@$z_sources) {
870 next if grep {$_ eq $src} @excluded;
871 $z_searches{$bre_id}{$src} ||= {
875 $z_searches{$bre_id}{$src}{search}{$z_index_name} = $bre_val;
881 # NOTE: ISBNs are sent through the translate_isbn1013 normalize
882 # before entring metabib.identifier_field_entry. As such, there
883 # will always be at minimum 2 ISBNs per record w/ ISBN and the
884 # data will be pre-sanitized. The first ISBN in the list is the
885 # ISBN from the record. Use that for these searches.
886 for my $bre_id (keys %z_searches) {
887 for my $src (keys %{$z_searches{$bre_id}}) {
888 my $blob = $z_searches{$bre_id}{$src};
890 # Sanitized ISBNs are space-separated.
891 # kill everything past the first space
892 $blob->{search}{isbn} =~ s/\s.*//g if $blob->{search}{isbn};
896 # let's turn this into something slightly more digestable
898 for my $bre_id (keys %z_searches) {
899 for my $blobset (values %{$z_searches{$bre_id}}) {
900 $blobset = [$blobset] unless ref $blobset eq 'ARRAY';
901 for my $blob (@$blobset) {
902 $blob->{bre_id} = $bre_id;
903 push(@searches, $blob);