1 --Upgrade Script for 2.2.2 to 2.2.3
2 \set eg_version '''2.2.3'''
4 INSERT INTO config.upgrade_log (version, applied_to) VALUES ('2.2.3', :eg_version);
6 -- 0738.schema.vandelay.import-match-no-like-any.sql
8 SELECT evergreen.upgrade_deps_block_check('0738', :eg_version);
10 CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml(
11 match_set_id INTEGER, record_xml TEXT
12 ) RETURNS SETOF vandelay.match_set_test_result AS $$
23 tags_rstore := vandelay.flatten_marc_hstore(record_xml);
24 svf_rstore := vandelay.extract_rec_attrs(record_xml);
26 CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
27 CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
29 -- generate the where clause and return that directly (into wq), and as
30 -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
31 wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore);
33 query_ := 'SELECT DISTINCT(record), ';
35 -- qrows table is for the quality bits we add to the SELECT clause
36 SELECT ARRAY_TO_STRING(
37 ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + '
38 ) INTO coal FROM _vandelay_tmp_qrows;
40 -- our query string so far is the SELECT clause and the inital FROM.
41 -- no JOINs yet nor the WHERE clause
42 query_ := query_ || coal || ' AS quality ' || E'\n';
44 -- jrows table is for the joins we must make (and the real text conditions)
45 SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins
46 FROM _vandelay_tmp_jrows;
48 -- add those joins and the where clause to our query.
49 query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted';
51 -- this will return rows of record,quality
52 FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP
56 DROP TABLE _vandelay_tmp_qrows;
57 DROP TABLE _vandelay_tmp_jrows;
63 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
68 root vandelay.match_set_point;
70 SELECT * INTO root FROM vandelay.match_set_point
71 WHERE parent IS NULL AND match_set = match_set_id;
73 RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore);
77 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
78 node vandelay.match_set_point,
86 child vandelay.match_set_point;
88 SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point
89 WHERE parent = node.id;
91 IF ARRAY_LENGTH(children, 1) > 0 THEN
92 this_op := vandelay._get_expr_render_one(node);
95 WHILE children[i] IS NOT NULL LOOP
96 SELECT * INTO child FROM vandelay.match_set_point
97 WHERE id = children[i];
99 q := q || ' ' || this_op || ' ';
102 q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore);
106 ELSIF node.bool_op IS NULL THEN
107 PERFORM vandelay._get_expr_push_qrow(node);
108 PERFORM vandelay._get_expr_push_jrow(node, tags_rstore);
109 RETURN vandelay._get_expr_render_one(node);
116 CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
117 node vandelay.match_set_point,
130 -- remember $1 is tags_rstore, and $2 is svf_rstore
133 SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
134 IF jrow_count > 0 THEN
135 my_using := ' USING (record)';
136 my_join := 'FULL OUTER JOIN';
142 IF node.tag IS NOT NULL THEN
143 caseless := (node.tag IN ('020', '022', '024'));
145 IF node.subfield IS NOT NULL THEN
146 tagkey := tagkey || node.subfield;
164 my_alias := 'n' || node.id::TEXT;
166 jrow := my_join || ' (SELECT *, ';
167 IF node.tag IS NOT NULL THEN
168 jrow := jrow || node.quality ||
169 ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' ||
171 IF node.subfield IS NOT NULL THEN
172 jrow := jrow || ' AND mfr.subfield = ''' ||
173 node.subfield || '''';
175 jrow := jrow || ' AND (';
176 jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
177 jrow := jrow || ')) ' || my_alias || my_using || E'\n';
179 jrow := jrow || 'id AS record, ' || node.quality ||
180 ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' ||
181 node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' ||
182 my_alias || my_using || E'\n';
184 INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
188 CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons(
200 vals := tags_rstore->tagkey;
205 IF vals[i] IS NULL THEN
208 result := result || ' OR ';
213 result := result || 'LOWER(mfr.value) ' || op;
215 result := result || 'mfr.value ' || op;
218 result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL');
220 IF vals[i] IS NULL THEN
231 -- drop old versions of these functions with fewer args
232 DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER );
233 DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point );
234 DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point );
236 -- This next index might fully supplant an existing one but leaving both for now
237 -- (they are not too large)
238 -- The reason we need this index is to ensure that the query parser always
239 -- prefers this index over the simpler tag/subfield index, as this greatly
240 -- increases Vandelay overlay speed for these identifiers, especially when
241 -- a record has many of these fields (around > 4-6 seems like the cutoff
242 -- on at least one PG9.1 system)
243 -- A similar index could be added for other fields (e.g. 010), but one should
244 -- leave out the LOWER() in all other cases.
245 -- TODO: verify whether we can discard the non tag/subfield/substring version
246 -- (metabib_full_rec_isxn_caseless_idx)
247 CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring
248 ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024)))
249 WHERE tag IN ('020', '022', '024');
253 SELECT evergreen.upgrade_deps_block_check('0740', :eg_version);
256 FUNCTION metabib.suggest_browse_entries(
257 raw_query_text TEXT, -- actually typed by humans at the UI level
258 search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
259 headline_opts TEXT, -- markup options for ts_headline()
260 visibility_org INTEGER,-- null if you don't want opac visibility test
261 query_limit INTEGER,-- use in LIMIT clause of interal query
262 normalization INTEGER -- argument to TS_RANK_CD()
266 buoyant_and_class_match BOOL,
268 field_weight INTEGER,
271 match TEXT -- marked up
274 prepared_query_texts TEXT[];
277 opac_visibility_join TEXT;
278 search_class_join TEXT;
281 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
283 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
284 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
286 visibility_org := NULLIF(visibility_org,-1);
287 IF visibility_org IS NOT NULL THEN
288 opac_visibility_join := '
289 JOIN asset.opac_visible_copies aovc ON (
290 aovc.record = x.source AND
291 aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
294 opac_visibility_join := '';
297 -- The following determines whether we only provide suggestsons matching
298 -- the user's selected search_class, or whether we show other suggestions
299 -- too. The reason for MIN() is that for search_classes like
300 -- 'title|proper|uniform' you would otherwise get multiple rows. The
301 -- implication is that if title as a class doesn't have restrict,
302 -- nor does the proper field, but the uniform field does, you're going
303 -- to get 'false' for your overall evaluation of 'should we restrict?'
304 -- To invert that, change from MIN() to MAX().
308 MIN(cmc.restrict::INT) AS restrict_class,
309 MIN(cmf.restrict::INT) AS restrict_field
310 FROM metabib.search_class_to_registered_components(search_class)
311 AS _registered (field_class TEXT, field INT)
313 config.metabib_class cmc ON (cmc.name = _registered.field_class)
315 config.metabib_field cmf ON (cmf.id = _registered.field);
317 -- evaluate 'should we restrict?'
318 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
319 search_class_join := '
321 metabib.search_class_to_registered_components($2)
322 AS _registered (field_class TEXT, field INT) ON (
323 (_registered.field IS NULL AND
324 _registered.field_class = cmf.field_class) OR
325 (_registered.field = cmf.id)
329 search_class_join := '
331 metabib.search_class_to_registered_components($2)
332 AS _registered (field_class TEXT, field INT) ON (
333 _registered.field_class = cmc.name
338 RETURN QUERY EXECUTE '
347 TS_HEADLINE(value, $7, $3)
348 FROM (SELECT DISTINCT
351 cmc.buoyant AND _registered.field_class IS NOT NULL AS push,
352 _registered.field = cmf.id AS restrict,
354 TS_RANK_CD(mbe.index_vector, $1, $6),
357 FROM metabib.browse_entry_def_map mbedm
358 JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry)
359 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
360 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
361 ' || search_class_join || '
362 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
364 ' || opac_visibility_join || '
365 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
367 ' -- sic, repeat the order by clause in the outer select too
369 query, search_class, headline_opts,
370 visibility_org, query_limit, normalization, plain_query
374 -- buoyant AND chosen class = match class
375 -- chosen field = match field
382 $func$ LANGUAGE PLPGSQL;
385 SELECT evergreen.upgrade_deps_block_check('0742', :eg_version);
387 -- Prepare for the July 2013 introduction of OCLC's "on" prefix
390 CREATE OR REPLACE FUNCTION maintain_control_numbers() RETURNS TRIGGER AS $func$
393 use MARC::File::XML (BinaryEncoding => 'UTF-8');
396 use Unicode::Normalize;
398 MARC::Charset->assume_unicode(1);
400 my $record = MARC::Record->new_from_xml($_TD->{new}{marc});
401 my $schema = $_TD->{table_schema};
402 my $rec_id = $_TD->{new}{id};
404 # Short-circuit if maintaining control numbers per MARC21 spec is not enabled
405 my $enable = spi_exec_query("SELECT enabled FROM config.global_flag WHERE name = 'cat.maintain_control_numbers'");
406 if (!($enable->{processed}) or $enable->{rows}[0]->{enabled} eq 'f') {
410 # Get the control number identifier from an OU setting based on $_TD->{new}{owner}
411 my $ou_cni = 'EVRGRN';
414 if ($schema eq 'serial') {
415 $owner = $_TD->{new}{owning_lib};
417 # are.owner and bre.owner can be null, so fall back to the consortial setting
418 $owner = $_TD->{new}{owner} || 1;
421 my $ous_rv = spi_exec_query("SELECT value FROM actor.org_unit_ancestor_setting('cat.marc_control_number_identifier', $owner)");
422 if ($ous_rv->{processed}) {
423 $ou_cni = $ous_rv->{rows}[0]->{value};
424 $ou_cni =~ s/"//g; # Stupid VIM syntax highlighting"
426 # Fall back to the shortname of the OU if there was no OU setting
427 $ous_rv = spi_exec_query("SELECT shortname FROM actor.org_unit WHERE id = $owner");
428 if ($ous_rv->{processed}) {
429 $ou_cni = $ous_rv->{rows}[0]->{shortname};
433 my ($create, $munge) = (0, 0);
435 my @scns = $record->field('035');
437 foreach my $id_field ('001', '003') {
439 my @controls = $record->field($id_field);
441 if ($id_field eq '001') {
442 $spec_value = $rec_id;
444 $spec_value = $ou_cni;
447 # Create the 001/003 if none exist
448 if (scalar(@controls) == 1) {
449 # Only one field; check to see if we need to munge it
450 unless (grep $_->data() eq $spec_value, @controls) {
454 # Delete the other fields, as with more than 1 001/003 we do not know which 003/001 to match
455 foreach my $control (@controls) {
456 $record->delete_field($control);
458 $record->insert_fields_ordered(MARC::Field->new($id_field, $spec_value));
463 my $cn = $record->field('001')->data();
464 # Special handling of OCLC numbers, often found in records that lack 003
465 if ($cn =~ /^o(c[nm]|n)\d/) {
466 $cn =~ s/^o(c[nm]|n)0*(\d+)/$2/;
467 $record->field('003')->data('OCoLC');
471 # Now, if we need to munge the 001, we will first push the existing 001/003
472 # into the 035; but if the record did not have one (and one only) 001 and 003
473 # to begin with, skip this process
474 if ($munge and not $create) {
476 my $scn = "(" . $record->field('003')->data() . ")" . $cn;
478 # Do not create duplicate 035 fields
479 unless (grep $_->subfield('a') eq $scn, @scns) {
480 $record->insert_fields_ordered(MARC::Field->new('035', '', '', 'a' => $scn));
484 # Set the 001/003 and update the MARC
485 if ($create or $munge) {
486 $record->field('001')->data($rec_id);
487 $record->field('003')->data($ou_cni);
489 my $xml = $record->as_xml_record();
491 $xml =~ s/^<\?xml.+\?\s*>//go;
492 $xml =~ s/>\s+</></go;
493 $xml =~ s/\p{Cc}//go;
495 # Embed a version of OpenILS::Application::AppUtils->entityize()
496 # to avoid having to set PERL5LIB for PostgreSQL as well
498 # If we are going to convert non-ASCII characters to XML entities,
499 # we had better be dealing with a UTF8 string to begin with
500 $xml = decode_utf8($xml);
504 # Convert raw ampersands to entities
505 $xml =~ s/&(?!\S+;)/&/gso;
507 # Convert Unicode characters to entities
508 $xml =~ s/([\x{0080}-\x{fffd}])/sprintf('&#x%X;',ord($1))/sgoe;
510 $xml =~ s/[\x00-\x1f]//go;
511 $_TD->{new}{marc} = $xml;
517 $func$ LANGUAGE PLPERLU;