BEGIN; -- 0738.schema.vandelay.import-match-no-like-any.sql SELECT evergreen.upgrade_deps_block_check('0738', :eg_version); CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml( match_set_id INTEGER, record_xml TEXT ) RETURNS SETOF vandelay.match_set_test_result AS $$ DECLARE tags_rstore HSTORE; svf_rstore HSTORE; coal TEXT; joins TEXT; query_ TEXT; wq TEXT; qvalue INTEGER; rec RECORD; BEGIN tags_rstore := vandelay.flatten_marc_hstore(record_xml); svf_rstore := vandelay.extract_rec_attrs(record_xml); CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER); CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT); -- generate the where clause and return that directly (into wq), and as -- a side-effect, populate the _vandelay_tmp_[qj]rows tables. wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore); query_ := 'SELECT DISTINCT(record), '; -- qrows table is for the quality bits we add to the SELECT clause SELECT ARRAY_TO_STRING( ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + ' ) INTO coal FROM _vandelay_tmp_qrows; -- our query string so far is the SELECT clause and the inital FROM. -- no JOINs yet nor the WHERE clause query_ := query_ || coal || ' AS quality ' || E'\n'; -- jrows table is for the joins we must make (and the real text conditions) SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins FROM _vandelay_tmp_jrows; -- add those joins and the where clause to our query. query_ := query_ || joins || E'\n' || 'JOIN biblio.record_entry bre ON (bre.id = record) ' || 'WHERE ' || wq || ' AND not bre.deleted'; -- this will return rows of record,quality FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP RETURN NEXT rec; END LOOP; DROP TABLE _vandelay_tmp_qrows; DROP TABLE _vandelay_tmp_jrows; RETURN; END; $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set( match_set_id INTEGER, tags_rstore HSTORE ) RETURNS TEXT AS $$ DECLARE root vandelay.match_set_point; BEGIN SELECT * INTO root FROM vandelay.match_set_point WHERE parent IS NULL AND match_set = match_set_id; RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore); END; $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point( node vandelay.match_set_point, tags_rstore HSTORE ) RETURNS TEXT AS $$ DECLARE q TEXT; i INTEGER; this_op TEXT; children INTEGER[]; child vandelay.match_set_point; BEGIN SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point WHERE parent = node.id; IF ARRAY_LENGTH(children, 1) > 0 THEN this_op := vandelay._get_expr_render_one(node); q := '('; i := 1; WHILE children[i] IS NOT NULL LOOP SELECT * INTO child FROM vandelay.match_set_point WHERE id = children[i]; IF i > 1 THEN q := q || ' ' || this_op || ' '; END IF; i := i + 1; q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore); END LOOP; q := q || ')'; RETURN q; ELSIF node.bool_op IS NULL THEN PERFORM vandelay._get_expr_push_qrow(node); PERFORM vandelay._get_expr_push_jrow(node, tags_rstore); RETURN vandelay._get_expr_render_one(node); ELSE RETURN ''; END IF; END; $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow( node vandelay.match_set_point, tags_rstore HSTORE ) RETURNS VOID AS $$ DECLARE jrow TEXT; my_alias TEXT; op TEXT; tagkey TEXT; caseless BOOL; jrow_count INT; my_using TEXT; my_join TEXT; BEGIN -- remember $1 is tags_rstore, and $2 is svf_rstore caseless := FALSE; SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows; IF jrow_count > 0 THEN my_using := ' USING (record)'; my_join := 'FULL OUTER JOIN'; ELSE my_using := ''; my_join := 'FROM'; END IF; IF node.tag IS NOT NULL THEN caseless := (node.tag IN ('020', '022', '024')); tagkey := node.tag; IF node.subfield IS NOT NULL THEN tagkey := tagkey || node.subfield; END IF; END IF; IF node.negate THEN IF caseless THEN op := 'NOT LIKE'; ELSE op := '<>'; END IF; ELSE IF caseless THEN op := 'LIKE'; ELSE op := '='; END IF; END IF; my_alias := 'n' || node.id::TEXT; jrow := my_join || ' (SELECT *, '; IF node.tag IS NOT NULL THEN jrow := jrow || node.quality || ' AS quality FROM metabib.full_rec mfr WHERE mfr.tag = ''' || node.tag || ''''; IF node.subfield IS NOT NULL THEN jrow := jrow || ' AND mfr.subfield = ''' || node.subfield || ''''; END IF; jrow := jrow || ' AND ('; jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey); jrow := jrow || ')) ' || my_alias || my_using || E'\n'; ELSE -- svf jrow := jrow || 'id AS record, ' || node.quality || ' AS quality FROM metabib.record_attr mra WHERE mra.attrs->''' || node.svf || ''' ' || op || ' $2->''' || node.svf || ''') ' || my_alias || my_using || E'\n'; END IF; INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow); END; $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons( caseless BOOLEAN, op TEXT, tags_rstore HSTORE, tagkey TEXT ) RETURNS TEXT AS $$ DECLARE result TEXT; i INT; vals TEXT[]; BEGIN i := 1; vals := tags_rstore->tagkey; result := ''; WHILE TRUE LOOP IF i > 1 THEN IF vals[i] IS NULL THEN EXIT; ELSE result := result || ' OR '; END IF; END IF; IF caseless THEN result := result || 'LOWER(mfr.value) ' || op; ELSE result := result || 'mfr.value ' || op; END IF; result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL'); IF vals[i] IS NULL THEN EXIT; END IF; i := i + 1; END LOOP; RETURN result; END; $$ LANGUAGE PLPGSQL; -- drop old versions of these functions with fewer args DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER ); DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point ); DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point ); -- This next index might fully supplant an existing one but leaving both for now -- (they are not too large) -- The reason we need this index is to ensure that the query parser always -- prefers this index over the simpler tag/subfield index, as this greatly -- increases Vandelay overlay speed for these identifiers, especially when -- a record has many of these fields (around > 4-6 seems like the cutoff -- on at least one PG9.1 system) -- A similar index could be added for other fields (e.g. 010), but one should -- leave out the LOWER() in all other cases. -- TODO: verify whether we can discard the non tag/subfield/substring version -- (metabib_full_rec_isxn_caseless_idx) CREATE INDEX metabib_full_rec_02x_tag_subfield_lower_substring ON metabib.real_full_rec (tag, subfield, LOWER(substring(value, 1, 1024))) WHERE tag IN ('020', '022', '024'); COMMIT;