From 579c2cb8e7bcbc8819ca4e625b0c8c9488870c71 Mon Sep 17 00:00:00 2001 From: Lebbeous Fogle-Weekley Date: Mon, 25 Jun 2012 20:00:33 -0400 Subject: [PATCH] Vandelay: Fix index-miss with MARC Imports using Match Sets In some Postgres installations, an expression such as: (value LIKE '13423488%' OR value LIKE '245425%') will use a btree index, but value LIKE ANY('{13423488%,245425%}'::TEXT[]) will not. Missing such an index can make matching incoming bibs based on a field that's present in most of your existing records terrifically slow. Signed-off-by: Lebbeous Fogle-Weekley Signed-off-by: Dan Wells --- Open-ILS/src/sql/Pg/012.schema.vandelay.sql | 70 ++++-- ...hema.vandelay.import-match-no-like-any.sql | 228 ++++++++++++++++++ 2 files changed, 284 insertions(+), 14 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql diff --git a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql index c2bf418645..49aad7d0b8 100644 --- a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql +++ b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql @@ -508,7 +508,7 @@ BEGIN -- generate the where clause and return that directly (into wq), and as -- a side-effect, populate the _vandelay_tmp_[qj]rows tables. - wq := vandelay.get_expr_from_match_set(match_set_id); + wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore); query_ := 'SELECT DISTINCT(bre.id) AS record, '; @@ -571,7 +571,8 @@ END; $func$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set( - match_set_id INTEGER + match_set_id INTEGER, + tags_rstore HSTORE ) RETURNS TEXT AS $$ DECLARE root vandelay.match_set_point; @@ -579,12 +580,13 @@ BEGIN SELECT * INTO root FROM vandelay.match_set_point WHERE parent IS NULL AND match_set = match_set_id; - RETURN vandelay.get_expr_from_match_set_point(root); + RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore); END; $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point( - node vandelay.match_set_point + node vandelay.match_set_point, + tags_rstore HSTORE ) RETURNS TEXT AS $$ DECLARE q TEXT; @@ -607,13 +609,13 @@ BEGIN q := q || ' ' || this_op || ' '; END IF; i := i + 1; - q := q || vandelay.get_expr_from_match_set_point(child); + q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore); END LOOP; q := q || ')'; RETURN q; ELSIF node.bool_op IS NULL THEN PERFORM vandelay._get_expr_push_qrow(node); - PERFORM vandelay._get_expr_push_jrow(node); + PERFORM vandelay._get_expr_push_jrow(node, tags_rstore); RETURN vandelay._get_expr_render_one(node); ELSE RETURN ''; @@ -631,7 +633,8 @@ END; $$ LANGUAGE PLPGSQL; CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow( - node vandelay.match_set_point + node vandelay.match_set_point, + tags_rstore HSTORE ) RETURNS VOID AS $$ DECLARE jrow TEXT; @@ -680,13 +683,8 @@ BEGIN END IF; jrow := jrow || ' AND ('; - IF caseless THEN - jrow := jrow || 'LOWER(' || my_alias || '.value) ' || op; - ELSE - jrow := jrow || my_alias || '.value ' || op; - END IF; - - jrow := jrow || ' ANY(($1->''' || tagkey || ''')::TEXT[])))'; + jrow := jrow || vandelay._node_tag_comparisons(caseless, my_alias, op, tags_rstore, tagkey); + jrow := jrow || '))'; ELSE -- svf jrow := jrow || 'record_attr) ' || my_alias || ' ON (' || my_alias || '.id = bre.id AND (' || @@ -697,6 +695,50 @@ BEGIN END; $$ LANGUAGE PLPGSQL; +CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons( + caseless BOOLEAN, + my_alias TEXT, + op TEXT, + tags_rstore HSTORE, + tagkey TEXT +) RETURNS TEXT AS $$ +DECLARE + result TEXT; + i INT; + vals TEXT[]; +BEGIN + i := 1; + vals := tags_rstore->tagkey; + result := ''; + + WHILE TRUE LOOP + IF i > 1 THEN + IF vals[i] IS NULL THEN + EXIT; + ELSE + result := result || ' OR '; + END IF; + END IF; + + IF caseless THEN + result := result || 'LOWER(' || my_alias || '.value) ' || op; + ELSE + result := result || my_alias || '.value ' || op; + END IF; + + result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL'); + + IF vals[i] IS NULL THEN + EXIT; + END IF; + i := i + 1; + END LOOP; + + RETURN result; + +END; +$$ LANGUAGE PLPGSQL; + CREATE OR REPLACE FUNCTION vandelay._get_expr_render_one( node vandelay.match_set_point ) RETURNS TEXT AS $$ diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql new file mode 100644 index 0000000000..34a2ee0a44 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.vandelay.import-match-no-like-any.sql @@ -0,0 +1,228 @@ +BEGIN; + +-- XXXX.schema.vandelay.import-match-no-like-any.sql + +SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE OR REPLACE FUNCTION vandelay.match_set_test_marcxml( + match_set_id INTEGER, record_xml TEXT +) RETURNS SETOF vandelay.match_set_test_result AS $$ +DECLARE + tags_rstore HSTORE; + svf_rstore HSTORE; + coal TEXT; + joins TEXT; + query_ TEXT; + wq TEXT; + qvalue INTEGER; + rec RECORD; +BEGIN + tags_rstore := vandelay.flatten_marc_hstore(record_xml); + svf_rstore := vandelay.extract_rec_attrs(record_xml); + + CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER); + CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT); + + -- generate the where clause and return that directly (into wq), and as + -- a side-effect, populate the _vandelay_tmp_[qj]rows tables. + wq := vandelay.get_expr_from_match_set(match_set_id, tags_rstore); + + query_ := 'SELECT DISTINCT(bre.id) AS record, '; + + -- qrows table is for the quality bits we add to the SELECT clause + SELECT ARRAY_TO_STRING( + ARRAY_ACCUM('COALESCE(n' || q::TEXT || '.quality, 0)'), ' + ' + ) INTO coal FROM _vandelay_tmp_qrows; + + -- our query string so far is the SELECT clause and the inital FROM. + -- no JOINs yet nor the WHERE clause + query_ := query_ || coal || ' AS quality ' || E'\n' || + 'FROM biblio.record_entry bre '; + + -- jrows table is for the joins we must make (and the real text conditions) + SELECT ARRAY_TO_STRING(ARRAY_ACCUM(j), E'\n') INTO joins + FROM _vandelay_tmp_jrows; + + -- add those joins and the where clause to our query. + query_ := query_ || joins || E'\n' || 'WHERE ' || wq || ' AND not bre.deleted'; + + -- this will return rows of record,quality + RAISE WARNING '%', query_; + FOR rec IN EXECUTE query_ USING tags_rstore, svf_rstore LOOP + RETURN NEXT rec; + END LOOP; + + DROP TABLE _vandelay_tmp_qrows; + DROP TABLE _vandelay_tmp_jrows; + RETURN; +END; + +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set( + match_set_id INTEGER, + tags_rstore HSTORE +) RETURNS TEXT AS $$ +DECLARE + root vandelay.match_set_point; +BEGIN + SELECT * INTO root FROM vandelay.match_set_point + WHERE parent IS NULL AND match_set = match_set_id; + + RETURN vandelay.get_expr_from_match_set_point(root, tags_rstore); +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point( + node vandelay.match_set_point, + tags_rstore HSTORE +) RETURNS TEXT AS $$ +DECLARE + q TEXT; + i INTEGER; + this_op TEXT; + children INTEGER[]; + child vandelay.match_set_point; +BEGIN + SELECT ARRAY_ACCUM(id) INTO children FROM vandelay.match_set_point + WHERE parent = node.id; + + IF ARRAY_LENGTH(children, 1) > 0 THEN + this_op := vandelay._get_expr_render_one(node); + q := '('; + i := 1; + WHILE children[i] IS NOT NULL LOOP + SELECT * INTO child FROM vandelay.match_set_point + WHERE id = children[i]; + IF i > 1 THEN + q := q || ' ' || this_op || ' '; + END IF; + i := i + 1; + q := q || vandelay.get_expr_from_match_set_point(child, tags_rstore); + END LOOP; + q := q || ')'; + RETURN q; + ELSIF node.bool_op IS NULL THEN + PERFORM vandelay._get_expr_push_qrow(node); + PERFORM vandelay._get_expr_push_jrow(node, tags_rstore); + RETURN vandelay._get_expr_render_one(node); + ELSE + RETURN ''; + END IF; +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow( + node vandelay.match_set_point, + tags_rstore HSTORE +) RETURNS VOID AS $$ +DECLARE + jrow TEXT; + my_alias TEXT; + op TEXT; + tagkey TEXT; + caseless BOOL; +BEGIN + -- remember $1 is tags_rstore, and $2 is svf_rstore + + caseless := FALSE; + + IF node.tag IS NOT NULL THEN + caseless := (node.tag IN ('020', '022', '024')); + tagkey := node.tag; + IF node.subfield IS NOT NULL THEN + tagkey := tagkey || node.subfield; + END IF; + END IF; + + IF node.negate THEN + IF caseless THEN + op := 'NOT LIKE'; + ELSE + op := '<>'; + END IF; + ELSE + IF caseless THEN + op := 'LIKE'; + ELSE + op := '='; + END IF; + END IF; + + my_alias := 'n' || node.id::TEXT; + + jrow := 'LEFT JOIN (SELECT *, ' || node.quality || + ' AS quality FROM metabib.'; + IF node.tag IS NOT NULL THEN + jrow := jrow || 'full_rec) ' || my_alias || ' ON (' || + my_alias || '.record = bre.id AND ' || my_alias || '.tag = ''' || + node.tag || ''''; + IF node.subfield IS NOT NULL THEN + jrow := jrow || ' AND ' || my_alias || '.subfield = ''' || + node.subfield || ''''; + END IF; + jrow := jrow || ' AND ('; + + jrow := jrow || vandelay._node_tag_comparisons(caseless, my_alias, op, tags_rstore, tagkey); + jrow := jrow || '))'; + ELSE -- svf + jrow := jrow || 'record_attr) ' || my_alias || ' ON (' || + my_alias || '.id = bre.id AND (' || + my_alias || '.attrs->''' || node.svf || + ''' ' || op || ' $2->''' || node.svf || '''))'; + END IF; + INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow); +END; +$$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay._node_tag_comparisons( + caseless BOOLEAN, + my_alias TEXT, + op TEXT, + tags_rstore HSTORE, + tagkey TEXT +) RETURNS TEXT AS $$ +DECLARE + result TEXT; + i INT; + vals TEXT[]; +BEGIN + i := 1; + vals := tags_rstore->tagkey; + result := ''; + + WHILE TRUE LOOP + IF i > 1 THEN + IF vals[i] IS NULL THEN + EXIT; + ELSE + result := result || ' OR '; + END IF; + END IF; + + IF caseless THEN + result := result || 'LOWER(' || my_alias || '.value) ' || op; + ELSE + result := result || my_alias || '.value ' || op; + END IF; + + result := result || ' ' || COALESCE('''' || vals[i] || '''', 'NULL'); + + IF vals[i] IS NULL THEN + EXIT; + END IF; + i := i + 1; + END LOOP; + + RETURN result; + +END; +$$ LANGUAGE PLPGSQL; + +-- drop old versions of these functions with fewer args +DROP FUNCTION vandelay.get_expr_from_match_set( INTEGER ); +DROP FUNCTION vandelay.get_expr_from_match_set_point( vandelay.match_set_point ); +DROP FUNCTION vandelay._get_expr_push_jrow( vandelay.match_set_point ); + +COMMIT; + -- 2.43.2