From 8508a0d89efb0305e590bec9d6b94c1236a5e641 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Mon, 24 Jun 2013 15:48:07 -0400 Subject: [PATCH] Create upgrade script for ingest normalization changes This provides the upgrade script for the ingest changes outlined in LP 1187433. In addition, the version-upgrade script for 2.3 to 2.4.0 has been modified to move the final reingest step until after this incremental change has been applied. Signed-off-by: Mike Rylander Signed-off-by: Dan Scott --- .../XXXX.function.apostrophe-search.sql | 202 ++++++++++++++++++ .../version-upgrade/2.3-2.4.0-upgrade-db.sql | 2 - 2 files changed, 202 insertions(+), 2 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql new file mode 100644 index 0000000000..3b7e26f904 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.apostrophe-search.sql @@ -0,0 +1,202 @@ +BEGIN; + +SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$ +DECLARE + fclass RECORD; + ind_data metabib.field_entry_template%ROWTYPE; + mbe_row metabib.browse_entry%ROWTYPE; + mbe_id BIGINT; + b_skip_facet BOOL; + b_skip_browse BOOL; + b_skip_search BOOL; +BEGIN + + SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; + SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; + SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; + + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; + IF NOT FOUND THEN + IF NOT b_skip_search THEN + FOR fclass IN SELECT * FROM config.metabib_class LOOP + -- RAISE NOTICE 'Emptying out %', fclass.name; + EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id; + END LOOP; + END IF; + IF NOT b_skip_facet THEN + DELETE FROM metabib.facet_entry WHERE source = bib_id; + END IF; + IF NOT b_skip_browse THEN + DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; + END IF; + END IF; + + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP + IF ind_data.field < 0 THEN + ind_data.field = -1 * ind_data.field; + END IF; + + IF ind_data.facet_field AND NOT b_skip_facet THEN + INSERT INTO metabib.facet_entry (field, source, value) + VALUES (ind_data.field, ind_data.source, ind_data.value); + END IF; + + IF ind_data.browse_field AND NOT b_skip_browse THEN + -- A caveat about this SELECT: this should take care of replacing + -- old mbe rows when data changes, but not if normalization (by + -- which I mean specifically the output of + -- evergreen.oils_tsearch2()) changes. It may or may not be + -- expensive to add a comparison of index_vector to index_vector + -- to the WHERE clause below. + SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value; + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_entry (value) VALUES + (metabib.browse_normalize(ind_data.value, ind_data.field)); + mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_entry_def_map (entry, def, source) + VALUES (mbe_id, ind_data.field, ind_data.source); + END IF; + + -- Avoid inserting duplicate rows, but retain granularity of being + -- able to search browse fields with "starts with" type operators + -- (for example, for titles of songs in music albums) + IF (ind_data.search_field OR ind_data.browse_field) AND NOT b_skip_search THEN + EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class || + '_field_entry WHERE field = $1 AND source = $2 AND value = $3' + INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value; + -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id; + IF mbe_id IS NULL THEN + EXECUTE $$ + INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value) + VALUES ($$ || + quote_literal(ind_data.field) || $$, $$ || + quote_literal(ind_data.source) || $$, $$ || + quote_literal(ind_data.value) || + $$);$$; + END IF; + END IF; + + END LOOP; + + IF NOT b_skip_search THEN + PERFORM metabib.update_combined_index_vectors(bib_id); + END IF; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$ +DECLARE + normalizer RECORD; + value TEXT := ''; + temp_vector TEXT := ''; + ts_rec RECORD; + cur_weight "char"; +BEGIN + + value := NEW.value; + NEW.index_vector = ''::tsvector; + + IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) + WHERE field = NEW.field AND m.pos < 0 + ORDER BY m.pos LOOP + EXECUTE 'SELECT ' || normalizer.func || '(' || + quote_literal( value ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO value; + + END LOOP; + + NEW.value = value; + + FOR normalizer IN + SELECT n.func AS func, + n.param_count AS param_count, + m.params AS params + FROM config.index_normalizer n + JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id) + WHERE field = NEW.field AND m.pos >= 0 + ORDER BY m.pos LOOP + EXECUTE 'SELECT ' || normalizer.func || '(' || + quote_literal( value ) || + CASE + WHEN normalizer.param_count > 0 + THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'') + ELSE '' + END || + ')' INTO value; + + END LOOP; + END IF; + + IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN + value := ARRAY_TO_STRING( + evergreen.regexp_split_to_array(value, E'\\W+'), ' ' + ); + value := public.search_normalize(value); + NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value); + ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN + FOR ts_rec IN + SELECT ts_config, index_weight + FROM config.metabib_class_ts_map + WHERE field_class = TG_ARGV[0] + AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language')) + AND always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field) + UNION + SELECT ts_config, index_weight + FROM config.metabib_field_ts_map + WHERE metabib_field = NEW.field + AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language')) + ORDER BY index_weight ASC + LOOP + IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN + NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight); + temp_vector = ''; + END IF; + cur_weight = ts_rec.index_weight; + SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT; + END LOOP; + NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight); + ELSE + NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value); + END IF; + + RETURN NEW; +END; +$$ LANGUAGE PLPGSQL; + +COMMIT; + +\qecho **** If upgrading from Evergreen 2.3 or before, now is the time to run +\qecho **** Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4-supplemental.sh, which +\qecho **** contains additional required SQL to complete your Evergreen upgrade! +\qecho +\qecho **** If upgrading from Evergreen 2.4.0, you will need to reingest your +\qecho **** full data set. In order to allow this to continue without locking +\qecho **** your entire bibliographic data set, consider generating an SQL script +\qecho **** with the following query, and running that via psql: +\qecho +\qecho \t +\qecho \o /tmp/reingest-2.4.1.sql +\qecho SELECT 'select metabib.reingest_metabib_field_entries(' || id || ');' FROM biblio.record_entry WHERE NOT DELETED AND id > 0; +\qecho \o +\qecho \t +\qecho + diff --git a/Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql b/Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql index 28ce7bd5c1..6c172a9bf8 100644 --- a/Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql +++ b/Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4.0-upgrade-db.sql @@ -3993,5 +3993,3 @@ CREATE TEXT SEARCH CONFIGURATION subject ( COPY = english_nostop ); CREATE TEXT SEARCH CONFIGURATION series ( COPY = english_nostop ); CREATE TEXT SEARCH CONFIGURATION identifier ( COPY = english_nostop ); -\qecho Please run Open-ILS/src/sql/Pg/version-upgrade/2.3-2.4-supplemental.sh now, which contains additional required SQL to complete your Evergreen upgrade! - -- 2.43.2