From 0e18946d8ce21af964ec71f8c460b234e79fc22b Mon Sep 17 00:00:00 2001 From: Thomas Berezansky Date: Thu, 25 Oct 2012 19:51:44 -0400 Subject: [PATCH] Add tsearch2 removal upgrade script Signed-off-by: Thomas Berezansky Signed-off-by: Dan Scott --- Open-ILS/src/sql/Pg/002.schema.config.sql | 2 +- .../upgrade/0743.schema.remove_tsearch2.sql | 181 ++++++++++++++++++ 2 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/0743.schema.remove_tsearch2.sql diff --git a/Open-ILS/src/sql/Pg/002.schema.config.sql b/Open-ILS/src/sql/Pg/002.schema.config.sql index 1a5775fc33..e722361130 100644 --- a/Open-ILS/src/sql/Pg/002.schema.config.sql +++ b/Open-ILS/src/sql/Pg/002.schema.config.sql @@ -87,7 +87,7 @@ CREATE TRIGGER no_overlapping_deps BEFORE INSERT OR UPDATE ON config.db_patch_dependencies FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates'); -INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0742', :eg_version); -- dbs/senator +INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0743', :eg_version); -- dbs/tsbere CREATE TABLE config.bib_source ( id SERIAL PRIMARY KEY, diff --git a/Open-ILS/src/sql/Pg/upgrade/0743.schema.remove_tsearch2.sql b/Open-ILS/src/sql/Pg/upgrade/0743.schema.remove_tsearch2.sql new file mode 100644 index 0000000000..062e5a2c6b --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/0743.schema.remove_tsearch2.sql @@ -0,0 +1,181 @@ +-- Evergreen DB patch 0743.schema.remove_tsearch2.sql +-- +-- Enable native full-text search to be used, and drop TSearch2 extension +-- +BEGIN; + +-- check whether patch can be applied +SELECT evergreen.upgrade_deps_block_check('0743', :eg_version); + +-- FIXME: add/check SQL statements to perform the upgrade +-- First up, these functions depend on metabib.full_rec. They have to go for now. +DROP FUNCTION IF EXISTS biblio.flatten_marc(bigint); +DROP FUNCTION IF EXISTS biblio.flatten_marc(text); + +-- These views depend on metabib.full_rec as well. Bye-bye! +DROP VIEW IF EXISTS reporter.old_super_simple_record; +DROP VIEW IF EXISTS reporter.simple_record; + +-- Now we can drop metabib.full_rec. +DROP VIEW IF EXISTS metabib.full_rec; + +-- These indexes have to go. BEFORE we alter the tables, otherwise things take extra time when we alter the tables. +DROP INDEX metabib.metabib_author_field_entry_value_idx; +DROP INDEX metabib.metabib_identifier_field_entry_value_idx; +DROP INDEX metabib.metabib_keyword_field_entry_value_idx; +DROP INDEX metabib.metabib_series_field_entry_value_idx; +DROP INDEX metabib.metabib_subject_field_entry_value_idx; +DROP INDEX metabib.metabib_title_field_entry_value_idx; + +-- Now grab all of the tsvector-enabled columns and switch them to the non-wrapper version of the type. +ALTER TABLE authority.full_rec ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE authority.simple_heading ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE metabib.real_full_rec ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE metabib.author_field_entry ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE metabib.browse_entry ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE metabib.identifier_field_entry ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE metabib.keyword_field_entry ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE metabib.series_field_entry ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE metabib.subject_field_entry ALTER COLUMN index_vector TYPE pg_catalog.tsvector; +ALTER TABLE metabib.title_field_entry ALTER COLUMN index_vector TYPE pg_catalog.tsvector; + +-- Halfway there! Goodbye tsearch2 extension! +DROP EXTENSION tsearch2; + +-- Next up, re-creating all of the stuff we just dropped. + +-- Indexes! Note to whomever: Do we even need these anymore? +CREATE INDEX metabib_author_field_entry_value_idx ON metabib.author_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR; +CREATE INDEX metabib_identifier_field_entry_value_idx ON metabib.identifier_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR; +CREATE INDEX metabib_keyword_field_entry_value_idx ON metabib.keyword_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR; +CREATE INDEX metabib_series_field_entry_value_idx ON metabib.series_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR; +CREATE INDEX metabib_subject_field_entry_value_idx ON metabib.subject_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR; +CREATE INDEX metabib_title_field_entry_value_idx ON metabib.title_field_entry (SUBSTRING(value,1,1024)) WHERE index_vector = ''::TSVECTOR; + +-- metabib.full_rec, with insert/update/delete rules +CREATE OR REPLACE VIEW metabib.full_rec AS + SELECT id, + record, + tag, + ind1, + ind2, + subfield, + SUBSTRING(value,1,1024) AS value, + index_vector + FROM metabib.real_full_rec; + +CREATE OR REPLACE RULE metabib_full_rec_insert_rule + AS ON INSERT TO metabib.full_rec + DO INSTEAD + INSERT INTO metabib.real_full_rec VALUES ( + COALESCE(NEW.id, NEXTVAL('metabib.full_rec_id_seq'::REGCLASS)), + NEW.record, + NEW.tag, + NEW.ind1, + NEW.ind2, + NEW.subfield, + NEW.value, + NEW.index_vector + ); + +CREATE OR REPLACE RULE metabib_full_rec_update_rule + AS ON UPDATE TO metabib.full_rec + DO INSTEAD + UPDATE metabib.real_full_rec SET + id = NEW.id, + record = NEW.record, + tag = NEW.tag, + ind1 = NEW.ind1, + ind2 = NEW.ind2, + subfield = NEW.subfield, + value = NEW.value, + index_vector = NEW.index_vector + WHERE id = OLD.id; + +CREATE OR REPLACE RULE metabib_full_rec_delete_rule + AS ON DELETE TO metabib.full_rec + DO INSTEAD + DELETE FROM metabib.real_full_rec WHERE id = OLD.id; + +-- reporter views that depended on metabib.full_rec are up next +CREATE OR REPLACE VIEW reporter.simple_record AS +SELECT r.id, + s.metarecord, + r.fingerprint, + r.quality, + r.tcn_source, + r.tcn_value, + title.value AS title, + uniform_title.value AS uniform_title, + author.value AS author, + publisher.value AS publisher, + SUBSTRING(pubdate.value FROM $$\d+$$) AS pubdate, + series_title.value AS series_title, + series_statement.value AS series_statement, + summary.value AS summary, + ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn, + ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '650' AND subfield = 'a' AND record = r.id)) AS topic_subject, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '651' AND subfield = 'a' AND record = r.id)) AS geographic_subject, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '655' AND subfield = 'a' AND record = r.id)) AS genre, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '600' AND subfield = 'a' AND record = r.id)) AS name_subject, + ARRAY((SELECT DISTINCT value FROM metabib.full_rec WHERE tag = '610' AND subfield = 'a' AND record = r.id)) AS corporate_subject, + ARRAY((SELECT value FROM metabib.full_rec WHERE tag = '856' AND subfield IN ('3','y','u') AND record = r.id ORDER BY CASE WHEN subfield IN ('3','y') THEN 0 ELSE 1 END)) AS external_uri + FROM biblio.record_entry r + JOIN metabib.metarecord_source_map s ON (s.source = r.id) + LEFT JOIN metabib.full_rec uniform_title ON (r.id = uniform_title.record AND uniform_title.tag = '240' AND uniform_title.subfield = 'a') + LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') + LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag = '100' AND author.subfield = 'a') + LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND publisher.tag = '260' AND publisher.subfield = 'b') + LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND pubdate.tag = '260' AND pubdate.subfield = 'c') + LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) + LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') + LEFT JOIN metabib.full_rec series_title ON (r.id = series_title.record AND series_title.tag IN ('830','440') AND series_title.subfield = 'a') + LEFT JOIN metabib.full_rec series_statement ON (r.id = series_statement.record AND series_statement.tag = '490' AND series_statement.subfield = 'a') + LEFT JOIN metabib.full_rec summary ON (r.id = summary.record AND summary.tag = '520' AND summary.subfield = 'a') + GROUP BY 1,2,3,4,5,6,7,8,9,10,11,12,13,14; + +CREATE OR REPLACE VIEW reporter.old_super_simple_record AS +SELECT r.id, + r.fingerprint, + r.quality, + r.tcn_source, + r.tcn_value, + FIRST(title.value) AS title, + FIRST(author.value) AS author, + ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT publisher.value), ', ') AS publisher, + ARRAY_TO_STRING(ARRAY_ACCUM( DISTINCT SUBSTRING(pubdate.value FROM $$\d+$$) ), ', ') AS pubdate, + ARRAY_ACCUM( DISTINCT REPLACE(SUBSTRING(isbn.value FROM $$^\S+$$), '-', '') ) AS isbn, + ARRAY_ACCUM( DISTINCT REGEXP_REPLACE(issn.value, E'^\\S*(\\d{4})[-\\s](\\d{3,4}x?)', E'\\1 \\2') ) AS issn + FROM biblio.record_entry r + LEFT JOIN metabib.full_rec title ON (r.id = title.record AND title.tag = '245' AND title.subfield = 'a') + LEFT JOIN metabib.full_rec author ON (r.id = author.record AND author.tag IN ('100','110','111') AND author.subfield = 'a') + LEFT JOIN metabib.full_rec publisher ON (r.id = publisher.record AND publisher.tag = '260' AND publisher.subfield = 'b') + LEFT JOIN metabib.full_rec pubdate ON (r.id = pubdate.record AND pubdate.tag = '260' AND pubdate.subfield = 'c') + LEFT JOIN metabib.full_rec isbn ON (r.id = isbn.record AND isbn.tag IN ('024', '020') AND isbn.subfield IN ('a','z')) + LEFT JOIN metabib.full_rec issn ON (r.id = issn.record AND issn.tag = '022' AND issn.subfield = 'a') + GROUP BY 1,2,3,4,5; + +-- And finally, the biblio functions. NOTE: I can't find the original source of the second one, so I skipped it as old cruft that was in our production DB. +CREATE OR REPLACE FUNCTION biblio.flatten_marc ( rid BIGINT ) RETURNS SETOF metabib.full_rec AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + output metabib.full_rec%ROWTYPE; + field RECORD; +BEGIN + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + FOR field IN SELECT * FROM vandelay.flatten_marc( bib.marc ) LOOP + output.record := rid; + output.ind1 := field.ind1; + output.ind2 := field.ind2; + output.tag := field.tag; + output.subfield := field.subfield; + output.value := field.value; + + RETURN NEXT output; + END LOOP; +END; +$func$ LANGUAGE PLPGSQL; + +COMMIT; -- 2.43.2