-- Calls evergreen.upgrade_deps_block_check() with the literal '1340' and the
-- psql variable :eg_version. The function is defined elsewhere;
-- NOTE(review): presumably this is the upgrade-script guard that records or
-- validates upgrade 1340 before the statements below run -- confirm.
3 SELECT evergreen.upgrade_deps_block_check('1340', :eg_version);
5 -- INSERT-only table that catches dictionary updates to be reconciled
-- Staging table for pending dictionary changes. UNLOGGED means PostgreSQL
-- does not write its data to the WAL: writes are cheaper, but the contents
-- are truncated after a crash and are not replicated to standbys.
-- NOTE(review): the transaction_id column that the index on this table and
-- search.symspell_dictionary_reify() rely on is not visible in this excerpt,
-- nor is the end of the column list -- confirm against the full file.
6 CREATE UNLOGGED TABLE search.symspell_dictionary_updates (
-- Per-search-class counts. search.symspell_dictionary_reify() SUMs these
-- over the rows it collects and adds the totals to the matching
-- search.symspell_dictionary row.
8 keyword_count INT NOT NULL DEFAULT 0,
9 title_count INT NOT NULL DEFAULT 0,
10 author_count INT NOT NULL DEFAULT 0,
11 subject_count INT NOT NULL DEFAULT 0,
12 series_count INT NOT NULL DEFAULT 0,
13 identifier_count INT NOT NULL DEFAULT 0,
-- Key that the reify upsert resolves conflicts on (ON CONFLICT (prefix_key)).
15 prefix_key TEXT NOT NULL,
-- Per-search-class suggestion arrays (nullable). The reify function only
-- reads element [1] of each array.
17 keyword_suggestions TEXT[],
18 title_suggestions TEXT[],
19 author_suggestions TEXT[],
20 subject_suggestions TEXT[],
21 series_suggestions TEXT[],
22 identifier_suggestions TEXT[]
-- Supports the "transaction_id = txid_current()" lookup that
-- search.symspell_dictionary_reify() uses to find the current transaction's
-- staged rows.
24 CREATE INDEX symspell_dictionary_updates_tid_idx ON search.symspell_dictionary_updates (transaction_id);
26 -- Function that collects this transaction's additions to the unlogged update table
-- Drains the rows staged by the current transaction and folds them into
-- search.symspell_dictionary:
--   1. deletes the rows of search.symspell_dictionary_updates whose
--      transaction_id equals txid_current(), keeping them via RETURNING *;
--   2. sums the per-class counts and gathers the distinct first elements of
--      each suggestion array, with NULLs removed;
--   3. upserts the result keyed on prefix_key: counts are added to the stored
--      counts (never going below zero) and suggestions are combined through
--      evergreen.text_array_merge_unique().
-- Declared to return SETOF search.symspell_dictionary; takes no arguments.
-- NOTE(review): some lines of this definition are not visible in this excerpt
-- (for example the head of the WITH clause, the source/grouping of
-- computed_rows, and whatever follows the upsert) -- confirm against the
-- full file before changing anything here.
27 CREATE OR REPLACE FUNCTION search.symspell_dictionary_reify () RETURNS SETOF search.symspell_dictionary AS $f$
-- Only this transaction's staged rows are removed; rows staged by other
-- transactions stay in place.
29 DELETE FROM search.symspell_dictionary_updates WHERE transaction_id = txid_current() RETURNING *
30 ), computed_rows AS ( -- this collapses the rows deleted into the format we need for UPSERT
31 SELECT SUM(keyword_count) AS keyword_count,
32 SUM(title_count) AS title_count,
33 SUM(author_count) AS author_count,
34 SUM(subject_count) AS subject_count,
35 SUM(series_count) AS series_count,
36 SUM(identifier_count) AS identifier_count,
-- Only element [1] of each staged array is aggregated; ARRAY_REMOVE drops the
-- NULL that ARRAY_AGG yields for rows with no suggestion for that class.
40 ARRAY_REMOVE(ARRAY_AGG(DISTINCT keyword_suggestions[1]), NULL) AS keyword_suggestions,
41 ARRAY_REMOVE(ARRAY_AGG(DISTINCT title_suggestions[1]), NULL) AS title_suggestions,
42 ARRAY_REMOVE(ARRAY_AGG(DISTINCT author_suggestions[1]), NULL) AS author_suggestions,
43 ARRAY_REMOVE(ARRAY_AGG(DISTINCT subject_suggestions[1]), NULL) AS subject_suggestions,
44 ARRAY_REMOVE(ARRAY_AGG(DISTINCT series_suggestions[1]), NULL) AS series_suggestions,
45 ARRAY_REMOVE(ARRAY_AGG(DISTINCT identifier_suggestions[1]), NULL) AS identifier_suggestions
-- NOTE(review): the INSERT has no target column list and selects *, so it
-- relies on computed_rows producing columns in exactly the order of
-- search.symspell_dictionary's columns.
49 INSERT INTO search.symspell_dictionary AS d SELECT * FROM computed_rows
50 ON CONFLICT (prefix_key) DO UPDATE SET
-- For every class: GREATEST(0, ...) keeps the stored count from dropping
-- below zero, and the incoming suggestions are passed first to the merge
-- helper, ahead of the ones already stored.
51 keyword_count = GREATEST(0, d.keyword_count + EXCLUDED.keyword_count),
52 keyword_suggestions = evergreen.text_array_merge_unique(EXCLUDED.keyword_suggestions,d.keyword_suggestions),
54 title_count = GREATEST(0, d.title_count + EXCLUDED.title_count),
55 title_suggestions = evergreen.text_array_merge_unique(EXCLUDED.title_suggestions,d.title_suggestions),
57 author_count = GREATEST(0, d.author_count + EXCLUDED.author_count),
58 author_suggestions = evergreen.text_array_merge_unique(EXCLUDED.author_suggestions,d.author_suggestions),
60 subject_count = GREATEST(0, d.subject_count + EXCLUDED.subject_count),
61 subject_suggestions = evergreen.text_array_merge_unique(EXCLUDED.subject_suggestions,d.subject_suggestions),
63 series_count = GREATEST(0, d.series_count + EXCLUDED.series_count),
64 series_suggestions = evergreen.text_array_merge_unique(EXCLUDED.series_suggestions,d.series_suggestions),
66 identifier_count = GREATEST(0, d.identifier_count + EXCLUDED.identifier_count),
67 identifier_suggestions = evergreen.text_array_merge_unique(EXCLUDED.identifier_suggestions,d.identifier_suggestions)
71 -- simplified metabib.*_field_entry trigger that stages updates for reification in one go
-- Trigger function that stages dictionary changes for a row's "value" column:
-- the output of search.symspell_build_entries() is inserted into
-- search.symspell_dictionary_updates, prefixed with txid_current() so the
-- rows can later be found per transaction.
-- Trigger argument 0 (optional): the search class name; when it is absent
-- the part of the table name before the first underscore is used.
-- Always returns NULL.
-- NOTE(review): parts of the body are not visible in this excerpt (the
-- DECLARE/BEGIN lines, what happens when the old and new values are equal,
-- and the arguments passed to search.symspell_build_entries) -- confirm
-- against the full file.
72 CREATE OR REPLACE FUNCTION search.symspell_maintain_entries () RETURNS TRIGGER AS $f$
75 new_value TEXT := NULL;
76 old_value TEXT := NULL;
78 search_class := COALESCE(TG_ARGV[0], SPLIT_PART(TG_TABLE_NAME,'_',1));
-- NEW only exists for INSERT/UPDATE and OLD only for DELETE/UPDATE, so each
-- is read only under the operations that define it.
80 IF TG_OP IN ('INSERT', 'UPDATE') THEN
81 new_value := NEW.value;
84 IF TG_OP IN ('DELETE', 'UPDATE') THEN
85 old_value := OLD.value;
-- On INSERT or DELETE one side is still NULL, so this comparison yields NULL
-- and the branch is not taken; it can only be true for an UPDATE that left
-- the value unchanged. Presumably nothing is staged in that case -- the
-- branch body is not visible here.
88 IF new_value = old_value THEN
91 INSERT INTO search.symspell_dictionary_updates
92 SELECT txid_current(), *
93 FROM search.symspell_build_entries(
100 RETURN NULL; -- always fired AFTER
102 $f$ LANGUAGE PLPGSQL;
-- Rebuilds the metabib facet, display, browse and search entries for the
-- record identified by bib_id, then refreshes the combined index vectors and
-- reifies the staged symspell dictionary updates.
--
-- Parameters visible in this excerpt:
--   skip_facet, skip_display, skip_browse, skip_search
--       TRUE skips that entry type. FALSE (the default) defers to the
--       matching 'ingest.skip_*_indexing' row in config.internal_flag.
--   only_fields
--       config.metabib_field ids to process; the default empty array means
--       every field.
-- Returns VOID.
--
-- NOTE(review): bib_id is used throughout but its declaration is not visible
-- here (presumably it is the first parameter). Several variable declarations
-- and most control-flow lines (BEGIN, ELSE, END IF, END LOOP) are also not
-- visible -- confirm against the full file before changing anything.
104 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries(
106 skip_facet BOOL DEFAULT FALSE,
107 skip_display BOOL DEFAULT FALSE,
108 skip_browse BOOL DEFAULT FALSE,
109 skip_search BOOL DEFAULT FALSE,
110 only_fields INT[] DEFAULT '{}'::INT[]
111 ) RETURNS VOID AS $func$
114 ind_data metabib.field_entry_template%ROWTYPE;
115 mbe_row metabib.browse_entry%ROWTYPE;
122 field_list INT[] := only_fields;
123 field_types TEXT[] := '{}'::TEXT[];
-- An empty only_fields means "all fields": load every config.metabib_field id.
126 IF field_list = '{}'::INT[] THEN
127 SELECT ARRAY_AGG(id) INTO field_list FROM config.metabib_field;
-- NULLIF(x, FALSE) turns FALSE into NULL, so an explicit TRUE argument wins;
-- otherwise COALESCE falls through to whether the corresponding internal
-- flag exists and is enabled.
130 SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
131 SELECT COALESCE(NULLIF(skip_display, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing' AND enabled)) INTO b_skip_display;
132 SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
133 SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;
-- field_types collects the entry types that are still enabled; it is handed
-- to biblio.extract_metabib_field_entry() further down.
135 IF NOT b_skip_facet THEN field_types := field_types || '{facet}'; END IF;
136 IF NOT b_skip_display THEN field_types := field_types || '{display}'; END IF;
137 IF NOT b_skip_browse THEN field_types := field_types || '{browse}'; END IF;
138 IF NOT b_skip_search THEN field_types := field_types || '{search}'; END IF;
-- PERFORM discards the result and only sets FOUND. The test on FOUND is not
-- visible here; presumably the deletes below are skipped when
-- 'ingest.assume_inserts_only' is enabled -- confirm.
140 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
142 IF NOT b_skip_search THEN
143 FOR fclass IN SELECT * FROM config.metabib_class LOOP
144 -- RAISE NOTICE 'Emptying out %', fclass.name;
-- Dynamic SQL because the table name depends on the class.
-- NOTE(review): fclass.name is spliced in as an identifier without quoting
-- and bib_id is concatenated into the statement text rather than bound.
145 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
148 IF NOT b_skip_facet THEN
149 DELETE FROM metabib.facet_entry WHERE source = bib_id;
151 IF NOT b_skip_display THEN
152 DELETE FROM metabib.display_entry WHERE source = bib_id;
154 IF NOT b_skip_browse THEN
155 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
-- Main loop: one iteration per extracted field entry for this record.
159 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id, ' ', field_types, field_list ) LOOP
161 -- don't store what has been normalized away
162 CONTINUE WHEN ind_data.value IS NULL;
-- A negative field id is flipped to its positive value before use.
164 IF ind_data.field < 0 THEN
165 ind_data.field = -1 * ind_data.field;
168 IF ind_data.facet_field AND NOT b_skip_facet THEN
169 INSERT INTO metabib.facet_entry (field, source, value)
170 VALUES (ind_data.field, ind_data.source, ind_data.value);
173 IF ind_data.display_field AND NOT b_skip_display THEN
174 INSERT INTO metabib.display_entry (field, source, value)
175 VALUES (ind_data.field, ind_data.source, ind_data.value);
179 IF ind_data.browse_field AND NOT b_skip_browse THEN
180 -- A caveat about this SELECT: this should take care of replacing
181 -- old mbe rows when data changes, but not if normalization (by
182 -- which I mean specifically the output of
183 -- evergreen.oils_tsearch2()) changes. It may or may not be
184 -- expensive to add a comparison of index_vector to index_vector
185 -- to the WHERE clause below.
-- NOTE(review): CONTINUE leaves the whole loop iteration, so for a browse
-- row with a NULL sort_value the search-field handling further down in this
-- iteration is skipped as well.
187 CONTINUE WHEN ind_data.sort_value IS NULL;
189 value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
-- Look for an existing browse entry with the same sort_value and value,
-- comparing the value case-insensitively when browse_nocase is set.
190 IF ind_data.browse_nocase THEN
191 SELECT INTO mbe_row * FROM metabib.browse_entry
192 WHERE evergreen.lowercase(value) = evergreen.lowercase(value_prepped) AND sort_value = ind_data.sort_value
193 ORDER BY sort_value, value LIMIT 1; -- gotta pick something, I guess
195 SELECT INTO mbe_row * FROM metabib.browse_entry
196 WHERE value = value_prepped AND sort_value = ind_data.sort_value;
-- Presumably: reuse the id of the row found above, otherwise insert a new
-- browse entry and take its id from the sequence (the FOUND test and ELSE
-- are not visible here).
200 mbe_id := mbe_row.id;
202 INSERT INTO metabib.browse_entry
203 ( value, sort_value ) VALUES
204 ( value_prepped, ind_data.sort_value );
206 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
209 INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
210 VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
213 IF ind_data.search_field AND NOT b_skip_search THEN
214 -- Avoid inserting duplicate rows
-- mbe_id is reused here as a scratch "row already exists" marker: it is 1
-- when a matching row is found and NULL otherwise.
215 EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
216 '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
217 INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
218 -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
219 IF mbe_id IS NULL THEN
-- The values are embedded with quote_literal() rather than bound with USING;
-- the class name is spliced into the table name.
221 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
223 quote_literal(ind_data.field) || $$, $$ ||
224 quote_literal(ind_data.source) || $$, $$ ||
225 quote_literal(ind_data.value) ||
-- After the loop: rebuild the record's combined index vectors and fold this
-- transaction's staged symspell rows into the dictionary.
232 IF NOT b_skip_search THEN
233 PERFORM metabib.update_combined_index_vectors(bib_id);
234 PERFORM search.symspell_dictionary_reify();
239 $func$ LANGUAGE PLPGSQL;