3 SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
5 -- We don't pass this function arrays with nulls, so we save 5% not testing for that
6 CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
8 ) RETURNS TEXT[] AS $F$
10 SELECT * FROM UNNEST($1) x
12 SELECT * FROM UNNEST($2) y
16 CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
19 no_limit BOOL DEFAULT FALSE,
20 prefix_length INT DEFAULT 6,
22 ) RETURNS SETOF search.symspell_dictionary AS $F$
27 entry search.symspell_dictionary%ROWTYPE;
31 IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
32 key := SUBSTRING(key FROM 1 FOR prefix_length);
33 key_list := ARRAY[raw_input, key];
35 key_list := ARRAY[key];
38 FOREACH del_key IN ARRAY key_list LOOP
40 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
42 entry.prefix_key := del_key;
44 entry.keyword_count := 0;
45 entry.title_count := 0;
46 entry.author_count := 0;
47 entry.subject_count := 0;
48 entry.series_count := 0;
49 entry.identifier_count := 0;
51 entry.keyword_suggestions := '{}';
52 entry.title_suggestions := '{}';
53 entry.author_suggestions := '{}';
54 entry.subject_suggestions := '{}';
55 entry.series_suggestions := '{}';
56 entry.identifier_suggestions := '{}';
58 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
59 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
60 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
61 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
62 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
63 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
64 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
66 IF del_key = raw_input THEN
67 IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
68 IF source_class = 'title' THEN entry.title_count := 1; END IF;
69 IF source_class = 'author' THEN entry.author_count := 1; END IF;
70 IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
71 IF source_class = 'series' THEN entry.series_count := 1; END IF;
72 IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
78 FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
81 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
82 -- skip suggestions that are already too long for the prefix key
83 CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length;
85 entry.keyword_suggestions := '{}';
86 entry.title_suggestions := '{}';
87 entry.author_suggestions := '{}';
88 entry.subject_suggestions := '{}';
89 entry.series_suggestions := '{}';
90 entry.identifier_suggestions := '{}';
92 IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
93 IF source_class = 'title' THEN entry.title_count := 0; END IF;
94 IF source_class = 'author' THEN entry.author_count := 0; END IF;
95 IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
96 IF source_class = 'series' THEN entry.series_count := 0; END IF;
97 IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
99 entry.prefix_key := del_key;
101 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
102 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
103 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
104 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
105 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
106 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
107 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
113 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
115 CREATE OR REPLACE FUNCTION search.symspell_build_entries (
118 old_input TEXT DEFAULT NULL,
119 include_phrases BOOL DEFAULT FALSE
120 ) RETURNS SETOF search.symspell_dictionary AS $F$
127 entry search.symspell_dictionary;
129 IF full_input IS NOT NULL THEN
130 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
131 prefix_length := COALESCE(prefix_length, 6);
133 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
134 maxED := COALESCE(maxED, 3);
136 input := evergreen.lowercase(full_input);
137 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
138 IF word_list IS NULL THEN
142 IF CARDINALITY(word_list) > 1 AND include_phrases THEN
143 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
146 FOREACH word IN ARRAY word_list LOOP
147 -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs)
148 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
149 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
153 IF old_input IS NOT NULL THEN
154 input := evergreen.lowercase(old_input);
156 FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
157 -- similarly skip words that have 5 or more digits here to
158 -- avoid adding erroneous prefix deletion entries to the dictionary
159 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
160 entry.prefix_key := word;
162 entry.keyword_count := 0;
163 entry.title_count := 0;
164 entry.author_count := 0;
165 entry.subject_count := 0;
166 entry.series_count := 0;
167 entry.identifier_count := 0;
169 entry.keyword_suggestions := '{}';
170 entry.title_suggestions := '{}';
171 entry.author_suggestions := '{}';
172 entry.subject_suggestions := '{}';
173 entry.series_suggestions := '{}';
174 entry.identifier_suggestions := '{}';
176 IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
177 IF source_class = 'title' THEN entry.title_count := -1; END IF;
178 IF source_class = 'author' THEN entry.author_count := -1; END IF;
179 IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
180 IF source_class = 'series' THEN entry.series_count := -1; END IF;
181 IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
187 $F$ LANGUAGE PLPGSQL;
189 CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
192 old_input TEXT DEFAULT NULL,
193 include_phrases BOOL DEFAULT FALSE
194 ) RETURNS SETOF search.symspell_dictionary AS $F$
197 conflict_entry RECORD;
200 IF full_input = old_input THEN -- neither NULL, and are the same
204 FOR new_entry IN EXECUTE $q$
208 FROM (SELECT prefix_key,
209 ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
210 SUM($q$ || source_class || $q$_count) count
211 FROM search.symspell_build_entries($1, $2, $3, $4)
213 $q$ USING full_input, source_class, old_input, include_phrases
217 $q$ || source_class || $q$_suggestions suggestions,
218 $q$ || source_class || $q$_count count
219 FROM search.symspell_dictionary
220 WHERE prefix_key = $1 $q$
222 USING new_entry.prefix_key;
224 IF new_entry.count <> 0 THEN -- Real word, and count changed
225 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
226 IF conflict_entry.count > 0 THEN -- it's a real word
227 RETURN QUERY EXECUTE $q$
228 UPDATE search.symspell_dictionary
229 SET $q$ || source_class || $q$_count = $2
230 WHERE prefix_key = $1
232 USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
233 ELSE -- it was a prefix key or delete-emptied word before
234 IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
235 RETURN QUERY EXECUTE $q$
236 UPDATE search.symspell_dictionary
237 SET $q$ || source_class || $q$_count = $2
238 WHERE prefix_key = $1
240 USING new_entry.prefix_key, GREATEST(0, new_entry.count);
241 ELSE -- new suggestion!
242 RETURN QUERY EXECUTE $q$
243 UPDATE search.symspell_dictionary
244 SET $q$ || source_class || $q$_count = $2,
245 $q$ || source_class || $q$_suggestions = $3
246 WHERE prefix_key = $1
248 USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
252 -- We keep the on-conflict clause just in case...
253 RETURN QUERY EXECUTE $q$
254 INSERT INTO search.symspell_dictionary AS d (
255 $q$ || source_class || $q$_count,
257 $q$ || source_class || $q$_suggestions
258 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
259 UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
260 $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
262 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
264 ELSE -- key only, or no change
265 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
266 IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
267 RETURN QUERY EXECUTE $q$
268 UPDATE search.symspell_dictionary
269 SET $q$ || source_class || $q$_suggestions = $2
270 WHERE prefix_key = $1
272 USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
275 RETURN QUERY EXECUTE $q$
276 INSERT INTO search.symspell_dictionary AS d (
277 $q$ || source_class || $q$_count,
279 $q$ || source_class || $q$_suggestions
280 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
281 UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
283 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
288 $F$ LANGUAGE PLPGSQL;
293 \qecho 'The following should be run at the end of the upgrade before any'
294 \qecho 'reingest occurs. Because new triggers are installed already,'
295 \qecho 'updates to indexed strings will cause zero-count dictionary entries'
296 \qecho 'to be recorded which will require updating every row again (or'
297 \qecho 'starting from scratch) so best to do this before other batch'
298 \qecho 'changes. A later reingest that does not significantly change'
299 \qecho 'indexed strings will /not/ cause table bloat here, and will be'
300 \qecho 'as fast as normal. A copy of the SQL in a ready-to-use, non-escaped'
301 \qecho 'form is available inside a comment at the end of this upgrade sub-'
302 \qecho 'script so you do not need to copy this comment from the psql ouptut.'
308 \qecho 'select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);'
310 \qecho 'select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);'
312 \qecho 'select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);'
314 \qecho 'select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);'
315 \qecho '\\o identifier'
316 \qecho 'select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);'
318 \qecho 'select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);'
324 \qecho '// Then, at the command line:'
326 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql'
327 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql'
328 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql'
329 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql'
330 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql'
331 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql'
333 \qecho '// And, back in psql'
335 \qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;'
336 \qecho 'TRUNCATE search.symspell_dictionary;'
338 \qecho '\\i identifier.sql'
339 \qecho '\\i author.sql'
340 \qecho '\\i title.sql'
341 \qecho '\\i subject.sql'
342 \qecho '\\i series.sql'
343 \qecho '\\i keyword.sql'
345 \qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;'
346 \qecho 'REINDEX TABLE search.symspell_dictionary;'
347 \qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;'
348 \qecho 'VACUUM ANALYZE search.symspell_dictionary;'
350 \qecho 'DROP TABLE search.symspell_dictionary_partial_title;'
351 \qecho 'DROP TABLE search.symspell_dictionary_partial_author;'
352 \qecho 'DROP TABLE search.symspell_dictionary_partial_subject;'
353 \qecho 'DROP TABLE search.symspell_dictionary_partial_series;'
354 \qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;'
355 \qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;'
363 select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);
366 select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);
369 select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);
372 select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);
375 select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);
378 select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);
384 // Then, at the command line:
386 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql
387 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql
388 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql
389 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql
390 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql
391 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql
393 // To the extent your hardware allows, the above commands can be run in
394 // in parallel, in different shells. Each will use a full CPU, and RAM
395 // may be a limiting resource, so keep an eye on that with `top`.
400 ALTER TABLE search.symspell_dictionary SET UNLOGGED;
401 TRUNCATE search.symspell_dictionary;
410 CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;
411 REINDEX TABLE search.symspell_dictionary;
412 ALTER TABLE search.symspell_dictionary SET LOGGED;
413 VACUUM ANALYZE search.symspell_dictionary;
415 DROP TABLE search.symspell_dictionary_partial_title;
416 DROP TABLE search.symspell_dictionary_partial_author;
417 DROP TABLE search.symspell_dictionary_partial_subject;
418 DROP TABLE search.symspell_dictionary_partial_series;
419 DROP TABLE search.symspell_dictionary_partial_identifier;
420 DROP TABLE search.symspell_dictionary_partial_keyword;