3 SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
5 -- We don't pass this function arrays with nulls, so we save 5% not testing for that
6 CREATE OR REPLACE FUNCTION evergreen.text_array_merge_unique (
8 ) RETURNS TEXT[] AS $F$
10 SELECT * FROM UNNEST($1) x
12 SELECT * FROM UNNEST($2) y
16 CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
19 no_limit BOOL DEFAULT FALSE,
20 prefix_length INT DEFAULT 6,
22 ) RETURNS SETOF search.symspell_dictionary AS $F$
27 entry search.symspell_dictionary%ROWTYPE;
31 IF NOT no_limit AND CHARACTER_LENGTH(raw_input) > prefix_length THEN
32 key := SUBSTRING(key FROM 1 FOR prefix_length);
33 key_list := ARRAY[raw_input, key];
35 key_list := ARRAY[key];
38 FOREACH del_key IN ARRAY key_list LOOP
40 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
42 entry.prefix_key := del_key;
44 entry.keyword_count := 0;
45 entry.title_count := 0;
46 entry.author_count := 0;
47 entry.subject_count := 0;
48 entry.series_count := 0;
49 entry.identifier_count := 0;
51 entry.keyword_suggestions := '{}';
52 entry.title_suggestions := '{}';
53 entry.author_suggestions := '{}';
54 entry.subject_suggestions := '{}';
55 entry.series_suggestions := '{}';
56 entry.identifier_suggestions := '{}';
58 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
59 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
60 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
61 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
62 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
63 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
64 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
66 IF del_key = raw_input THEN
67 IF source_class = 'keyword' THEN entry.keyword_count := 1; END IF;
68 IF source_class = 'title' THEN entry.title_count := 1; END IF;
69 IF source_class = 'author' THEN entry.author_count := 1; END IF;
70 IF source_class = 'subject' THEN entry.subject_count := 1; END IF;
71 IF source_class = 'series' THEN entry.series_count := 1; END IF;
72 IF source_class = 'identifier' THEN entry.identifier_count := 1; END IF;
78 FOR del_key IN SELECT x FROM UNNEST(search.symspell_generate_edits(key, 1, maxED)) x LOOP
81 CONTINUE WHEN del_key IS NULL OR CHARACTER_LENGTH(del_key) = 0;
82 -- skip suggestions that are already too long for the prefix key
83 CONTINUE WHEN CHARACTER_LENGTH(del_key) <= (prefix_length - maxED) AND CHARACTER_LENGTH(raw_input) > prefix_length;
85 entry.keyword_suggestions := '{}';
86 entry.title_suggestions := '{}';
87 entry.author_suggestions := '{}';
88 entry.subject_suggestions := '{}';
89 entry.series_suggestions := '{}';
90 entry.identifier_suggestions := '{}';
92 IF source_class = 'keyword' THEN entry.keyword_count := 0; END IF;
93 IF source_class = 'title' THEN entry.title_count := 0; END IF;
94 IF source_class = 'author' THEN entry.author_count := 0; END IF;
95 IF source_class = 'subject' THEN entry.subject_count := 0; END IF;
96 IF source_class = 'series' THEN entry.series_count := 0; END IF;
97 IF source_class = 'identifier' THEN entry.identifier_count := 0; END IF;
99 entry.prefix_key := del_key;
101 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
102 IF source_class = 'title' THEN entry.title_suggestions := ARRAY[raw_input]; END IF;
103 IF source_class = 'author' THEN entry.author_suggestions := ARRAY[raw_input]; END IF;
104 IF source_class = 'subject' THEN entry.subject_suggestions := ARRAY[raw_input]; END IF;
105 IF source_class = 'series' THEN entry.series_suggestions := ARRAY[raw_input]; END IF;
106 IF source_class = 'identifier' THEN entry.identifier_suggestions := ARRAY[raw_input]; END IF;
107 IF source_class = 'keyword' THEN entry.keyword_suggestions := ARRAY[raw_input]; END IF;
113 $F$ LANGUAGE PLPGSQL STRICT IMMUTABLE;
115 CREATE OR REPLACE FUNCTION search.symspell_build_entries (
118 old_input TEXT DEFAULT NULL,
119 include_phrases BOOL DEFAULT FALSE
120 ) RETURNS SETOF search.symspell_dictionary AS $F$
127 entry search.symspell_dictionary;
129 IF full_input IS NOT NULL THEN
130 SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
131 prefix_length := COALESCE(prefix_length, 6);
133 SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
134 maxED := COALESCE(maxED, 3);
136 input := evergreen.lowercase(full_input);
137 word_list := ARRAY_AGG(x) FROM search.symspell_parse_words_distinct(input) x;
139 IF CARDINALITY(word_list) > 1 AND include_phrases THEN
140 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(input, source_class, TRUE, prefix_length, maxED);
143 FOREACH word IN ARRAY word_list LOOP
144 -- Skip words that have runs of 5 or more digits (I'm looking at you, ISxNs)
145 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
146 RETURN QUERY SELECT * FROM search.symspell_build_raw_entry(word, source_class, FALSE, prefix_length, maxED);
150 IF old_input IS NOT NULL THEN
151 input := evergreen.lowercase(old_input);
153 FOR word IN SELECT x FROM search.symspell_parse_words_distinct(input) x LOOP
154 -- similarly skip words that have 5 or more digits here to
155 -- avoid adding erroneous prefix deletion entries to the dictionary
156 CONTINUE WHEN CHARACTER_LENGTH(word) > 4 AND word ~ '\d{5,}';
157 entry.prefix_key := word;
159 entry.keyword_count := 0;
160 entry.title_count := 0;
161 entry.author_count := 0;
162 entry.subject_count := 0;
163 entry.series_count := 0;
164 entry.identifier_count := 0;
166 entry.keyword_suggestions := '{}';
167 entry.title_suggestions := '{}';
168 entry.author_suggestions := '{}';
169 entry.subject_suggestions := '{}';
170 entry.series_suggestions := '{}';
171 entry.identifier_suggestions := '{}';
173 IF source_class = 'keyword' THEN entry.keyword_count := -1; END IF;
174 IF source_class = 'title' THEN entry.title_count := -1; END IF;
175 IF source_class = 'author' THEN entry.author_count := -1; END IF;
176 IF source_class = 'subject' THEN entry.subject_count := -1; END IF;
177 IF source_class = 'series' THEN entry.series_count := -1; END IF;
178 IF source_class = 'identifier' THEN entry.identifier_count := -1; END IF;
184 $F$ LANGUAGE PLPGSQL;
186 CREATE OR REPLACE FUNCTION search.symspell_build_and_merge_entries (
189 old_input TEXT DEFAULT NULL,
190 include_phrases BOOL DEFAULT FALSE
191 ) RETURNS SETOF search.symspell_dictionary AS $F$
194 conflict_entry RECORD;
197 IF full_input = old_input THEN -- neither NULL, and are the same
201 FOR new_entry IN EXECUTE $q$
205 FROM (SELECT prefix_key,
206 ARRAY_AGG(DISTINCT $q$ || source_class || $q$_suggestions[1]) s,
207 SUM($q$ || source_class || $q$_count) count
208 FROM search.symspell_build_entries($1, $2, $3, $4)
210 $q$ USING full_input, source_class, old_input, include_phrases
214 $q$ || source_class || $q$_suggestions suggestions,
215 $q$ || source_class || $q$_count count
216 FROM search.symspell_dictionary
217 WHERE prefix_key = $1 $q$
219 USING new_entry.prefix_key;
221 IF new_entry.count <> 0 THEN -- Real word, and count changed
222 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
223 IF conflict_entry.count > 0 THEN -- it's a real word
224 RETURN QUERY EXECUTE $q$
225 UPDATE search.symspell_dictionary
226 SET $q$ || source_class || $q$_count = $2
227 WHERE prefix_key = $1
229 USING new_entry.prefix_key, GREATEST(0, new_entry.count + conflict_entry.count);
230 ELSE -- it was a prefix key or delete-emptied word before
231 IF conflict_entry.suggestions @> new_entry.suggestions THEN -- already have all suggestions here...
232 RETURN QUERY EXECUTE $q$
233 UPDATE search.symspell_dictionary
234 SET $q$ || source_class || $q$_count = $2
235 WHERE prefix_key = $1
237 USING new_entry.prefix_key, GREATEST(0, new_entry.count);
238 ELSE -- new suggestion!
239 RETURN QUERY EXECUTE $q$
240 UPDATE search.symspell_dictionary
241 SET $q$ || source_class || $q$_count = $2,
242 $q$ || source_class || $q$_suggestions = $3
243 WHERE prefix_key = $1
245 USING new_entry.prefix_key, GREATEST(0, new_entry.count), evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
249 -- We keep the on-conflict clause just in case...
250 RETURN QUERY EXECUTE $q$
251 INSERT INTO search.symspell_dictionary AS d (
252 $q$ || source_class || $q$_count,
254 $q$ || source_class || $q$_suggestions
255 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO
256 UPDATE SET $q$ || source_class || $q$_count = d.$q$ || source_class || $q$_count + EXCLUDED.$q$ || source_class || $q$_count,
257 $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
259 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
261 ELSE -- key only, or no change
262 IF conflict_entry.prefix_key IS NOT NULL THEN -- we'll be updating
263 IF NOT conflict_entry.suggestions @> new_entry.suggestions THEN -- There are new suggestions
264 RETURN QUERY EXECUTE $q$
265 UPDATE search.symspell_dictionary
266 SET $q$ || source_class || $q$_suggestions = $2
267 WHERE prefix_key = $1
269 USING new_entry.prefix_key, evergreen.text_array_merge_unique(conflict_entry.suggestions,new_entry.suggestions);
272 RETURN QUERY EXECUTE $q$
273 INSERT INTO search.symspell_dictionary AS d (
274 $q$ || source_class || $q$_count,
276 $q$ || source_class || $q$_suggestions
277 ) VALUES ( $1, $2, $3 ) ON CONFLICT (prefix_key) DO -- key exists, suggestions may be added due to this entry
278 UPDATE SET $q$ || source_class || $q$_suggestions = evergreen.text_array_merge_unique(d.$q$ || source_class || $q$_suggestions, EXCLUDED.$q$ || source_class || $q$_suggestions)
280 USING new_entry.count, new_entry.prefix_key, new_entry.suggestions;
285 $F$ LANGUAGE PLPGSQL;
290 \qecho 'The following should be run at the end of the upgrade before any'
291 \qecho 'reingest occurs. Because new triggers are installed already,'
292 \qecho 'updates to indexed strings will cause zero-count dictionary entries'
293 \qecho 'to be recorded which will require updating every row again (or'
294 \qecho 'starting from scratch) so best to do this before other batch'
295 \qecho 'changes. A later reingest that does not significantly change'
296 \qecho 'indexed strings will /not/ cause table bloat here, and will be'
297 \qecho 'as fast as normal. A copy of the SQL in a ready-to-use, non-escaped'
298 \qecho 'form is available inside a comment at the end of this upgrade sub-'
299 \qecho 'script so you do not need to copy this comment from the psql ouptut.'
305 \qecho 'select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);'
307 \qecho 'select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);'
309 \qecho 'select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);'
311 \qecho 'select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);'
312 \qecho '\\o identifier'
313 \qecho 'select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);'
315 \qecho 'select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);'
321 \qecho '// Then, at the command line:'
323 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql'
324 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql'
325 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql'
326 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql'
327 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql'
328 \qecho '$ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql'
330 \qecho '// And, back in psql'
332 \qecho 'ALTER TABLE search.symspell_dictionary SET UNLOGGED;'
333 \qecho 'TRUNCATE search.symspell_dictionary;'
335 \qecho '\\i identifier.sql'
336 \qecho '\\i author.sql'
337 \qecho '\\i title.sql'
338 \qecho '\\i subject.sql'
339 \qecho '\\i series.sql'
340 \qecho '\\i keyword.sql'
342 \qecho 'CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;'
343 \qecho 'REINDEX TABLE search.symspell_dictionary;'
344 \qecho 'ALTER TABLE search.symspell_dictionary SET LOGGED;'
345 \qecho 'VACUUM ANALYZE search.symspell_dictionary;'
347 \qecho 'DROP TABLE search.symspell_dictionary_partial_title;'
348 \qecho 'DROP TABLE search.symspell_dictionary_partial_author;'
349 \qecho 'DROP TABLE search.symspell_dictionary_partial_subject;'
350 \qecho 'DROP TABLE search.symspell_dictionary_partial_series;'
351 \qecho 'DROP TABLE search.symspell_dictionary_partial_identifier;'
352 \qecho 'DROP TABLE search.symspell_dictionary_partial_keyword;'
360 select value from metabib.title_field_entry where source in (select id from biblio.record_entry where not deleted);
363 select value from metabib.author_field_entry where source in (select id from biblio.record_entry where not deleted);
366 select value from metabib.subject_field_entry where source in (select id from biblio.record_entry where not deleted);
369 select value from metabib.series_field_entry where source in (select id from biblio.record_entry where not deleted);
372 select value from metabib.identifier_field_entry where source in (select id from biblio.record_entry where not deleted);
375 select value from metabib.keyword_field_entry where source in (select id from biblio.record_entry where not deleted);
381 // Then, at the command line:
383 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl title > title.sql
384 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl author > author.sql
385 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl subject > subject.sql
386 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl series > series.sql
387 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl identifier > identifier.sql
388 $ ~/EG-src-path/Open-ILS/src/support-scripts/symspell-sideload.pl keyword > keyword.sql
390 // To the extent your hardware allows, the above commands can be run in
391 // in parallel, in different shells. Each will use a full CPU, and RAM
392 // may be a limiting resource, so keep an eye on that with `top`.
397 ALTER TABLE search.symspell_dictionary SET UNLOGGED;
398 TRUNCATE search.symspell_dictionary;
407 CLUSTER search.symspell_dictionary USING symspell_dictionary_pkey;
408 REINDEX TABLE search.symspell_dictionary;
409 ALTER TABLE search.symspell_dictionary SET LOGGED;
410 VACUUM ANALYZE search.symspell_dictionary;
412 DROP TABLE search.symspell_dictionary_partial_title;
413 DROP TABLE search.symspell_dictionary_partial_author;
414 DROP TABLE search.symspell_dictionary_partial_subject;
415 DROP TABLE search.symspell_dictionary_partial_series;
416 DROP TABLE search.symspell_dictionary_partial_identifier;
417 DROP TABLE search.symspell_dictionary_partial_keyword;