3 SELECT evergreen.upgrade_deps_block_check('0757', :eg_version);
5 SET search_path = public, pg_catalog;
11 FOR lang IN SELECT substring(pptsd.dictname from '(.*)_stem$') AS lang FROM pg_catalog.pg_ts_dict pptsd JOIN pg_catalog.pg_namespace ppn ON ppn.oid = pptsd.dictnamespace
12 WHERE ppn.nspname = 'pg_catalog' AND pptsd.dictname LIKE '%_stem' LOOP
13 RAISE NOTICE 'FOUND LANGUAGE %', lang;
15 EXECUTE 'DROP TEXT SEARCH DICTIONARY IF EXISTS ' || lang || '_nostop CASCADE;
16 CREATE TEXT SEARCH DICTIONARY ' || lang || '_nostop (TEMPLATE=pg_catalog.snowball, language=''' || lang || ''');
17 COMMENT ON TEXT SEARCH DICTIONARY ' || lang || '_nostop IS ''' ||lang || ' snowball stemmer with no stopwords for ASCII words only.'';
18 CREATE TEXT SEARCH CONFIGURATION ' || lang || '_nostop ( COPY = pg_catalog.' || lang || ' );
19 ALTER TEXT SEARCH CONFIGURATION ' || lang || '_nostop ALTER MAPPING FOR word, hword, hword_part WITH pg_catalog.simple;
20 ALTER TEXT SEARCH CONFIGURATION ' || lang || '_nostop ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH ' || lang || '_nostop;';
25 CREATE TEXT SEARCH CONFIGURATION keyword ( COPY = english_nostop );
26 CREATE TEXT SEARCH CONFIGURATION "default" ( COPY = english_nostop );
28 SET search_path = evergreen, public, pg_catalog;
30 ALTER TABLE config.metabib_class
31 ADD COLUMN a_weight NUMERIC DEFAULT 1.0 NOT NULL,
32 ADD COLUMN b_weight NUMERIC DEFAULT 0.4 NOT NULL,
33 ADD COLUMN c_weight NUMERIC DEFAULT 0.2 NOT NULL,
34 ADD COLUMN d_weight NUMERIC DEFAULT 0.1 NOT NULL;
36 CREATE TABLE config.ts_config_list (
40 COMMENT ON TABLE config.ts_config_list IS $$
43 A list of full text configs with names and descriptions.
46 CREATE TABLE config.metabib_class_ts_map (
47 id SERIAL PRIMARY KEY,
48 field_class TEXT NOT NULL REFERENCES config.metabib_class (name),
49 ts_config TEXT NOT NULL REFERENCES config.ts_config_list (id),
50 active BOOL NOT NULL DEFAULT TRUE,
51 index_weight CHAR(1) NOT NULL DEFAULT 'C' CHECK (index_weight IN ('A','B','C','D')),
53 search_lang TEXT NULL,
54 always BOOL NOT NULL DEFAULT true
56 COMMENT ON TABLE config.metabib_class_ts_map IS $$
57 Text Search Configs for metabib class indexing
59 This table contains text search config definitions for
60 storing index_vector values.
63 CREATE TABLE config.metabib_field_ts_map (
64 id SERIAL PRIMARY KEY,
65 metabib_field INT NOT NULL REFERENCES config.metabib_field (id),
66 ts_config TEXT NOT NULL REFERENCES config.ts_config_list (id),
67 active BOOL NOT NULL DEFAULT TRUE,
68 index_weight CHAR(1) NOT NULL DEFAULT 'C' CHECK (index_weight IN ('A','B','C','D')),
72 COMMENT ON TABLE config.metabib_field_ts_map IS $$
73 Text Search Configs for metabib field indexing
75 This table contains text search config definitions for
76 storing index_vector values.
79 CREATE TABLE metabib.combined_identifier_field_entry (
80 record BIGINT NOT NULL,
81 metabib_field INT NULL,
82 index_vector tsvector NOT NULL
84 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
85 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
86 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
88 CREATE TABLE metabib.combined_title_field_entry (
89 record BIGINT NOT NULL,
90 metabib_field INT NULL,
91 index_vector tsvector NOT NULL
93 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
94 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
95 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
97 CREATE TABLE metabib.combined_author_field_entry (
98 record BIGINT NOT NULL,
99 metabib_field INT NULL,
100 index_vector tsvector NOT NULL
102 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
103 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
104 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
106 CREATE TABLE metabib.combined_subject_field_entry (
107 record BIGINT NOT NULL,
108 metabib_field INT NULL,
109 index_vector tsvector NOT NULL
111 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
112 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
113 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
115 CREATE TABLE metabib.combined_keyword_field_entry (
116 record BIGINT NOT NULL,
117 metabib_field INT NULL,
118 index_vector tsvector NOT NULL
120 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
121 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
122 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
124 CREATE TABLE metabib.combined_series_field_entry (
125 record BIGINT NOT NULL,
126 metabib_field INT NULL,
127 index_vector tsvector NOT NULL
129 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
130 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
131 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
133 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
135 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
136 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
137 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
138 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
139 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
140 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
141 FROM metabib.keyword_field_entry WHERE source = bib_id;
143 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
144 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
145 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
146 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
147 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
148 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
149 FROM metabib.title_field_entry WHERE source = bib_id;
151 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
152 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
153 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
154 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
155 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
156 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
157 FROM metabib.author_field_entry WHERE source = bib_id;
159 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
160 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
161 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
162 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
163 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
164 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
165 FROM metabib.subject_field_entry WHERE source = bib_id;
167 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
168 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
169 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
170 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
171 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
172 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
173 FROM metabib.series_field_entry WHERE source = bib_id;
175 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
176 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
177 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
178 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
179 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
180 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
181 FROM metabib.identifier_field_entry WHERE source = bib_id;
184 $func$ LANGUAGE PLPGSQL;
186 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
189 ind_data metabib.field_entry_template%ROWTYPE;
190 mbe_row metabib.browse_entry%ROWTYPE;
193 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
195 IF NOT skip_search THEN
196 FOR fclass IN SELECT * FROM config.metabib_class LOOP
197 -- RAISE NOTICE 'Emptying out %', fclass.name;
198 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
201 IF NOT skip_facet THEN
202 DELETE FROM metabib.facet_entry WHERE source = bib_id;
204 IF NOT skip_browse THEN
205 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
209 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
210 IF ind_data.field < 0 THEN
211 ind_data.field = -1 * ind_data.field;
214 IF ind_data.facet_field AND NOT skip_facet THEN
215 INSERT INTO metabib.facet_entry (field, source, value)
216 VALUES (ind_data.field, ind_data.source, ind_data.value);
219 IF ind_data.browse_field AND NOT skip_browse THEN
220 -- A caveat about this SELECT: this should take care of replacing
221 -- old mbe rows when data changes, but not if normalization (by
222 -- which I mean specifically the output of
223 -- evergreen.oils_tsearch2()) changes. It may or may not be
224 -- expensive to add a comparison of index_vector to index_vector
225 -- to the WHERE clause below.
226 SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value;
228 mbe_id := mbe_row.id;
230 INSERT INTO metabib.browse_entry (value) VALUES
231 (metabib.browse_normalize(ind_data.value, ind_data.field));
232 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
235 INSERT INTO metabib.browse_entry_def_map (entry, def, source)
236 VALUES (mbe_id, ind_data.field, ind_data.source);
239 IF ind_data.search_field AND NOT skip_search THEN
241 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
243 quote_literal(ind_data.field) || $$, $$ ||
244 quote_literal(ind_data.source) || $$, $$ ||
245 quote_literal(ind_data.value) ||
251 IF NOT skip_search THEN
252 PERFORM metabib.update_combined_index_vectors(bib_id);
257 $func$ LANGUAGE PLPGSQL;
259 DROP FUNCTION IF EXISTS evergreen.oils_tsearch2() CASCADE;
260 DROP FUNCTION IF EXISTS public.oils_tsearch2() CASCADE;
262 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
266 temp_vector TEXT := '';
271 NEW.index_vector = ''::tsvector;
273 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
275 SELECT n.func AS func,
276 n.param_count AS param_count,
278 FROM config.index_normalizer n
279 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
280 WHERE field = NEW.field
282 EXECUTE 'SELECT ' || normalizer.func || '(' ||
283 quote_literal( value ) ||
285 WHEN normalizer.param_count > 0
286 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
295 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
296 value := ARRAY_TO_STRING(
297 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
299 value := public.search_normalize(value);
300 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
301 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
303 SELECT ts_config, index_weight
304 FROM config.metabib_class_ts_map
305 WHERE field_class = TG_ARGV[0]
306 AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
307 AND always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field)
309 SELECT ts_config, index_weight
310 FROM config.metabib_field_ts_map
311 WHERE metabib_field = NEW.field
312 AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
313 ORDER BY index_weight ASC
315 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
316 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
319 cur_weight = ts_rec.index_weight;
320 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
322 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
324 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
331 CREATE TRIGGER authority_full_rec_fti_trigger
332 BEFORE UPDATE OR INSERT ON authority.full_rec
333 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
335 CREATE TRIGGER authority_simple_heading_fti_trigger
336 BEFORE UPDATE OR INSERT ON authority.simple_heading
337 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
339 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
340 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
341 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
343 CREATE TRIGGER metabib_title_field_entry_fti_trigger
344 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
345 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
347 CREATE TRIGGER metabib_author_field_entry_fti_trigger
348 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
349 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
351 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
352 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
353 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
355 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
356 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
357 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
359 CREATE TRIGGER metabib_series_field_entry_fti_trigger
360 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
361 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
363 CREATE TRIGGER metabib_browse_entry_fti_trigger
364 BEFORE INSERT OR UPDATE ON metabib.browse_entry
365 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
367 CREATE TRIGGER metabib_full_rec_fti_trigger
368 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
369 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
371 INSERT INTO config.ts_config_list(id, name) VALUES
372 ('simple','Non-Stemmed Simple'),
373 ('danish_nostop','Danish Stemmed'),
374 ('dutch_nostop','Dutch Stemmed'),
375 ('english_nostop','English Stemmed'),
376 ('finnish_nostop','Finnish Stemmed'),
377 ('french_nostop','French Stemmed'),
378 ('german_nostop','German Stemmed'),
379 ('hungarian_nostop','Hungarian Stemmed'),
380 ('italian_nostop','Italian Stemmed'),
381 ('norwegian_nostop','Norwegian Stemmed'),
382 ('portuguese_nostop','Portuguese Stemmed'),
383 ('romanian_nostop','Romanian Stemmed'),
384 ('russian_nostop','Russian Stemmed'),
385 ('spanish_nostop','Spanish Stemmed'),
386 ('swedish_nostop','Swedish Stemmed'),
387 ('turkish_nostop','Turkish Stemmed');
389 INSERT INTO config.metabib_class_ts_map(field_class, ts_config, index_weight, always) VALUES
390 ('keyword','simple','A',true),
391 ('keyword','english_nostop','C',true),
392 ('title','simple','A',true),
393 ('title','english_nostop','C',true),
394 ('author','simple','A',true),
395 ('author','english_nostop','C',true),
396 ('series','simple','A',true),
397 ('series','english_nostop','C',true),
398 ('subject','simple','A',true),
399 ('subject','english_nostop','C',true),
400 ('identifier','simple','A',true);
402 CREATE OR REPLACE FUNCTION evergreen.rel_bump(terms TEXT[], value TEXT, bumps TEXT[], mults NUMERIC[]) RETURNS NUMERIC AS
405 my ($terms,$value,$bumps,$mults) = @_;
409 for (my $id = 0; $id < @$bumps; $id++) {
410 if ($bumps->[$id] eq 'first_word') {
411 $retval *= $mults->[$id] if ($value =~ /^$terms->[0]/);
412 } elsif ($bumps->[$id] eq 'full_match') {
413 my $fullmatch = join(' ', @$terms);
414 $retval *= $mults->[$id] if ($value =~ /^$fullmatch$/);
415 } elsif ($bumps->[$id] eq 'word_order') {
416 my $wordorder = join('.*', @$terms);
417 $retval *= $mults->[$id] if ($value =~ /$wordorder/);
421 $BODY$ LANGUAGE plperlu IMMUTABLE STRICT COST 100;
423 UPDATE metabib.identifier_field_entry set value = value;
424 UPDATE metabib.title_field_entry set value = value;
425 UPDATE metabib.author_field_entry set value = value;
426 UPDATE metabib.subject_field_entry set value = value;
427 UPDATE metabib.keyword_field_entry set value = value;
428 UPDATE metabib.series_field_entry set value = value;
430 SELECT metabib.update_combined_index_vectors(id)
431 FROM biblio.record_entry