3 SET search_path = public, pg_catalog;
9 FOR lang IN SELECT substring(pptsd.dictname from '(.*)_stem$') AS lang FROM pg_catalog.pg_ts_dict pptsd JOIN pg_catalog.pg_namespace ppn ON ppn.oid = pptsd.dictnamespace
10 WHERE ppn.nspname = 'pg_catalog' AND pptsd.dictname LIKE '%_stem' LOOP
11 RAISE NOTICE 'FOUND LANGUAGE %', lang;
13 EXECUTE 'DROP TEXT SEARCH DICTIONARY IF EXISTS ' || lang || '_nostop CASCADE;
14 CREATE TEXT SEARCH DICTIONARY ' || lang || '_nostop (TEMPLATE=pg_catalog.snowball, language=''' || lang || ''');
15 COMMENT ON TEXT SEARCH DICTIONARY ' || lang || '_nostop IS ''' ||lang || ' snowball stemmer with no stopwords for ASCII words only.'';
16 CREATE TEXT SEARCH CONFIGURATION ' || lang || '_nostop ( COPY = pg_catalog.' || lang || ' );
17 ALTER TEXT SEARCH CONFIGURATION ' || lang || '_nostop ALTER MAPPING FOR word, hword, hword_part WITH pg_catalog.simple;
18 ALTER TEXT SEARCH CONFIGURATION ' || lang || '_nostop ALTER MAPPING FOR asciiword, asciihword, hword_asciipart WITH ' || lang || '_nostop;';
23 CREATE TEXT SEARCH CONFIGURATION keyword ( COPY = english_nostop );
24 CREATE TEXT SEARCH CONFIGURATION "default" ( COPY = english_nostop );
26 SET search_path = evergreen, public, pg_catalog;
28 ALTER TABLE config.metabib_class
29 ADD COLUMN a_weight NUMERIC DEFAULT 1.0 NOT NULL,
30 ADD COLUMN b_weight NUMERIC DEFAULT 0.4 NOT NULL,
31 ADD COLUMN c_weight NUMERIC DEFAULT 0.2 NOT NULL,
32 ADD COLUMN d_weight NUMERIC DEFAULT 0.1 NOT NULL;
34 CREATE TABLE config.ts_config_list (
38 COMMENT ON TABLE config.ts_config_list IS $$
41 A list of full text configs with names and descriptions.
44 CREATE TABLE config.metabib_class_ts_map (
45 id SERIAL PRIMARY KEY,
46 field_class TEXT NOT NULL REFERENCES config.metabib_class (name),
47 ts_config TEXT NOT NULL REFERENCES config.ts_config_list (id),
48 active BOOL NOT NULL DEFAULT TRUE,
49 index_weight CHAR(1) NOT NULL DEFAULT 'C' CHECK (index_weight IN ('A','B','C','D')),
51 search_lang TEXT NULL,
52 always BOOL NOT NULL DEFAULT true
54 COMMENT ON TABLE config.metabib_class_ts_map IS $$
55 Text Search Configs for metabib class indexing
57 This table contains text search config definitions for
58 storing index_vector values.
61 CREATE TABLE config.metabib_field_ts_map (
62 id SERIAL PRIMARY KEY,
63 metabib_field INT NOT NULL REFERENCES config.metabib_field (id),
64 ts_config TEXT NOT NULL REFERENCES config.ts_config_list (id),
65 active BOOL NOT NULL DEFAULT TRUE,
66 index_weight CHAR(1) NOT NULL DEFAULT 'C' CHECK (index_weight IN ('A','B','C','D')),
70 COMMENT ON TABLE config.metabib_field_ts_map IS $$
71 Text Search Configs for metabib field indexing
73 This table contains text search config definitions for
74 storing index_vector values.
77 CREATE TABLE metabib.combined_identifier_field_entry (
78 record BIGINT NOT NULL,
79 metabib_field INT NULL,
80 index_vector tsvector NOT NULL
82 CREATE UNIQUE INDEX metabib_combined_identifier_field_entry_fakepk_idx ON metabib.combined_identifier_field_entry (record, COALESCE(metabib_field::TEXT,''));
83 CREATE INDEX metabib_combined_identifier_field_entry_index_vector_idx ON metabib.combined_identifier_field_entry USING GIST (index_vector);
84 CREATE INDEX metabib_combined_identifier_field_source_idx ON metabib.combined_identifier_field_entry (metabib_field);
86 CREATE TABLE metabib.combined_title_field_entry (
87 record BIGINT NOT NULL,
88 metabib_field INT NULL,
89 index_vector tsvector NOT NULL
91 CREATE UNIQUE INDEX metabib_combined_title_field_entry_fakepk_idx ON metabib.combined_title_field_entry (record, COALESCE(metabib_field::TEXT,''));
92 CREATE INDEX metabib_combined_title_field_entry_index_vector_idx ON metabib.combined_title_field_entry USING GIST (index_vector);
93 CREATE INDEX metabib_combined_title_field_source_idx ON metabib.combined_title_field_entry (metabib_field);
95 CREATE TABLE metabib.combined_author_field_entry (
96 record BIGINT NOT NULL,
97 metabib_field INT NULL,
98 index_vector tsvector NOT NULL
100 CREATE UNIQUE INDEX metabib_combined_author_field_entry_fakepk_idx ON metabib.combined_author_field_entry (record, COALESCE(metabib_field::TEXT,''));
101 CREATE INDEX metabib_combined_author_field_entry_index_vector_idx ON metabib.combined_author_field_entry USING GIST (index_vector);
102 CREATE INDEX metabib_combined_author_field_source_idx ON metabib.combined_author_field_entry (metabib_field);
104 CREATE TABLE metabib.combined_subject_field_entry (
105 record BIGINT NOT NULL,
106 metabib_field INT NULL,
107 index_vector tsvector NOT NULL
109 CREATE UNIQUE INDEX metabib_combined_subject_field_entry_fakepk_idx ON metabib.combined_subject_field_entry (record, COALESCE(metabib_field::TEXT,''));
110 CREATE INDEX metabib_combined_subject_field_entry_index_vector_idx ON metabib.combined_subject_field_entry USING GIST (index_vector);
111 CREATE INDEX metabib_combined_subject_field_source_idx ON metabib.combined_subject_field_entry (metabib_field);
113 CREATE TABLE metabib.combined_keyword_field_entry (
114 record BIGINT NOT NULL,
115 metabib_field INT NULL,
116 index_vector tsvector NOT NULL
118 CREATE UNIQUE INDEX metabib_combined_keyword_field_entry_fakepk_idx ON metabib.combined_keyword_field_entry (record, COALESCE(metabib_field::TEXT,''));
119 CREATE INDEX metabib_combined_keyword_field_entry_index_vector_idx ON metabib.combined_keyword_field_entry USING GIST (index_vector);
120 CREATE INDEX metabib_combined_keyword_field_source_idx ON metabib.combined_keyword_field_entry (metabib_field);
122 CREATE TABLE metabib.combined_series_field_entry (
123 record BIGINT NOT NULL,
124 metabib_field INT NULL,
125 index_vector tsvector NOT NULL
127 CREATE UNIQUE INDEX metabib_combined_series_field_entry_fakepk_idx ON metabib.combined_series_field_entry (record, COALESCE(metabib_field::TEXT,''));
128 CREATE INDEX metabib_combined_series_field_entry_index_vector_idx ON metabib.combined_series_field_entry USING GIST (index_vector);
129 CREATE INDEX metabib_combined_series_field_source_idx ON metabib.combined_series_field_entry (metabib_field);
131 CREATE OR REPLACE FUNCTION metabib.update_combined_index_vectors(bib_id BIGINT) RETURNS VOID AS $func$
133 DELETE FROM metabib.combined_keyword_field_entry WHERE record = bib_id;
134 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
135 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
136 FROM metabib.keyword_field_entry WHERE source = bib_id GROUP BY field;
137 INSERT INTO metabib.combined_keyword_field_entry(record, metabib_field, index_vector)
138 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
139 FROM metabib.keyword_field_entry WHERE source = bib_id;
141 DELETE FROM metabib.combined_title_field_entry WHERE record = bib_id;
142 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
143 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
144 FROM metabib.title_field_entry WHERE source = bib_id GROUP BY field;
145 INSERT INTO metabib.combined_title_field_entry(record, metabib_field, index_vector)
146 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
147 FROM metabib.title_field_entry WHERE source = bib_id;
149 DELETE FROM metabib.combined_author_field_entry WHERE record = bib_id;
150 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
151 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
152 FROM metabib.author_field_entry WHERE source = bib_id GROUP BY field;
153 INSERT INTO metabib.combined_author_field_entry(record, metabib_field, index_vector)
154 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
155 FROM metabib.author_field_entry WHERE source = bib_id;
157 DELETE FROM metabib.combined_subject_field_entry WHERE record = bib_id;
158 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
159 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
160 FROM metabib.subject_field_entry WHERE source = bib_id GROUP BY field;
161 INSERT INTO metabib.combined_subject_field_entry(record, metabib_field, index_vector)
162 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
163 FROM metabib.subject_field_entry WHERE source = bib_id;
165 DELETE FROM metabib.combined_series_field_entry WHERE record = bib_id;
166 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
167 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
168 FROM metabib.series_field_entry WHERE source = bib_id GROUP BY field;
169 INSERT INTO metabib.combined_series_field_entry(record, metabib_field, index_vector)
170 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
171 FROM metabib.series_field_entry WHERE source = bib_id;
173 DELETE FROM metabib.combined_identifier_field_entry WHERE record = bib_id;
174 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
175 SELECT bib_id, field, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
176 FROM metabib.identifier_field_entry WHERE source = bib_id GROUP BY field;
177 INSERT INTO metabib.combined_identifier_field_entry(record, metabib_field, index_vector)
178 SELECT bib_id, NULL, strip(COALESCE(string_agg(index_vector::TEXT,' '),'')::tsvector)
179 FROM metabib.identifier_field_entry WHERE source = bib_id;
182 $func$ LANGUAGE PLPGSQL;
184 CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
187 ind_data metabib.field_entry_template%ROWTYPE;
188 mbe_row metabib.browse_entry%ROWTYPE;
191 PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
193 IF NOT skip_search THEN
194 FOR fclass IN SELECT * FROM config.metabib_class LOOP
195 -- RAISE NOTICE 'Emptying out %', fclass.name;
196 EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id;
199 IF NOT skip_facet THEN
200 DELETE FROM metabib.facet_entry WHERE source = bib_id;
202 IF NOT skip_browse THEN
203 DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
207 FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
208 IF ind_data.field < 0 THEN
209 ind_data.field = -1 * ind_data.field;
212 IF ind_data.facet_field AND NOT skip_facet THEN
213 INSERT INTO metabib.facet_entry (field, source, value)
214 VALUES (ind_data.field, ind_data.source, ind_data.value);
217 IF ind_data.browse_field AND NOT skip_browse THEN
218 -- A caveat about this SELECT: this should take care of replacing
219 -- old mbe rows when data changes, but not if normalization (by
220 -- which I mean specifically the output of
221 -- evergreen.oils_tsearch2()) changes. It may or may not be
222 -- expensive to add a comparison of index_vector to index_vector
223 -- to the WHERE clause below.
224 SELECT INTO mbe_row * FROM metabib.browse_entry WHERE value = ind_data.value;
226 mbe_id := mbe_row.id;
228 INSERT INTO metabib.browse_entry (value) VALUES
229 (metabib.browse_normalize(ind_data.value, ind_data.field));
230 mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS);
233 INSERT INTO metabib.browse_entry_def_map (entry, def, source)
234 VALUES (mbe_id, ind_data.field, ind_data.source);
237 IF ind_data.search_field AND NOT skip_search THEN
239 INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value)
241 quote_literal(ind_data.field) || $$, $$ ||
242 quote_literal(ind_data.source) || $$, $$ ||
243 quote_literal(ind_data.value) ||
249 IF NOT skip_search THEN
250 PERFORM metabib.update_combined_index_vectors(bib_id);
255 $func$ LANGUAGE PLPGSQL;
257 DROP FUNCTION IF EXISTS evergreen.oils_tsearch2() CASCADE;
258 DROP FUNCTION IF EXISTS public.oils_tsearch2() CASCADE;
260 CREATE OR REPLACE FUNCTION public.oils_tsearch2 () RETURNS TRIGGER AS $$
264 temp_vector TEXT := '';
269 NEW.index_vector = ''::tsvector;
271 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
273 SELECT n.func AS func,
274 n.param_count AS param_count,
276 FROM config.index_normalizer n
277 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
278 WHERE field = NEW.field
280 EXECUTE 'SELECT ' || normalizer.func || '(' ||
281 quote_literal( value ) ||
283 WHEN normalizer.param_count > 0
284 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
293 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
294 value := ARRAY_TO_STRING(
295 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
297 value := public.search_normalize(value);
298 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
299 ELSIF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
301 SELECT ts_config, index_weight
302 FROM config.metabib_class_ts_map
303 WHERE field_class = TG_ARGV[0]
304 AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
305 AND always OR NOT EXISTS (SELECT 1 FROM config.metabib_field_ts_map WHERE metabib_field = NEW.field)
307 SELECT ts_config, index_weight
308 FROM config.metabib_field_ts_map
309 WHERE metabib_field = NEW.field
310 AND index_lang IS NULL OR EXISTS (SELECT 1 FROM metabib.record_attr WHERE id = NEW.source AND index_lang IN(attrs->'item_lang',attrs->'language'))
311 ORDER BY index_weight ASC
313 IF cur_weight IS NOT NULL AND cur_weight != ts_rec.index_weight THEN
314 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
317 cur_weight = ts_rec.index_weight;
318 SELECT INTO temp_vector temp_vector || ' ' || to_tsvector(ts_rec.ts_config::regconfig, value)::TEXT;
320 NEW.index_vector = NEW.index_vector || setweight(temp_vector::tsvector,cur_weight);
322 NEW.index_vector = to_tsvector(TG_ARGV[0]::regconfig, value);
329 CREATE TRIGGER authority_full_rec_fti_trigger
330 BEFORE UPDATE OR INSERT ON authority.full_rec
331 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
333 CREATE TRIGGER authority_simple_heading_fti_trigger
334 BEFORE UPDATE OR INSERT ON authority.simple_heading
335 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
337 CREATE TRIGGER metabib_identifier_field_entry_fti_trigger
338 BEFORE UPDATE OR INSERT ON metabib.identifier_field_entry
339 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('identifier');
341 CREATE TRIGGER metabib_title_field_entry_fti_trigger
342 BEFORE UPDATE OR INSERT ON metabib.title_field_entry
343 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('title');
345 CREATE TRIGGER metabib_author_field_entry_fti_trigger
346 BEFORE UPDATE OR INSERT ON metabib.author_field_entry
347 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('author');
349 CREATE TRIGGER metabib_subject_field_entry_fti_trigger
350 BEFORE UPDATE OR INSERT ON metabib.subject_field_entry
351 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('subject');
353 CREATE TRIGGER metabib_keyword_field_entry_fti_trigger
354 BEFORE UPDATE OR INSERT ON metabib.keyword_field_entry
355 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
357 CREATE TRIGGER metabib_series_field_entry_fti_trigger
358 BEFORE UPDATE OR INSERT ON metabib.series_field_entry
359 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('series');
361 CREATE TRIGGER metabib_browse_entry_fti_trigger
362 BEFORE INSERT OR UPDATE ON metabib.browse_entry
363 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('keyword');
365 CREATE TRIGGER metabib_full_rec_fti_trigger
366 BEFORE UPDATE OR INSERT ON metabib.real_full_rec
367 FOR EACH ROW EXECUTE PROCEDURE oils_tsearch2('default');
369 INSERT INTO config.ts_config_list(id, name) VALUES
370 ('simple','Non-Stemmed Simple'),
371 ('danish_nostop','Danish Stemmed'),
372 ('dutch_nostop','Dutch Stemmed'),
373 ('english_nostop','English Stemmed'),
374 ('finnish_nostop','Finnish Stemmed'),
375 ('french_nostop','French Stemmed'),
376 ('german_nostop','German Stemmed'),
377 ('hungarian_nostop','Hungarian Stemmed'),
378 ('italian_nostop','Italian Stemmed'),
379 ('norwegian_nostop','Norwegian Stemmed'),
380 ('portuguese_nostop','Portuguese Stemmed'),
381 ('romanian_nostop','Romanian Stemmed'),
382 ('russian_nostop','Russian Stemmed'),
383 ('spanish_nostop','Spanish Stemmed'),
384 ('swedish_nostop','Swedish Stemmed'),
385 ('turkish_nostop','Turkish Stemmed');
387 INSERT INTO config.metabib_class_ts_map(field_class, ts_config, index_weight, always) VALUES
388 ('keyword','simple','A',true),
389 ('keyword','english_nostop','C',true),
390 ('title','simple','A',true),
391 ('title','english_nostop','C',true),
392 ('author','simple','A',true),
393 ('author','english_nostop','C',true),
394 ('series','simple','A',true),
395 ('series','english_nostop','C',true),
396 ('subject','simple','A',true),
397 ('subject','english_nostop','C',true),
398 ('identifier','simple','A',true);
400 CREATE OR REPLACE FUNCTION evergreen.rel_bump(terms TEXT[], value TEXT, bumps TEXT[], mults NUMERIC[]) RETURNS NUMERIC AS
403 my ($terms,$value,$bumps,$mults) = @_;
407 for (my $id = 0; $id < @$bumps; $id++) {
408 if ($bumps->[$id] eq 'first_word') {
409 $retval *= $mults->[$id] if ($value =~ /^$terms->[0]/);
410 } elsif ($bumps->[$id] eq 'full_match') {
411 my $fullmatch = join(' ', @$terms);
412 $retval *= $mults->[$id] if ($value =~ /^$fullmatch$/);
413 } elsif ($bumps->[$id] eq 'word_order') {
414 my $wordorder = join('.*', @$terms);
415 $retval *= $mults->[$id] if ($value =~ /$wordorder/);
419 $BODY$ LANGUAGE plperlu IMMUTABLE STRICT COST 100;
421 UPDATE metabib.identifier_field_entry set value = value;
422 UPDATE metabib.title_field_entry set value = value;
423 UPDATE metabib.author_field_entry set value = value;
424 UPDATE metabib.subject_field_entry set value = value;
425 UPDATE metabib.keyword_field_entry set value = value;
426 UPDATE metabib.series_field_entry set value = value;
428 SELECT metabib.update_combined_index_vectors(id)
429 FROM biblio.record_entry