3 SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
5 CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
13 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
15 SELECT n.func AS func,
16 n.param_count AS param_count,
18 FROM config.index_normalizer n
19 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
20 WHERE field = NEW.field AND m.pos < 0
22 EXECUTE 'SELECT ' || normalizer.func || '(' ||
23 quote_literal( value ) ||
25 WHEN normalizer.param_count > 0
26 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
36 IF NEW.index_vector = ''::tsvector THEN
40 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
42 SELECT n.func AS func,
43 n.param_count AS param_count,
45 FROM config.index_normalizer n
46 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
47 WHERE field = NEW.field AND m.pos >= 0
49 EXECUTE 'SELECT ' || normalizer.func || '(' ||
50 quote_literal( value ) ||
52 WHEN normalizer.param_count > 0
53 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
61 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
62 value := ARRAY_TO_STRING(
63 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
65 value := public.search_normalize(value);
68 NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);
74 -- Given a string such as a user might type into a search box, prepare
75 -- two changed variants for TO_TSQUERY(). See
76 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
77 -- The first variant is normalized to match indexed documents regardless
78 -- of diacritics. The second variant keeps its diacritics for proper
79 -- highlighting via TS_HEADLINE().
81 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
84 orig_ended_in_space BOOLEAN;
89 orig_ended_in_space := orig ~ E'\\s$';
91 orig := ARRAY_TO_STRING(
92 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
95 normalized := public.search_normalize(orig); -- also trim()s
98 IF NOT orig_ended_in_space THEN
99 plain := plain || ':*';
100 normalized := normalized || ':*';
103 plain := ARRAY_TO_STRING(
104 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
106 normalized := ARRAY_TO_STRING(
107 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
110 RETURN ARRAY[normalized, plain];
115 -- Definition of OUT parameters changes, so must drop first
116 DROP FUNCTION IF EXISTS metabib.suggest_browse_entries (TEXT, TEXT, TEXT, INTEGER, INTEGER, INTEGER);
119 FUNCTION metabib.suggest_browse_entries(
120 raw_query_text TEXT, -- actually typed by humans at the UI level
121 search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
122 headline_opts TEXT, -- markup options for ts_headline()
123 visibility_org INTEGER,-- null if you don't want opac visibility test
124 query_limit INTEGER,-- use in LIMIT clause of interal query
125 normalization INTEGER -- argument to TS_RANK_CD()
129 buoyant_and_class_match BOOL,
131 field_weight INTEGER,
134 match TEXT -- marked up
137 prepared_query_texts TEXT[];
140 opac_visibility_join TEXT;
141 search_class_join TEXT;
144 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
146 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
147 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
149 IF visibility_org IS NOT NULL THEN
150 opac_visibility_join := '
151 JOIN asset.opac_visible_copies aovc ON (
152 aovc.record = mbedm.source AND
153 aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
156 opac_visibility_join := '';
159 -- The following determines whether we only provide suggestsons matching
160 -- the user's selected search_class, or whether we show other suggestions
161 -- too. The reason for MIN() is that for search_classes like
162 -- 'title|proper|uniform' you would otherwise get multiple rows. The
163 -- implication is that if title as a class doesn't have restrict,
164 -- nor does the proper field, but the uniform field does, you're going
165 -- to get 'false' for your overall evaluation of 'should we restrict?'
166 -- To invert that, change from MIN() to MAX().
170 MIN(cmc.restrict::INT) AS restrict_class,
171 MIN(cmf.restrict::INT) AS restrict_field
172 FROM metabib.search_class_to_registered_components(search_class)
173 AS _registered (field_class TEXT, field INT)
175 config.metabib_class cmc ON (cmc.name = _registered.field_class)
177 config.metabib_field cmf ON (cmf.id = _registered.field);
179 -- evaluate 'should we restrict?'
180 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
181 search_class_join := '
183 metabib.search_class_to_registered_components($2)
184 AS _registered (field_class TEXT, field INT) ON (
185 (_registered.field IS NULL AND
186 _registered.field_class = cmf.field_class) OR
187 (_registered.field = cmf.id)
191 search_class_join := '
193 metabib.search_class_to_registered_components($2)
194 AS _registered (field_class TEXT, field INT) ON (
195 _registered.field_class = cmc.name
200 RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
203 cmc.buoyant AND _registered.field_class IS NOT NULL,
204 _registered.field = cmf.id,
206 TS_RANK_CD(mbe.index_vector, $1, $6),
208 FROM metabib.browse_entry_def_map mbedm
209 JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
210 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
211 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
212 ' || search_class_join || opac_visibility_join ||
213 ' WHERE $1 @@ mbe.index_vector
214 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
216 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
217 ' -- sic, repeat the order by clause in the outer select too
219 query, search_class, headline_opts,
220 visibility_org, query_limit, normalization, plain_query
224 -- buoyant AND chosen class = match class
225 -- chosen field = match field
232 $func$ LANGUAGE PLPGSQL;
236 \qecho The following takes about a minute per 100,000 rows in
237 \qecho metabib.browse_entry on my development system, which is only a VM with
238 \qecho 4 GB of memory and 2 cores.
240 \qecho The following is a very loose estimate of how long the next UPDATE
241 \qecho statement would take to finish on MY machine, based on YOUR number
242 \qecho of rows in metabib.browse_entry:
245 SELECT (COUNT(id) / 100000.0) * INTERVAL '1 minute'
246 AS "approximate duration of following UPDATE statement"
247 FROM metabib.browse_entry;
249 UPDATE metabib.browse_entry SET index_vector = TO_TSVECTOR(
251 public.search_normalize(
253 evergreen.regexp_split_to_array(value, E'\\W+'), ' '