3 SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
5 CREATE OR REPLACE FUNCTION oils_tsearch2 () RETURNS TRIGGER AS $$
13 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
15 SELECT n.func AS func,
16 n.param_count AS param_count,
18 FROM config.index_normalizer n
19 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
20 WHERE field = NEW.field AND m.pos < 0
22 EXECUTE 'SELECT ' || normalizer.func || '(' ||
23 quote_literal( value ) ||
25 WHEN normalizer.param_count > 0
26 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
36 IF NEW.index_vector = ''::tsvector THEN
40 IF TG_TABLE_NAME::TEXT ~ 'field_entry$' THEN
42 SELECT n.func AS func,
43 n.param_count AS param_count,
45 FROM config.index_normalizer n
46 JOIN config.metabib_field_index_norm_map m ON (m.norm = n.id)
47 WHERE field = NEW.field AND m.pos >= 0
49 EXECUTE 'SELECT ' || normalizer.func || '(' ||
50 quote_literal( value ) ||
52 WHEN normalizer.param_count > 0
53 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
61 IF TG_TABLE_NAME::TEXT ~ 'browse_entry$' THEN
62 value := ARRAY_TO_STRING(
63 evergreen.regexp_split_to_array(value, E'\\W+'), ' '
65 value := public.search_normalize(value);
68 NEW.index_vector = to_tsvector((TG_ARGV[0])::regconfig, value);
74 -- Given a string such as a user might type into a search box, prepare
75 -- two changed variants for TO_TSQUERY(). See
76 -- http://www.postgresql.org/docs/9.0/static/textsearch-controls.html
77 -- The first variant is normalized to match indexed documents regardless
78 -- of diacritics. The second variant keeps its diacritics for proper
79 -- highlighting via TS_HEADLINE().
81 FUNCTION metabib.autosuggest_prepare_tsquery(orig TEXT) RETURNS TEXT[] AS
84 orig_ended_in_space BOOLEAN;
89 orig_ended_in_space := orig ~ E'\\s$';
91 orig := ARRAY_TO_STRING(
92 evergreen.regexp_split_to_array(orig, E'\\W+'), ' '
95 normalized := public.search_normalize(orig); -- also trim()s
98 IF NOT orig_ended_in_space THEN
99 plain := plain || ':*';
100 normalized := normalized || ':*';
103 plain := ARRAY_TO_STRING(
104 evergreen.regexp_split_to_array(plain, E'\\s+'), ' & '
106 normalized := ARRAY_TO_STRING(
107 evergreen.regexp_split_to_array(normalized, E'\\s+'), ' & '
110 RETURN ARRAY[normalized, plain];
116 FUNCTION metabib.suggest_browse_entries(
117 raw_query_text TEXT, -- actually typed by humans at the UI level
118 search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc
119 headline_opts TEXT, -- markup options for ts_headline()
120 visibility_org INTEGER,-- null if you don't want opac visibility test
121 query_limit INTEGER,-- use in LIMIT clause of interal query
122 normalization INTEGER -- argument to TS_RANK_CD()
126 bouyant_and_class_match BOOL,
128 field_weight INTEGER,
131 match TEXT -- marked up
134 prepared_query_texts TEXT[];
137 opac_visibility_join TEXT;
138 search_class_join TEXT;
141 prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text);
143 query := TO_TSQUERY('keyword', prepared_query_texts[1]);
144 plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]);
146 IF visibility_org IS NOT NULL THEN
147 opac_visibility_join := '
148 JOIN asset.opac_visible_copies aovc ON (
149 aovc.record = mbedm.source AND
150 aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4))
153 opac_visibility_join := '';
156 -- The following determines whether we only provide suggestsons matching
157 -- the user's selected search_class, or whether we show other suggestions
158 -- too. The reason for MIN() is that for search_classes like
159 -- 'title|proper|uniform' you would otherwise get multiple rows. The
160 -- implication is that if title as a class doesn't have restrict,
161 -- nor does the proper field, but the uniform field does, you're going
162 -- to get 'false' for your overall evaluation of 'should we restrict?'
163 -- To invert that, change from MIN() to MAX().
167 MIN(cmc.restrict::INT) AS restrict_class,
168 MIN(cmf.restrict::INT) AS restrict_field
169 FROM metabib.search_class_to_registered_components(search_class)
170 AS _registered (field_class TEXT, field INT)
172 config.metabib_class cmc ON (cmc.name = _registered.field_class)
174 config.metabib_field cmf ON (cmf.id = _registered.field);
176 -- evaluate 'should we restrict?'
177 IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN
178 search_class_join := '
180 metabib.search_class_to_registered_components($2)
181 AS _registered (field_class TEXT, field INT) ON (
182 (_registered.field IS NULL AND
183 _registered.field_class = cmf.field_class) OR
184 (_registered.field = cmf.id)
188 search_class_join := '
190 metabib.search_class_to_registered_components($2)
191 AS _registered (field_class TEXT, field INT) ON (
192 _registered.field_class = cmc.name
197 RETURN QUERY EXECUTE 'SELECT *, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT
200 cmc.bouyant AND _registered.field_class IS NOT NULL,
201 _registered.field = cmf.id,
203 TS_RANK_CD(mbe.index_vector, $1, $6),
205 FROM metabib.browse_entry_def_map mbedm
206 JOIN metabib.browse_entry mbe ON (mbe.id = mbedm.entry)
207 JOIN config.metabib_field cmf ON (cmf.id = mbedm.def)
208 JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name)
209 ' || search_class_join || opac_visibility_join ||
210 ' WHERE $1 @@ mbe.index_vector
211 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
213 ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC
214 ' -- sic, repeat the order by clause in the outer select too
216 query, search_class, headline_opts,
217 visibility_org, query_limit, normalization, plain_query
221 -- bouyant AND chosen class = match class
222 -- chosen field = match field
229 $func$ LANGUAGE PLPGSQL;
233 \qecho The following takes about a minute per 100,000 rows in
234 \qecho metabib.browse_entry on my development system, which is only a VM with
235 \qecho 4 GB of memory and 2 cores.
237 \qecho The following is a very loose estimate of how long the next UPDATE
238 \qecho statement would take to finish on MY machine, based on YOUR number
239 \qecho of rows in metabib.browse_entry:
242 SELECT (COUNT(id) / 100000.0) * INTERVAL '1 minute'
243 AS "approximate duration of following UPDATE statement"
244 FROM metabib.browse_entry;
246 UPDATE metabib.browse_entry SET index_vector = TO_TSVECTOR(
248 public.search_normalize(
250 evergreen.regexp_split_to_array(value, E'\\W+'), ' '