BEGIN; SELECT evergreen.upgrade_deps_block_check('0680', :eg_version); -- Not much use in having identifier-class fields be suggestions. Credit for the idea goes to Ben Shum. UPDATE config.metabib_field SET browse_field = FALSE WHERE id < 100 AND field_class = 'identifier'; --------------------------------------------------------------------------- -- The rest of this was tested on Evergreen Indiana's dev server, which has -- a large data set of 2.6M bibs, and was instrumental in sussing out the -- needed adjustments. Thanks, EG-IN! --------------------------------------------------------------------------- -- GIN indexes are /much/ better for prefix matching, which is important for browse and autosuggest DROP INDEX metabib.metabib_browse_entry_index_vector_idx; CREATE INDEX metabib_browse_entry_index_vector_idx ON metabib.browse_entry USING GIN (index_vector); -- We need thes to make the autosuggest limiting joins fast CREATE INDEX browse_entry_def_map_def_idx ON metabib.browse_entry_def_map (def); CREATE INDEX browse_entry_def_map_entry_idx ON metabib.browse_entry_def_map (entry); CREATE INDEX browse_entry_def_map_source_idx ON metabib.browse_entry_def_map (source); -- In practice this will always be ~1 row, and the default of 1000 causes terrible plans ALTER FUNCTION metabib.search_class_to_registered_components(text) ROWS 1; -- Reworking of the generated query to act in a sane manner in the face of large datasets CREATE OR REPLACE FUNCTION metabib.suggest_browse_entries( raw_query_text TEXT, -- actually typed by humans at the UI level search_class TEXT, -- 'alias' or 'class' or 'class|field..', etc headline_opts TEXT, -- markup options for ts_headline() visibility_org INTEGER,-- null if you don't want opac visibility test query_limit INTEGER,-- use in LIMIT clause of interal query normalization INTEGER -- argument to TS_RANK_CD() ) RETURNS TABLE ( value TEXT, -- plain field INTEGER, buoyant_and_class_match BOOL, field_match BOOL, field_weight INTEGER, rank REAL, buoyant BOOL, match TEXT -- marked up ) AS $func$ DECLARE prepared_query_texts TEXT[]; query TSQUERY; plain_query TSQUERY; opac_visibility_join TEXT; search_class_join TEXT; r_fields RECORD; BEGIN prepared_query_texts := metabib.autosuggest_prepare_tsquery(raw_query_text); query := TO_TSQUERY('keyword', prepared_query_texts[1]); plain_query := TO_TSQUERY('keyword', prepared_query_texts[2]); IF visibility_org IS NOT NULL THEN opac_visibility_join := ' JOIN asset.opac_visible_copies aovc ON ( aovc.record = x.source AND aovc.circ_lib IN (SELECT id FROM actor.org_unit_descendants($4)) )'; ELSE opac_visibility_join := ''; END IF; -- The following determines whether we only provide suggestsons matching -- the user's selected search_class, or whether we show other suggestions -- too. The reason for MIN() is that for search_classes like -- 'title|proper|uniform' you would otherwise get multiple rows. The -- implication is that if title as a class doesn't have restrict, -- nor does the proper field, but the uniform field does, you're going -- to get 'false' for your overall evaluation of 'should we restrict?' -- To invert that, change from MIN() to MAX(). SELECT INTO r_fields MIN(cmc.restrict::INT) AS restrict_class, MIN(cmf.restrict::INT) AS restrict_field FROM metabib.search_class_to_registered_components(search_class) AS _registered (field_class TEXT, field INT) JOIN config.metabib_class cmc ON (cmc.name = _registered.field_class) LEFT JOIN config.metabib_field cmf ON (cmf.id = _registered.field); -- evaluate 'should we restrict?' IF r_fields.restrict_field::BOOL OR r_fields.restrict_class::BOOL THEN search_class_join := ' JOIN metabib.search_class_to_registered_components($2) AS _registered (field_class TEXT, field INT) ON ( (_registered.field IS NULL AND _registered.field_class = cmf.field_class) OR (_registered.field = cmf.id) ) '; ELSE search_class_join := ' LEFT JOIN metabib.search_class_to_registered_components($2) AS _registered (field_class TEXT, field INT) ON ( _registered.field_class = cmc.name ) '; END IF; RETURN QUERY EXECUTE ' SELECT DISTINCT x.value, x.id, x.push, x.restrict, x.weight, x.ts_rank_cd, x.buoyant, TS_HEADLINE(value, $7, $3) FROM (SELECT DISTINCT mbe.value, cmf.id, cmc.buoyant AND _registered.field_class IS NOT NULL AS push, _registered.field = cmf.id AS restrict, cmf.weight, TS_RANK_CD(mbe.index_vector, $1, $6), cmc.buoyant, mbedm.source FROM metabib.browse_entry_def_map mbedm -- Start with a pre-limited set of 10k possible suggestions. More than that is not going to be useful anyway JOIN (SELECT * FROM metabib.browse_entry WHERE index_vector @@ $1 LIMIT 10000) mbe ON (mbe.id = mbedm.entry) JOIN config.metabib_field cmf ON (cmf.id = mbedm.def) JOIN config.metabib_class cmc ON (cmf.field_class = cmc.name) ' || search_class_join || ' ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC LIMIT 1000) AS x -- This outer limit makes testing for opac visibility usably fast ' || opac_visibility_join || ' ORDER BY 3 DESC, 4 DESC NULLS LAST, 5 DESC, 6 DESC, 7 DESC, 1 ASC LIMIT $5 ' -- sic, repeat the order by clause in the outer select too USING query, search_class, headline_opts, visibility_org, query_limit, normalization, plain_query ; -- sort order: -- buoyant AND chosen class = match class -- chosen field = match field -- field weight -- rank -- buoyancy -- value itself END; $func$ LANGUAGE PLPGSQL; COMMIT;