From 01ffba4e5e8ed9abb640f288a997f72804b77ebb Mon Sep 17 00:00:00 2001 From: Galen Charlton Date: Fri, 23 Oct 2015 16:29:38 +0000 Subject: [PATCH 1/1] LP#1505286: limit number of facets retrieved This patch teaches search how to limit the number of facets retrieved per defined facet field. Setting a limit is useful so that open-ils.cstore backends don't end up needlessly consuming memory when fetching facets for a large result set; if a broad search retrieves over 10,000 author facets (say), even the most persistant user is not going to actually look at all of them. Fetching fewer facets can also slightly speed up generation of search results. The limit is controlled by a new global flag, search.max_facets_per_field, whose label is "Search: maximum number of facet values to retrieve for each facet field". The default limit value is 1,000, but lower values (e.g., 100) are perhaps even better for most catalogs. To test: [1] Upon applying the patch, set the value of the search.max_facets_per_field global flag to a small value. [2] Perform some searches and verify that the number of facets retrieved doesn't exceed the limit; note that the limit is per facet *field*, not overall or per field class. Signed-off-by: Galen Charlton Signed-off-by: Ben Shum --- .../lib/OpenILS/Application/Search/Biblio.pm | 40 ++----------- .../src/sql/Pg/300.schema.staged_search.sql | 39 ++++++++++++- Open-ILS/src/sql/Pg/950.data.seed-values.sql | 13 +++++ .../Pg/upgrade/XXXX.schema.limit_facets.sql | 56 +++++++++++++++++++ 4 files changed, 113 insertions(+), 35 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql diff --git a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm index 777ef7e50f..1e6a485121 100644 --- a/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm +++ b/Open-ILS/src/perlmods/lib/OpenILS/Application/Search/Biblio.pm @@ -1565,43 +1565,15 @@ sub cache_facets { return undef unless (@$results); - # The query we're constructing - # - # select mfae.field as id, - # mfae.value, - # count(distinct mmrsm.appropriate-id-field ) - # from metabib.facet_entry mfae - # join metabib.metarecord_sourc_map mmrsm on (mfae.source = mmrsm.source) - # where mmrsm.appropriate-id-field in IDLIST - # group by 1,2; - - my $count_field = $metabib ? 'metarecord' : 'source'; + my $facets_function = $metabib ? 'search.facets_for_metarecord_set' + : 'search.facets_for_record_set'; + my $results_str = '{' . join(',', @$results) . '}'; + my $ignore_str = ref($ignore) ? '{' . join(',', @$ignore) . '}' + : '{}'; my $query = { - select => { - mfae => [ { column => 'field', alias => 'id'}, 'value' ], - mmrsm => [{ - transform => 'count', - distinct => 1, - column => $count_field, - alias => 'count', - aggregate => 1 - }] - }, - from => { - mfae => { - mmrsm => { field => 'source', fkey => 'source' }, - cmf => { field => 'id', fkey => 'field' } - } - }, - where => { - '+mmrsm' => { $count_field => $results }, - '+cmf' => { facet_field => 't' } - } + from => [ $facets_function, $ignore_str, $results_str ] }; - $query->{where}->{'+cmf'}->{field_class} = {'not in' => $ignore} - if ref($ignore) and @$ignore > 0; - my $facets = OpenILS::Utils::CStoreEditor->new->json_query($query, {substream => 1}); for my $facet (@$facets) { diff --git a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql index 9fa639fe16..e3f96c0052 100644 --- a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql +++ b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql @@ -429,5 +429,42 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; - +CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$ + SELECT id, value, count FROM ( + SELECT mfae.field AS id, + mfae.value, + COUNT(DISTINCT mmrsm.source), + row_number() OVER ( + PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.source) DESC + ) AS rownum + FROM metabib.facet_entry mfae + JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source) + JOIN config.metabib_field cmf ON (cmf.id = mfae.field) + WHERE mmrsm.source IN (SELECT * FROM unnest(hits)) + AND cmf.facet_field + AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes)) + GROUP by 1, 2 + ) all_facets + WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000)); +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$ + SELECT id, value, count FROM ( + SELECT mfae.field AS id, + mfae.value, + COUNT(DISTINCT mmrsm.metarecord), + row_number() OVER ( + PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC + ) AS rownum + FROM metabib.facet_entry mfae + JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source) + JOIN config.metabib_field cmf ON (cmf.id = mfae.field) + WHERE mmrsm.metarecord IN (SELECT * FROM unnest(hits)) + AND cmf.facet_field + AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes)) + GROUP by 1, 2 + ) all_facets + WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000)); +$$ LANGUAGE SQL; + COMMIT; diff --git a/Open-ILS/src/sql/Pg/950.data.seed-values.sql b/Open-ILS/src/sql/Pg/950.data.seed-values.sql index 2505f50c67..5b8fccec6d 100644 --- a/Open-ILS/src/sql/Pg/950.data.seed-values.sql +++ b/Open-ILS/src/sql/Pg/950.data.seed-values.sql @@ -14683,6 +14683,19 @@ INSERT INTO config.global_flag (name, label, value, enabled) VALUES ( TRUE ); +INSERT INTO config.global_flag (name, value, label, enabled) + VALUES ( + 'search.max_facets_per_field', + '1000', + oils_i18n_gettext( + 'search.max_facets_per_field', + 'Search: maximum number of facet values to retrieve for each facet field', + 'cgf', + 'label' + ), + TRUE + ); + INSERT INTO config.org_unit_setting_type (name, grp, label, description, datatype) VALUES diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql new file mode 100644 index 0000000000..314063421b --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.limit_facets.sql @@ -0,0 +1,56 @@ +BEGIN; + +--- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version); + +CREATE OR REPLACE FUNCTION search.facets_for_record_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$ + SELECT id, value, count FROM ( + SELECT mfae.field AS id, + mfae.value, + COUNT(DISTINCT mmrsm.source), + row_number() OVER ( + PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.source) DESC + ) AS rownum + FROM metabib.facet_entry mfae + JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source) + JOIN config.metabib_field cmf ON (cmf.id = mfae.field) + WHERE mmrsm.source IN (SELECT * FROM unnest(hits)) + AND cmf.facet_field + AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes)) + GROUP by 1, 2 + ) all_facets + WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000)); +$$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION search.facets_for_metarecord_set(ignore_facet_classes TEXT[], hits BIGINT[]) RETURNS TABLE (id INT, value TEXT, count BIGINT) AS $$ + SELECT id, value, count FROM ( + SELECT mfae.field AS id, + mfae.value, + COUNT(DISTINCT mmrsm.metarecord), + row_number() OVER ( + PARTITION BY mfae.field ORDER BY COUNT(distinct mmrsm.metarecord) DESC + ) AS rownum + FROM metabib.facet_entry mfae + JOIN metabib.metarecord_source_map mmrsm ON (mfae.source = mmrsm.source) + JOIN config.metabib_field cmf ON (cmf.id = mfae.field) + WHERE mmrsm.metarecord IN (SELECT * FROM unnest(hits)) + AND cmf.facet_field + AND cmf.field_class NOT IN (SELECT * FROM unnest(ignore_facet_classes)) + GROUP by 1, 2 + ) all_facets + WHERE rownum <= (SELECT COALESCE((SELECT value::INT FROM config.global_flag WHERE name = 'search.max_facets_per_field' AND enabled), 1000)); +$$ LANGUAGE SQL; + +INSERT INTO config.global_flag (name, value, label, enabled) + VALUES ( + 'search.max_facets_per_field', + '1000', + oils_i18n_gettext( + 'search.max_facets_per_field', + 'Search: maximum number of facet values to retrieve for each facet field', + 'cgf', + 'label' + ), + TRUE + ); + +COMMIT; -- 2.43.2