From a67ad7f9a2419301e16d79a078a4a41dcf36d788 Mon Sep 17 00:00:00 2001
From: Mike Rylander <mrylander@gmail.com>
Date: Mon, 7 Jun 2021 15:57:32 -0400
Subject: [PATCH] LP#1931162: DYM needs some situational optimization

For some data sets and some queries the Did You Mean search suggestion
logic can be much too slow. This is mainly in cases where a "misspelled"
word of sufficient length greater than the symspell prefix length is
checked against many short prefixes that have many long suggestions
attached to them.

This commit optimizes for that case in particular by testing the length
of suggestions and prefix keys against the user input to avoid
unnecessary tests.  Futher, it captures the edit distance of suggestions
that pass that test in-line, avoiding expensive retesting, and caches
the short-cutoff edit distance when in low-verbosity mode to avoid
future different-but-not-too-different suggestions coming from the same
prefix key.

It additionally provides a general optimization by batching the capture
of suggest counts to avoid per-suggestion secondary lookups, and a
micro-optimization of ordering suggestions by length at distance cache
time.

Signed-off-by: Mike Rylander <mrylander@gmail.com>
Signed-off-by: Shula Link <slink@gchrl.org>
Signed-off-by: Jason Boyer <JBoyer@equinoxOLI.org>
---
 .../src/sql/Pg/300.schema.staged_search.sql   | 205 +++++++-------
 ...XXX.function.did_you_mean_optimization.sql | 268 ++++++++++++++++++
 2 files changed, 375 insertions(+), 98 deletions(-)
 create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.function.did_you_mean_optimization.sql

diff --git a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
index eba7b96d46..ece24083ba 100644
--- a/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
+++ b/Open-ILS/src/sql/Pg/300.schema.staged_search.sql
@@ -1587,19 +1587,21 @@ CREATE TYPE search.symspell_lookup_output AS (
     word_pos            INT
 );
 
-CREATE OR REPLACE FUNCTION search.symspell_lookup (
-    raw_input       TEXT,
-    search_class    TEXT,
-    verbosity       INT DEFAULT 2,
-    xfer_case       BOOL DEFAULT FALSE,
-    count_threshold INT DEFAULT 1,
-    soundex_weight  INT DEFAULT 0,
-    pg_trgm_weight  INT DEFAULT 0,
-    kbdist_weight   INT DEFAULT 0
-) RETURNS SETOF search.symspell_lookup_output AS $F$
+
+CREATE OR REPLACE FUNCTION search.symspell_lookup(
+    raw_input text,
+    search_class text,
+    verbosity integer DEFAULT 2,
+    xfer_case boolean DEFAULT false,
+    count_threshold integer DEFAULT 1,
+    soundex_weight integer DEFAULT 0,
+    pg_trgm_weight integer DEFAULT 0,
+    kbdist_weight integer DEFAULT 0
+) RETURNS SETOF search.symspell_lookup_output LANGUAGE plpgsql AS $function$
 DECLARE
     prefix_length INT;
     maxED         INT;
+    good_suggs  HSTORE;
     word_list   TEXT[];
     edit_list   TEXT[] := '{}';
     seen_list   TEXT[] := '{}';
@@ -1614,6 +1616,8 @@ DECLARE
     w_pos       INT := -1;
     smallest_ed INT := -1;
     global_ed   INT;
+    i_len       INT;
+    l_maxED     INT;
 BEGIN
     SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
     prefix_length := COALESCE(prefix_length, 6);
@@ -1631,8 +1635,8 @@ BEGIN
                    prefix_key
              FROM  search.symspell_dictionary
              WHERE prefix_key = $1
-                   AND '||search_class||'_count >= $2 
-                   AND '||search_class||'_suggestions @> ARRAY[$1]' 
+                   AND '||search_class||'_count >= $2
+                   AND '||search_class||'_suggestions @> ARRAY[$1]'
           INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
         IF entry.prefix_key IS NOT NULL THEN
             output.lev_distance := 0; -- definitionally
@@ -1658,12 +1662,14 @@ BEGIN
     FOREACH word IN ARRAY word_list LOOP
         w_pos := w_pos + 1;
         input := evergreen.lowercase(word);
+        i_len := CHARACTER_LENGTH(input);
+        l_maxED := maxED;
 
         IF CHARACTER_LENGTH(input) > prefix_length THEN
             prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
-            edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, maxED);
+            edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, l_maxED);
         ELSE
-            edit_list := input || search.symspell_generate_edits(input, 1, maxED);
+            edit_list := input || search.symspell_generate_edits(input, 1, l_maxED);
         END IF;
 
         SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
@@ -1678,106 +1684,109 @@ BEGIN
             IF global_ed IS NOT NULL THEN
                 smallest_ed := global_ed;
             END IF;
+
             FOR entry IN EXECUTE
                 'SELECT  '||search_class||'_suggestions AS suggestions,
                          '||search_class||'_count AS count,
                          prefix_key
                    FROM  search.symspell_dictionary
                    WHERE prefix_key = $1
-                         AND '||search_class||'_suggestions IS NOT NULL' 
+                         AND '||search_class||'_suggestions IS NOT NULL'
                 USING entry_key
             LOOP
-                FOREACH sugg IN ARRAY entry.suggestions LOOP
-                    IF NOT seen_list @> ARRAY[sugg] THEN
-                        seen_list := seen_list || sugg;
-                        IF input = sugg THEN -- exact match, no need to spend time on a call
-                            output.lev_distance := 0;
-                            output.suggestion_count = entry.count;
-                        ELSIF ABS(CHARACTER_LENGTH(input) - CHARACTER_LENGTH(sugg)) > maxED THEN
-                            -- They are definitionally too different to consider, just move on.
-                            CONTINUE;
-                        ELSE
-                            --output.lev_distance := levenshtein_less_equal(
-                            output.lev_distance := evergreen.levenshtein_damerau_edistance(
-                                input,
-                                sugg,
-                                maxED
-                            );
-                            IF output.lev_distance < 0 THEN
-                                -- The Perl module returns -1 for "more distant than max".
-                                output.lev_distance := maxED + 1;
-                                -- This short-circuit's the count test below for speed, bypassing
-                                -- a couple useless tests.
-                                output.suggestion_count := -1;
-                            ELSE
-                                EXECUTE 'SELECT '||search_class||'_count FROM search.symspell_dictionary WHERE prefix_key = $1'
-                                    INTO output.suggestion_count USING sugg;
-                            END IF;
-                        END IF;
 
-                        -- The caller passes a minimum suggestion count threshold (or uses
-                        -- the default of 0) and if the suggestion has that many or less uses
-                        -- then we move on to the next suggestion, since this one is too rare.
-                        CONTINUE WHEN output.suggestion_count < COALESCE(count_threshold,1);
+                SELECT  HSTORE(
+                            ARRAY_AGG(
+                                ARRAY[s, evergreen.levenshtein_damerau_edistance(input,s,l_maxED)::TEXT]
+                                    ORDER BY evergreen.levenshtein_damerau_edistance(input,s,l_maxED) DESC
+                            )
+                        )
+                  INTO  good_suggs
+                  FROM  UNNEST(entry.suggestions) s
+                  WHERE (ABS(CHARACTER_LENGTH(s) - i_len) <= maxEd AND evergreen.levenshtein_damerau_edistance(input,s,l_maxED) BETWEEN 0 AND l_maxED)
+                        AND NOT seen_list @> ARRAY[s];
+
+                CONTINUE WHEN good_suggs IS NULL;
+
+                FOR sugg, output.suggestion_count IN EXECUTE
+                    'SELECT  prefix_key, '||search_class||'_count
+                       FROM  search.symspell_dictionary
+                       WHERE prefix_key = ANY ($1)
+                             AND '||search_class||'_count >= $2'
+                    USING AKEYS(good_suggs), COALESCE(count_threshold,1)
+                LOOP
+
+                    output.lev_distance := good_suggs->sugg;
+                    seen_list := seen_list || sugg;
+
+                    -- Track the smallest edit distance among suggestions from this prefix key.
+                    IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
+                        smallest_ed := output.lev_distance;
+                    END IF;
 
-                        -- Track the smallest edit distance among suggestions from this prefix key.
-                        IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
-                            smallest_ed := output.lev_distance;
+                    -- Track the smallest edit distance for all prefix keys for this word.
+                    IF global_ed IS NULL OR smallest_ed < global_ed THEN
+                        global_ed = smallest_ed;
+                        -- And if low verbosity, ignore suggs with a larger distance from here on.
+                        IF verbosity <= 1 THEN
+                            l_maxED := global_ed;
                         END IF;
+                    END IF;
 
-                        -- Track the smallest edit distance for all prefix keys for this word.
-                        IF global_ed IS NULL OR smallest_ed < global_ed THEN
-                            global_ed = smallest_ed;
+                    -- Lev distance is our main similarity measure. While
+                    -- trgm or soundex similarity could be the main filter,
+                    -- Lev is both language agnostic and faster.
+                    --
+                    -- Here we will skip suggestions that have a longer edit distance
+                    -- than the shortest we've already found. This is simply an
+                    -- optimization that allows us to avoid further processing
+                    -- of this entry. It would be filtered out later.
+                    CONTINUE WHEN output.lev_distance > global_ed AND verbosity <= 1;
+
+                    -- If we have an exact match on the suggestion key we can also avoid
+                    -- some function calls.
+                    IF output.lev_distance = 0 THEN
+                        output.qwerty_kb_match := 1;
+                        output.pg_trgm_sim := 1;
+                        output.soundex_sim := 1;
+                    ELSE
+                        IF kbdist_weight THEN
+                            output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
+                        ELSE
+                            output.qwerty_kb_match := 0;
                         END IF;
+                        IF pg_trgm_weight THEN
+                            output.pg_trgm_sim := similarity(input, sugg);
+                        ELSE
+                            output.pg_trgm_sim := 0;
+                        END IF;
+                        IF soundex_weight THEN
+                            output.soundex_sim := difference(input, sugg) / 4.0;
+                        ELSE
+                            output.soundex_sim := 0;
+                        END IF;
+                    END IF;
 
-                        -- Only proceed if the edit distance is <= the max for the dictionary.
-                        IF output.lev_distance <= maxED THEN
-                            IF output.lev_distance > global_ed AND verbosity <= 1 THEN
-                                -- Lev distance is our main similarity measure. While
-                                -- trgm or soundex similarity could be the main filter,
-                                -- Lev is both language agnostic and faster.
-                                --
-                                -- Here we will skip suggestions that have a longer edit distance
-                                -- than the shortest we've already found. This is simply an
-                                -- optimization that allows us to avoid further processing
-                                -- of this entry. It would be filtered out later.
+                    -- Fill in some fields
+                    IF xfer_case AND input <> word THEN
+                        output.suggestion := search.symspell_transfer_casing(word, sugg);
+                    ELSE
+                        output.suggestion := sugg;
+                    END IF;
+                    output.prefix_key := entry.prefix_key;
+                    output.prefix_key_count := entry.count;
+                    output.input := word;
+                    output.norm_input := input;
+                    output.word_pos := w_pos;
 
-                                CONTINUE;
-                            END IF;
+                    -- We can't "cache" a set of generated records directly, so
+                    -- here we build up an array of search.symspell_lookup_output
+                    -- records that we can revivicate later as a table using UNNEST().
+                    output_list := output_list || output;
 
-                            -- If we have an exact match on the suggestion key we can also avoid
-                            -- some function calls.
-                            IF output.lev_distance = 0 THEN
-                                output.qwerty_kb_match := 1;
-                                output.pg_trgm_sim := 1;
-                                output.soundex_sim := 1;
-                            ELSE
-                                output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
-                                output.pg_trgm_sim := similarity(input, sugg);
-                                output.soundex_sim := difference(input, sugg) / 4.0;
-                            END IF;
+                    EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
+                    CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
 
-                            -- Fill in some fields
-                            IF xfer_case THEN
-                                output.suggestion := search.symspell_transfer_casing(word, sugg);
-                            ELSE
-                                output.suggestion := sugg;
-                            END IF;
-                            output.prefix_key := entry.prefix_key;
-                            output.prefix_key_count := entry.count;
-                            output.input := word;
-                            output.norm_input := input;
-                            output.word_pos := w_pos;
-
-                            -- We can't "cache" a set of generated records directly, so
-                            -- here we build up an array of search.symspell_lookup_output
-                            -- records that we can revivicate later as a table using UNNEST().
-                            output_list := output_list || output;
-
-                            EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
-                            CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
-                        END IF; -- maxED test
-                    END IF; -- suggestion not seen test
                 END LOOP; -- loop over suggestions
             END LOOP; -- loop over entries
         END LOOP; -- loop over entry_keys
@@ -1837,7 +1846,7 @@ BEGIN
         END IF;
     END LOOP; -- loop over words
 END;
-$F$ LANGUAGE PLPGSQL;
+$function$;
 
 CREATE OR REPLACE FUNCTION search.symspell_build_raw_entry (
     raw_input       TEXT,
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.did_you_mean_optimization.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.did_you_mean_optimization.sql
new file mode 100644
index 0000000000..a72c76f709
--- /dev/null
+++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.did_you_mean_optimization.sql
@@ -0,0 +1,268 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION search.symspell_lookup(
+        raw_input text,
+        search_class text,
+        verbosity integer DEFAULT 2,
+        xfer_case boolean DEFAULT false,
+        count_threshold integer DEFAULT 1,
+        soundex_weight integer DEFAULT 0,
+        pg_trgm_weight integer DEFAULT 0,
+        kbdist_weight integer DEFAULT 0
+) RETURNS SETOF search.symspell_lookup_output
+ LANGUAGE plpgsql
+AS $function$
+DECLARE
+    prefix_length INT;
+    maxED         INT;
+    good_suggs  HSTORE;
+    word_list   TEXT[];
+    edit_list   TEXT[] := '{}';
+    seen_list   TEXT[] := '{}';
+    output      search.symspell_lookup_output;
+    output_list search.symspell_lookup_output[];
+    entry       RECORD;
+    entry_key   TEXT;
+    prefix_key  TEXT;
+    sugg        TEXT;
+    input       TEXT;
+    word        TEXT;
+    w_pos       INT := -1;
+    smallest_ed INT := -1;
+    global_ed   INT;
+    i_len       INT;
+    l_maxED     INT;
+BEGIN
+    SELECT value::INT INTO prefix_length FROM config.internal_flag WHERE name = 'symspell.prefix_length' AND enabled;
+    prefix_length := COALESCE(prefix_length, 6);
+
+    SELECT value::INT INTO maxED FROM config.internal_flag WHERE name = 'symspell.max_edit_distance' AND enabled;
+    maxED := COALESCE(maxED, 3);
+
+    word_list := ARRAY_AGG(x) FROM search.symspell_parse_words(raw_input) x;
+
+    -- Common case exact match test for preformance
+    IF verbosity = 0 AND CARDINALITY(word_list) = 1 AND CHARACTER_LENGTH(word_list[1]) <= prefix_length THEN
+        EXECUTE
+          'SELECT  '||search_class||'_suggestions AS suggestions,
+                   '||search_class||'_count AS count,
+                   prefix_key
+             FROM  search.symspell_dictionary
+             WHERE prefix_key = $1
+                   AND '||search_class||'_count >= $2
+                   AND '||search_class||'_suggestions @> ARRAY[$1]'
+          INTO entry USING evergreen.lowercase(word_list[1]), COALESCE(count_threshold,1);
+        IF entry.prefix_key IS NOT NULL THEN
+            output.lev_distance := 0; -- definitionally
+            output.prefix_key := entry.prefix_key;
+            output.prefix_key_count := entry.count;
+            output.suggestion_count := entry.count;
+            output.input := word_list[1];
+            IF xfer_case THEN
+                output.suggestion := search.symspell_transfer_casing(output.input, entry.prefix_key);
+            ELSE
+                output.suggestion := entry.prefix_key;
+            END IF;
+            output.norm_input := entry.prefix_key;
+            output.qwerty_kb_match := 1;
+            output.pg_trgm_sim := 1;
+            output.soundex_sim := 1;
+            RETURN NEXT output;
+            RETURN;
+        END IF;
+    END IF;
+
+    <<word_loop>>
+    FOREACH word IN ARRAY word_list LOOP
+        w_pos := w_pos + 1;
+        input := evergreen.lowercase(word);
+        i_len := CHARACTER_LENGTH(input);
+        l_maxED := maxED;
+
+        IF CHARACTER_LENGTH(input) > prefix_length THEN
+            prefix_key := SUBSTRING(input FROM 1 FOR prefix_length);
+            edit_list := ARRAY[input,prefix_key] || search.symspell_generate_edits(prefix_key, 1, l_maxED);
+        ELSE
+            edit_list := input || search.symspell_generate_edits(input, 1, l_maxED);
+        END IF;
+
+        SELECT ARRAY_AGG(x ORDER BY CHARACTER_LENGTH(x) DESC) INTO edit_list FROM UNNEST(edit_list) x;
+
+        output_list := '{}';
+        seen_list := '{}';
+        global_ed := NULL;
+
+        <<entry_key_loop>>
+        FOREACH entry_key IN ARRAY edit_list LOOP
+            smallest_ed := -1;
+            IF global_ed IS NOT NULL THEN
+                smallest_ed := global_ed;
+            END IF;
+
+            FOR entry IN EXECUTE
+                'SELECT  '||search_class||'_suggestions AS suggestions,
+                         '||search_class||'_count AS count,
+                         prefix_key
+                   FROM  search.symspell_dictionary
+                   WHERE prefix_key = $1
+                         AND '||search_class||'_suggestions IS NOT NULL'
+                USING entry_key
+            LOOP
+
+                SELECT  HSTORE(
+                            ARRAY_AGG(
+                                ARRAY[s, evergreen.levenshtein_damerau_edistance(input,s,l_maxED)::TEXT]
+                                    ORDER BY evergreen.levenshtein_damerau_edistance(input,s,l_maxED) DESC
+                            )
+                        )
+                  INTO  good_suggs
+                  FROM  UNNEST(entry.suggestions) s
+                  WHERE (ABS(CHARACTER_LENGTH(s) - i_len) <= maxEd AND evergreen.levenshtein_damerau_edistance(input,s,l_maxED) BETWEEN 0 AND l_maxED)
+                        AND NOT seen_list @> ARRAY[s];
+
+                CONTINUE WHEN good_suggs IS NULL;
+
+                FOR sugg, output.suggestion_count IN EXECUTE
+                    'SELECT  prefix_key, '||search_class||'_count
+                       FROM  search.symspell_dictionary
+                       WHERE prefix_key = ANY ($1)
+                             AND '||search_class||'_count >= $2'
+                    USING AKEYS(good_suggs), COALESCE(count_threshold,1)
+                LOOP
+
+                    output.lev_distance := good_suggs->sugg;
+                    seen_list := seen_list || sugg;
+
+                    -- Track the smallest edit distance among suggestions from this prefix key.
+                    IF smallest_ed = -1 OR output.lev_distance < smallest_ed THEN
+                        smallest_ed := output.lev_distance;
+                    END IF;
+
+                    -- Track the smallest edit distance for all prefix keys for this word.
+                    IF global_ed IS NULL OR smallest_ed < global_ed THEN
+                        global_ed = smallest_ed;
+                        -- And if low verbosity, ignore suggs with a larger distance from here on.
+                        IF verbosity <= 1 THEN
+                            l_maxED := global_ed;
+                        END IF;
+                    END IF;
+
+                    -- Lev distance is our main similarity measure. While
+                    -- trgm or soundex similarity could be the main filter,
+                    -- Lev is both language agnostic and faster.
+                    --
+                    -- Here we will skip suggestions that have a longer edit distance
+                    -- than the shortest we've already found. This is simply an
+                    -- optimization that allows us to avoid further processing
+                    -- of this entry. It would be filtered out later.
+                    CONTINUE WHEN output.lev_distance > global_ed AND verbosity <= 1;
+
+                    -- If we have an exact match on the suggestion key we can also avoid
+                    -- some function calls.
+                    IF output.lev_distance = 0 THEN
+                        output.qwerty_kb_match := 1;
+                        output.pg_trgm_sim := 1;
+                        output.soundex_sim := 1;
+                    ELSE
+                        IF kbdist_weight THEN
+                            output.qwerty_kb_match := evergreen.qwerty_keyboard_distance_match(input, sugg);
+                        ELSE
+                            output.qwerty_kb_match := 0;
+                        END IF;
+                        IF pg_trgm_weight THEN
+                            output.pg_trgm_sim := similarity(input, sugg);
+                        ELSE
+                            output.pg_trgm_sim := 0;
+                        END IF;
+                        IF soundex_weight THEN
+                            output.soundex_sim := difference(input, sugg) / 4.0;
+                        ELSE
+                            output.soundex_sim := 0;
+                        END IF;
+                    END IF;
+
+                    -- Fill in some fields
+                    IF xfer_case AND input <> word THEN
+                        output.suggestion := search.symspell_transfer_casing(word, sugg);
+                    ELSE
+                        output.suggestion := sugg;
+                    END IF;
+                    output.prefix_key := entry.prefix_key;
+                    output.prefix_key_count := entry.count;
+                    output.input := word;
+                    output.norm_input := input;
+                    output.word_pos := w_pos;
+
+                    -- We can't "cache" a set of generated records directly, so
+                    -- here we build up an array of search.symspell_lookup_output
+                    -- records that we can revivicate later as a table using UNNEST().
+                    output_list := output_list || output;
+
+                    EXIT entry_key_loop WHEN smallest_ed = 0 AND verbosity = 0; -- exact match early exit
+                    CONTINUE entry_key_loop WHEN smallest_ed = 0 AND verbosity = 1; -- exact match early jump to the next key
+
+                END LOOP; -- loop over suggestions
+            END LOOP; -- loop over entries
+        END LOOP; -- loop over entry_keys
+
+        -- Now we're done examining this word
+        IF verbosity = 0 THEN
+            -- Return the "best" suggestion from the smallest edit
+            -- distance group.  We define best based on the weighting
+            -- of the non-lev similarity measures and use the suggestion
+            -- use count to break ties.
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC
+                        LIMIT 1;
+        ELSIF verbosity = 1 THEN
+            -- Return all suggestions from the smallest
+            -- edit distance group.
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list) WHERE lev_distance = smallest_ed
+                    ORDER BY (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 2 THEN
+            -- Return everything we find, along with relevant stats
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 3 THEN
+            -- Return everything we find from the two smallest edit distance groups
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) ORDER BY 1 LIMIT 2)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        ELSIF verbosity = 4 THEN
+            -- Return everything we find from the two smallest edit distance groups that are NOT 0 distance
+            RETURN QUERY
+                SELECT * FROM UNNEST(output_list)
+                    WHERE lev_distance IN (SELECT DISTINCT lev_distance FROM UNNEST(output_list) WHERE lev_distance > 0 ORDER BY 1 LIMIT 2)
+                    ORDER BY lev_distance,
+                        (soundex_sim * COALESCE(soundex_weight,0))
+                            + (pg_trgm_sim * COALESCE(pg_trgm_weight,0))
+                            + (qwerty_kb_match * COALESCE(kbdist_weight,0)) DESC,
+                        suggestion_count DESC;
+        END IF;
+    END LOOP; -- loop over words
+END;
+$function$;
+
+COMMIT;
+
-- 
2.43.2