From 4196a3a4c181debd68be410dc18897bfdc4841e2 Mon Sep 17 00:00:00 2001 From: Bill Erickson Date: Thu, 11 May 2017 11:56:32 -0400 Subject: [PATCH] LP#1251394 config.display_field_map / flat json display Signed-off-by: Bill Erickson Signed-off-by: Mike Rylander --- Open-ILS/examples/fm_IDL.xml | 48 +-- .../XXXX.schema.metabib-display-field.sql | 106 ++---- .../YYYY.data.metabib-display-field.sql | 46 +++ .../ZZZZ.UNDO.metabib-display-field.sql | 306 ++++++++++++++++++ 4 files changed, 401 insertions(+), 105 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/YYYY.data.metabib-display-field.sql create mode 100644 Open-ILS/src/sql/Pg/upgrade/ZZZZ.UNDO.metabib-display-field.sql diff --git a/Open-ILS/examples/fm_IDL.xml b/Open-ILS/examples/fm_IDL.xml index f4c7d6d4cd..dc283f0aa9 100644 --- a/Open-ILS/examples/fm_IDL.xml +++ b/Open-ILS/examples/fm_IDL.xml @@ -3741,41 +3741,47 @@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA - - SELECT - mde.source, - mde.value, - cmf.id AS field, - cmf.field_class, - cmf.name, - cmf.label, - cmc.name AS representative - FROM metabib.display_entry mde - JOIN config.metabib_field cmf ON (cmf.id = mde.field) LEFT JOIN config.metabib_class cmc ON - (cmc.name = cmf.field_class AND cmc.representative_field = cmf.id) - + reporter:label="Flat Display Field Entry View" + oils_persist:readonly="true"> - - - - + + + + + + + + + + + + + + + - - - + + + + diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.metabib-display-field.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.metabib-display-field.sql index 1fe26061f2..6066bbad71 100644 --- a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.metabib-display-field.sql +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.metabib-display-field.sql @@ -3,34 +3,34 @@ BEGIN; ALTER TABLE config.metabib_field ADD COLUMN display_xpath TEXT, - ADD COLUMN display_field BOOL NOT NULL DEFAULT TRUE; - -CREATE OR REPLACE FUNCTION - config.metabib_representative_field_is_valid(INTEGER, TEXT) RETURNS BOOLEAN AS $$ - SELECT EXISTS (SELECT 1 FROM config.metabib_field WHERE id = $1 AND field_class = $2); -$$ LANGUAGE SQL STRICT IMMUTABLE; - -COMMENT ON FUNCTION config.metabib_representative_field_is_valid(INTEGER, TEXT) IS $$ -Ensure the field_class value on the selected representative field matches -the class name. -$$; - -ALTER TABLE config.metabib_class - ADD COLUMN representative_field - INTEGER REFERENCES config.metabib_field(id), - ADD CONSTRAINT rep_field_unique UNIQUE(representative_field), - ADD CONSTRAINT rep_field_is_valid CHECK ( - representative_field IS NULL OR - config.metabib_representative_field_is_valid(representative_field, name) - ); + ADD COLUMN display_field BOOL NOT NULL DEFAULT FALSE; CREATE TABLE metabib.display_entry ( id BIGSERIAL PRIMARY KEY, - source BIGINT NOT NULL, - field INT NOT NULL, + source BIGINT NOT NULL REFERENCES biblio.record_entry (id), + field INT NOT NULL REFERENCES config.metabib_field (id), value TEXT NOT NULL ); +CREATE TABLE config.display_field_map ( + name TEXT PRIMARY KEY, + field INTEGER REFERENCES config.metabib_field (id), + multi BOOLEAN DEFAULT FALSE +); + +CREATE VIEW metabib.flat_display_entry AS + SELECT + mde.source, + cdfm.name, + CASE WHEN cdfm.multi THEN + TO_JSON(ARRAY_AGG(value)) + ELSE + TO_JSON(MIN(value)) + END AS value + FROM metabib.display_entry mde + JOIN config.display_field_map cdfm ON (cdfm.field = mde.field) + GROUP BY 1, 2; + CREATE INDEX metabib_display_entry_field_idx ON metabib.display_entry (field); CREATE INDEX metabib_display_entry_source_idx @@ -399,69 +399,7 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; - --- DATA -------------------------------------- - -UPDATE config.metabib_field SET display_field = FALSE - WHERE field_class = 'keyword' OR name = 'complete'; - -INSERT INTO config.internal_flag (name, enabled) - VALUES ('ingest.skip_display_indexing', FALSE); - --- personal author -UPDATE config.metabib_class SET representative_field = 8 WHERE name = 'author'; --- title proper -UPDATE config.metabib_class SET representative_field = 6 WHERE name = 'title'; - COMMIT; -/* --- Ham-fisted reingest for Testing --------------------- - --- disable everything we can for reindexing -UPDATE config.internal_flag SET enabled = TRUE WHERE name IN ( - 'ingest.assume_inserts_only', - 'ingest.disable_authority_auto_update', - 'ingest.disable_authority_linking', - 'ingest.disable_located_uri', - 'ingest.disable_metabib_field_entry', - 'ingest.disable_metabib_full_rec', - 'ingest.disable_metabib_rec_descriptor', - 'ingest.metarecord_mapping.preserve_on_delete', - 'ingest.metarecord_mapping.skip_on_insert', - 'ingest.metarecord_mapping.skip_on_update', - 'ingest.reingest.force_on_same_marc', - 'ingest.skip_browse_indexing', - 'ingest.skip_facet_indexing', - 'ingest.skip_search_indexing' -); - -UPDATE config.internal_flag SET enabled = TRUE - WHERE name = 'ingest.reingest.force_on_same_marc'; - -UPDATE biblio.record_entry SET marc = marc; - -UPDATE config.internal_flag SET enabled = FALSE - WHERE name = 'ingest.reingest.force_on_same_marc'; - --- re-enable the default ingest flags -UPDATE config.internal_flag SET enabled = FALSE WHERE name IN ( - 'ingest.assume_inserts_only', - 'ingest.disable_authority_auto_update', - 'ingest.disable_authority_linking', - 'ingest.disable_located_uri', - 'ingest.disable_metabib_field_entry', - 'ingest.disable_metabib_full_rec', - 'ingest.disable_metabib_rec_descriptor', - 'ingest.metarecord_mapping.preserve_on_delete', - 'ingest.metarecord_mapping.skip_on_insert', - 'ingest.metarecord_mapping.skip_on_update', - 'ingest.reingest.force_on_same_marc', - 'ingest.skip_browse_indexing', - 'ingest.skip_facet_indexing', - 'ingest.skip_search_indexing' -); - -*/ diff --git a/Open-ILS/src/sql/Pg/upgrade/YYYY.data.metabib-display-field.sql b/Open-ILS/src/sql/Pg/upgrade/YYYY.data.metabib-display-field.sql new file mode 100644 index 0000000000..7432f7ce5a --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/YYYY.data.metabib-display-field.sql @@ -0,0 +1,46 @@ + +BEGIN; + +INSERT INTO config.internal_flag (name, enabled) + VALUES ('ingest.skip_display_indexing', FALSE); + +-- Adds seed data to replace (for now) values from the 'mvr' class + +INSERT INTO config.metabib_field (id, field_class, name, format, + display_field, search_field, browse_field, label, xpath) +VALUES + (37, 'title', 'display|title', 'mods32', TRUE, FALSE, FALSE, + oils_i18n_gettext(37, 'Title', 'cmf', 'label'), + '//mods32:mods/mods32:titleNonfiling[mods32:title and not (@type)]'), + (38, 'author', 'display|author', 'mods32', TRUE, FALSE, FALSE, + oils_i18n_gettext(38, 'Author', 'cmf', 'label'), + $$//mods32:mods/mods32:name[@type='personal' and mods32:role/mods32:roleTerm[text()='creator']]$$), + (39, 'subject', 'display|subject', 'mods32', TRUE, FALSE, FALSE, + oils_i18n_gettext(39, 'Subject', 'cmf', 'label'), + '//mods32:mods/mods32:subject'), + (40, 'subject', 'display|topic_subject', 'mods32', TRUE, FALSE, FALSE, + oils_i18n_gettext(40, 'Subject', 'cmf', 'label'), + '//mods32:mods/mods32:subject/mods32:topic') +; + +INSERT INTO config.display_field_map (name, field, multi) VALUES + ('title', 37, FALSE), + ('author', 38, FALSE), + ('subject', 39, TRUE), + ('topic_subject', 40, TRUE) +; + +COMMIT; + +-- REINGEST DISPLAY ENTRIES + +BEGIN; +UPDATE config.internal_flag SET enabled = TRUE WHERE name IN ( +'ingest.assume_inserts_only','ingest.disable_authority_auto_update','ingest.disable_authority_linking','ingest.disable_located_uri','ingest.disable_metabib_field_entry','ingest.disable_metabib_full_rec','ingest.disable_metabib_rec_descriptor','ingest.metarecord_mapping.preserve_on_delete','ingest.metarecord_mapping.skip_on_insert','ingest.metarecord_mapping.skip_on_update','ingest.reingest.force_on_same_marc','ingest.skip_browse_indexing','ingest.skip_facet_indexing','ingest.skip_search_indexing'); + +UPDATE biblio.record_entry SET marc = marc; + +UPDATE config.internal_flag SET enabled = FALSE WHERE name IN ( +'ingest.assume_inserts_only','ingest.disable_authority_auto_update','ingest.disable_authority_linking','ingest.disable_located_uri','ingest.disable_metabib_field_entry','ingest.disable_metabib_full_rec','ingest.disable_metabib_rec_descriptor','ingest.metarecord_mapping.preserve_on_delete','ingest.metarecord_mapping.skip_on_insert','ingest.metarecord_mapping.skip_on_update','ingest.reingest.force_on_same_marc','ingest.skip_browse_indexing','ingest.skip_facet_indexing','ingest.skip_search_indexing'); +COMMIT; + diff --git a/Open-ILS/src/sql/Pg/upgrade/ZZZZ.UNDO.metabib-display-field.sql b/Open-ILS/src/sql/Pg/upgrade/ZZZZ.UNDO.metabib-display-field.sql new file mode 100644 index 0000000000..60c4cd1ae7 --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/ZZZZ.UNDO.metabib-display-field.sql @@ -0,0 +1,306 @@ +-- XXX REVERT FILE -- DELETE THIS BEFORE MERGING XXX -- + +DELETE FROM metabib.display_entry; +DELETE FROM config.display_field_map; +DELETE FROM config.metabib_field WHERE display_field; -- ASSUMES ALL NEW FIELDS + +DELETE FROM config.internal_flag WHERE name = 'ingest.skip_display_indexing'; + +BEGIN; + +DROP FUNCTION metabib.reingest_metabib_field_entries(BIGINT, BOOL, BOOL, BOOL, BOOL); + +CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$ +DECLARE + fclass RECORD; + ind_data metabib.field_entry_template%ROWTYPE; + mbe_row metabib.browse_entry%ROWTYPE; + mbe_id BIGINT; + b_skip_facet BOOL; + b_skip_browse BOOL; + b_skip_search BOOL; + value_prepped TEXT; +BEGIN + + SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet; + SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse; + SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search; + + PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled; + IF NOT FOUND THEN + IF NOT b_skip_search THEN + FOR fclass IN SELECT * FROM config.metabib_class LOOP + -- RAISE NOTICE 'Emptying out %', fclass.name; + EXECUTE $$DELETE FROM metabib.$$ || fclass.name || $$_field_entry WHERE source = $$ || bib_id; + END LOOP; + END IF; + IF NOT b_skip_facet THEN + DELETE FROM metabib.facet_entry WHERE source = bib_id; + END IF; + IF NOT b_skip_browse THEN + DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id; + END IF; + END IF; + + FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP + + -- don't store what has been normalized away + CONTINUE WHEN ind_data.value IS NULL; + + IF ind_data.field < 0 THEN + ind_data.field = -1 * ind_data.field; + END IF; + + IF ind_data.facet_field AND NOT b_skip_facet THEN + INSERT INTO metabib.facet_entry (field, source, value) + VALUES (ind_data.field, ind_data.source, ind_data.value); + END IF; + + IF ind_data.browse_field AND NOT b_skip_browse THEN + -- A caveat about this SELECT: this should take care of replacing + -- old mbe rows when data changes, but not if normalization (by + -- which I mean specifically the output of + -- evergreen.oils_tsearch2()) changes. It may or may not be + -- expensive to add a comparison of index_vector to index_vector + -- to the WHERE clause below. + + CONTINUE WHEN ind_data.sort_value IS NULL; + + value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field); + SELECT INTO mbe_row * FROM metabib.browse_entry + WHERE value = value_prepped AND sort_value = ind_data.sort_value; + + IF FOUND THEN + mbe_id := mbe_row.id; + ELSE + INSERT INTO metabib.browse_entry + ( value, sort_value ) VALUES + ( value_prepped, ind_data.sort_value ); + + mbe_id := CURRVAL('metabib.browse_entry_id_seq'::REGCLASS); + END IF; + + INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority) + VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority); + END IF; + + IF ind_data.search_field AND NOT b_skip_search THEN + -- Avoid inserting duplicate rows + EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class || + '_field_entry WHERE field = $1 AND source = $2 AND value = $3' + INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value; + -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id; + IF mbe_id IS NULL THEN + EXECUTE $$ + INSERT INTO metabib.$$ || ind_data.field_class || $$_field_entry (field, source, value) + VALUES ($$ || + quote_literal(ind_data.field) || $$, $$ || + quote_literal(ind_data.source) || $$, $$ || + quote_literal(ind_data.value) || + $$);$$; + END IF; + END IF; + + END LOOP; + + IF NOT b_skip_search THEN + PERFORM metabib.update_combined_index_vectors(bib_id); + END IF; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + + +CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$ +DECLARE + bib biblio.record_entry%ROWTYPE; + idx config.metabib_field%ROWTYPE; + xfrm config.xml_transform%ROWTYPE; + prev_xfrm TEXT; + transformed_xml TEXT; + xml_node TEXT; + xml_node_list TEXT[]; + facet_text TEXT; + browse_text TEXT; + sort_value TEXT; + raw_text TEXT; + curr_text TEXT; + joiner TEXT := default_joiner; -- XXX will index defs supply a joiner? + authority_text TEXT; + authority_link BIGINT; + output_row metabib.field_entry_template%ROWTYPE; +BEGIN + + -- Start out with no field-use bools set + output_row.browse_field = FALSE; + output_row.facet_field = FALSE; + output_row.search_field = FALSE; + + -- Get the record + SELECT INTO bib * FROM biblio.record_entry WHERE id = rid; + + -- Loop over the indexing entries + FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP + + joiner := COALESCE(idx.joiner, default_joiner); + + SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format; + + -- See if we can skip the XSLT ... it's expensive + IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN + -- Can't skip the transform + IF xfrm.xslt <> '---' THEN + transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt); + ELSE + transformed_xml := bib.marc; + END IF; + + prev_xfrm := xfrm.name; + END IF; + + xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + + raw_text := NULL; + FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP + CONTINUE WHEN xml_node !~ E'^\\s*<'; + + -- XXX much of this should be moved into oils_xpath_string... + curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value( + oils_xpath( '//text()', -- get the content of all the nodes within the main selected node + REGEXP_REPLACE( xml_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space + ), ' '), ''), -- throw away morally empty (bankrupt?) strings + joiner + ); + + CONTINUE WHEN curr_text IS NULL OR curr_text = ''; + + IF raw_text IS NOT NULL THEN + raw_text := raw_text || joiner; + END IF; + + raw_text := COALESCE(raw_text,'') || curr_text; + + -- autosuggest/metabib.browse_entry + IF idx.browse_field THEN + + IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN + browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + browse_text := curr_text; + END IF; + + IF idx.browse_sort_xpath IS NOT NULL AND + idx.browse_sort_xpath <> '' THEN + + sort_value := oils_xpath_string( + idx.browse_sort_xpath, xml_node, joiner, + ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] + ); + ELSE + sort_value := browse_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g')); + output_row.sort_value := + public.naco_normalize(sort_value); + + output_row.authority := NULL; + + IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN + authority_text := oils_xpath_string( + idx.authority_xpath, xml_node, joiner, + ARRAY[ + ARRAY[xfrm.prefix, xfrm.namespace_uri], + ARRAY['xlink','http://www.w3.org/1999/xlink'] + ] + ); + + IF authority_text ~ '^\d+$' THEN + authority_link := authority_text::BIGINT; + PERFORM * FROM authority.record_entry WHERE id = authority_link; + IF FOUND THEN + output_row.authority := authority_link; + END IF; + END IF; + + END IF; + + output_row.browse_field = TRUE; + -- Returning browse rows with search_field = true for search+browse + -- configs allows us to retain granularity of being able to search + -- browse fields with "starts with" type operators (for example, for + -- titles of songs in music albums) + IF idx.search_field THEN + output_row.search_field = TRUE; + END IF; + RETURN NEXT output_row; + output_row.browse_field = FALSE; + output_row.search_field = FALSE; + output_row.sort_value := NULL; + END IF; + + -- insert raw node text for faceting + IF idx.facet_field THEN + + IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN + facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] ); + ELSE + facet_text := curr_text; + END IF; + + output_row.field_class = idx.field_class; + output_row.field = -1 * idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g')); + + output_row.facet_field = TRUE; + RETURN NEXT output_row; + output_row.facet_field = FALSE; + END IF; + + END LOOP; + + CONTINUE WHEN raw_text IS NULL OR raw_text = ''; + + -- insert combined node text for searching + IF idx.search_field THEN + output_row.field_class = idx.field_class; + output_row.field = idx.id; + output_row.source = rid; + output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g')); + + output_row.search_field = TRUE; + RETURN NEXT output_row; + output_row.search_field = FALSE; + END IF; + + END LOOP; + +END; + +$func$ LANGUAGE PLPGSQL; + + +ALTER TYPE metabib.field_entry_template DROP ATTRIBUTE display_field; +DROP TRIGGER display_field_force_nfc_tgr ON metabib.display_entry; +DROP FUNCTION evergreen.display_field_force_nfc(); +DROP TRIGGER display_field_normalize_tgr ON metabib.display_entry; +DROP FUNCTION metabib.display_field_normalize_trigger(); +DROP INDEX metabib.metabib_display_entry_source_idx; +DROP INDEX metabib.metabib_display_entry_field_idx; +DROP VIEW metabib.flat_display_entry; +DROP TABLE config.display_field_map; +DROP TABLE metabib.display_entry; + +ALTER TABLE config.metabib_field + DROP COLUMN display_xpath, + DROP COLUMN display_field; + +COMMIT; + + + -- 2.43.2