From e2870476e7363d0773df7f36205506084c9bf013 Mon Sep 17 00:00:00 2001 From: Mike Rylander Date: Fri, 20 Jun 2014 09:29:56 -0400 Subject: [PATCH] LP#1322285: Exclude default values for fixed fields when ingesting Default values for fixed fields that can exist in either 008 or 006 will cause search to be over-broad. So, we avoid including those now. I also took the time to move the baseline schema around a bit in order to keep related code in more appropriate files and avoid continuously redefining functions. Signed-off-by: Mike Rylander Signed-off-by: Jason Stephenson --- Open-ILS/src/sql/Pg/012.schema.vandelay.sql | 109 +++++++++---- Open-ILS/src/sql/Pg/030.schema.metabib.sql | 143 +----------------- .../upgrade/XXXX.function.no-ff-defaults.sql | 118 +++++++++++++++ 3 files changed, 202 insertions(+), 168 deletions(-) create mode 100644 Open-ILS/src/sql/Pg/upgrade/XXXX.function.no-ff-defaults.sql diff --git a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql index 9bfa4913f2..2ca0fcc074 100644 --- a/Open-ILS/src/sql/Pg/012.schema.vandelay.sql +++ b/Open-ILS/src/sql/Pg/012.schema.vandelay.sql @@ -209,11 +209,11 @@ CREATE TABLE vandelay.merge_profile ( CREATE OR REPLACE FUNCTION vandelay.marc21_record_type( marc TEXT ) RETURNS config.marc21_rec_type_map AS $func$ DECLARE - ldr TEXT; - tval TEXT; - tval_rec RECORD; - bval TEXT; - bval_rec RECORD; + ldr TEXT; + tval TEXT; + tval_rec RECORD; + bval TEXT; + bval_rec RECORD; retval config.marc21_rec_type_map%ROWTYPE; BEGIN ldr := oils_xpath_string( '//*[local-name()="leader"]', marc ); @@ -243,51 +243,102 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; -CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field( marc TEXT, ff TEXT ) RETURNS TEXT AS $func$ +CREATE TYPE biblio.record_ff_map AS (record BIGINT, ff_name TEXT, ff_value TEXT); +CREATE OR REPLACE FUNCTION vandelay.marc21_extract_all_fixed_fields( marc TEXT, use_default BOOL DEFAULT FALSE ) RETURNS SETOF biblio.record_ff_map AS $func$ +DECLARE + tag_data TEXT; + rtype TEXT; + ff_pos RECORD; + output biblio.record_ff_map%ROWTYPE; +BEGIN + rtype := (vandelay.marc21_record_type( marc )).code; + + FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE rec_type = rtype ORDER BY tag DESC LOOP + output.ff_name := ff_pos.fixed_field; + output.ff_value := NULL; + + IF ff_pos.tag = 'ldr' THEN + output.ff_value := oils_xpath_string('//*[local-name()="leader"]', marc); + IF output.ff_value IS NOT NULL THEN + output.ff_value := SUBSTRING( output.ff_value, ff_pos.start_pos + 1, ff_pos.length ); + RETURN NEXT output; + output.ff_value := NULL; + END IF; + ELSE + FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP + output.ff_value := SUBSTRING( tag_data, ff_pos.start_pos + 1, ff_pos.length ); + CONTINUE WHEN output.ff_value IS NULL AND NOT use_default; + IF output.ff_value IS NULL THEN output.ff_value := REPEAT( ff_pos.default_val, ff_pos.length ); END IF; + RETURN NEXT output; + output.ff_value := NULL; + END LOOP; + END IF; + + END LOOP; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field_list( marc TEXT, ff TEXT, use_default BOOL DEFAULT FALSE ) RETURNS TEXT[] AS $func$ DECLARE rtype TEXT; ff_pos RECORD; tag_data RECORD; val TEXT; + collection TEXT[] := '{}'::TEXT[]; BEGIN rtype := (vandelay.marc21_record_type( marc )).code; FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP - FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP - val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length ); - RETURN val; - END LOOP; + IF ff_pos.tag = 'ldr' THEN + val := oils_xpath_string('//*[local-name()="leader"]', marc); + IF val IS NOT NULL THEN + val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length ); + collection := collection || val; + END IF; + ELSE + FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP + val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length ); + collection := collection || val; + END LOOP; + END IF; + CONTINUE WHEN NOT use_default; + CONTINUE WHEN ARRAY_UPPER(collection, 1) > 0; val := REPEAT( ff_pos.default_val, ff_pos.length ); - RETURN val; + collection := collection || val; END LOOP; - RETURN NULL; + RETURN collection; END; $func$ LANGUAGE PLPGSQL; -CREATE TYPE biblio.record_ff_map AS (record BIGINT, ff_name TEXT, ff_value TEXT); -CREATE OR REPLACE FUNCTION vandelay.marc21_extract_all_fixed_fields( marc TEXT ) RETURNS SETOF biblio.record_ff_map AS $func$ +CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field( marc TEXT, ff TEXT, use_default BOOL DEFAULT FALSE ) RETURNS TEXT AS $func$ DECLARE - tag_data TEXT; rtype TEXT; ff_pos RECORD; - output biblio.record_ff_map%ROWTYPE; + tag_data RECORD; + val TEXT; BEGIN rtype := (vandelay.marc21_record_type( marc )).code; - - FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE rec_type = rtype ORDER BY tag DESC LOOP - output.ff_name := ff_pos.fixed_field; - output.ff_value := NULL; - - FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(tag) || '"]/text()', marc ) ) x(value) LOOP - output.ff_value := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length ); - IF output.ff_value IS NULL THEN output.ff_value := REPEAT( ff_pos.default_val, ff_pos.length ); END IF; - RETURN NEXT output; - output.ff_value := NULL; - END LOOP; - + FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP + IF ff_pos.tag = 'ldr' THEN + val := oils_xpath_string('//*[local-name()="leader"]', marc); + IF val IS NOT NULL THEN + val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length ); + RETURN val; + END IF; + ELSE + FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP + val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length ); + RETURN val; + END LOOP; + END IF; + CONTINUE WHEN NOT use_default; + val := REPEAT( ff_pos.default_val, ff_pos.length ); + RETURN val; END LOOP; - RETURN; + RETURN NULL; END; $func$ LANGUAGE PLPGSQL; diff --git a/Open-ILS/src/sql/Pg/030.schema.metabib.sql b/Open-ILS/src/sql/Pg/030.schema.metabib.sql index 12421022c6..6bde362f17 100644 --- a/Open-ILS/src/sql/Pg/030.schema.metabib.sql +++ b/Open-ILS/src/sql/Pg/030.schema.metabib.sql @@ -924,155 +924,20 @@ BEGIN END; $func$ LANGUAGE PLPGSQL; -CREATE OR REPLACE FUNCTION vandelay.marc21_record_type( marc TEXT ) RETURNS config.marc21_rec_type_map AS $func$ -DECLARE - ldr TEXT; - tval TEXT; - tval_rec RECORD; - bval TEXT; - bval_rec RECORD; - retval config.marc21_rec_type_map%ROWTYPE; -BEGIN - ldr := oils_xpath_string( '//*[local-name()="leader"]', marc ); - - IF ldr IS NULL OR ldr = '' THEN - SELECT * INTO retval FROM config.marc21_rec_type_map WHERE code = 'BKS'; - RETURN retval; - END IF; - - SELECT * INTO tval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'Type' LIMIT 1; -- They're all the same - SELECT * INTO bval_rec FROM config.marc21_ff_pos_map WHERE fixed_field = 'BLvl' LIMIT 1; -- They're all the same - - - tval := SUBSTRING( ldr, tval_rec.start_pos + 1, tval_rec.length ); - bval := SUBSTRING( ldr, bval_rec.start_pos + 1, bval_rec.length ); - - -- RAISE NOTICE 'type %, blvl %, ldr %', tval, bval, ldr; - - SELECT * INTO retval FROM config.marc21_rec_type_map WHERE type_val LIKE '%' || tval || '%' AND blvl_val LIKE '%' || bval || '%'; - - - IF retval.code IS NULL THEN - SELECT * INTO retval FROM config.marc21_rec_type_map WHERE code = 'BKS'; - END IF; - - RETURN retval; -END; -$func$ LANGUAGE PLPGSQL; - -CREATE OR REPLACE FUNCTION biblio.marc21_record_type( rid BIGINT ) RETURNS config.marc21_rec_type_map AS $func$ - SELECT * FROM vandelay.marc21_record_type( (SELECT marc FROM biblio.record_entry WHERE id = $1) ); -$func$ LANGUAGE SQL; - -CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field_list( marc TEXT, ff TEXT ) RETURNS TEXT[] AS $func$ -DECLARE - rtype TEXT; - ff_pos RECORD; - tag_data RECORD; - val TEXT; - collection TEXT[] := '{}'::TEXT[]; -BEGIN - rtype := (vandelay.marc21_record_type( marc )).code; - FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP - IF ff_pos.tag = 'ldr' THEN - val := oils_xpath_string('//*[local-name()="leader"]', marc); - IF val IS NOT NULL THEN - val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length ); - collection := collection || val; - END IF; - ELSE - FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP - val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length ); - collection := collection || val; - END LOOP; - END IF; - val := REPEAT( ff_pos.default_val, ff_pos.length ); - collection := collection || val; - END LOOP; - - RETURN collection; -END; -$func$ LANGUAGE PLPGSQL; - -CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field( marc TEXT, ff TEXT ) RETURNS TEXT AS $func$ -DECLARE - rtype TEXT; - ff_pos RECORD; - tag_data RECORD; - val TEXT; -BEGIN - rtype := (vandelay.marc21_record_type( marc )).code; - FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP - IF ff_pos.tag = 'ldr' THEN - val := oils_xpath_string('//*[local-name()="leader"]', marc); - IF val IS NOT NULL THEN - val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length ); - RETURN val; - END IF; - ELSE - FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP - val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length ); - RETURN val; - END LOOP; - END IF; - val := REPEAT( ff_pos.default_val, ff_pos.length ); - RETURN val; - END LOOP; - - RETURN NULL; -END; -$func$ LANGUAGE PLPGSQL; - CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$ - SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2 ); + SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE ); $func$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$ - SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2 ); + SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE ); $func$ LANGUAGE SQL; --- CREATE TYPE biblio.record_ff_map AS (record BIGINT, ff_name TEXT, ff_value TEXT); -CREATE OR REPLACE FUNCTION vandelay.marc21_extract_all_fixed_fields( marc TEXT ) RETURNS SETOF biblio.record_ff_map AS $func$ -DECLARE - tag_data TEXT; - rtype TEXT; - ff_pos RECORD; - output biblio.record_ff_map%ROWTYPE; -BEGIN - rtype := (vandelay.marc21_record_type( marc )).code; - - FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE rec_type = rtype ORDER BY tag DESC LOOP - output.ff_name := ff_pos.fixed_field; - output.ff_value := NULL; - - IF ff_pos.tag = 'ldr' THEN - output.ff_value := oils_xpath_string('//*[local-name()="leader"]', marc); - IF output.ff_value IS NOT NULL THEN - output.ff_value := SUBSTRING( output.ff_value, ff_pos.start_pos + 1, ff_pos.length ); - RETURN NEXT output; - output.ff_value := NULL; - END IF; - ELSE - FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP - output.ff_value := SUBSTRING( tag_data, ff_pos.start_pos + 1, ff_pos.length ); - IF output.ff_value IS NULL THEN output.ff_value := REPEAT( ff_pos.default_val, ff_pos.length ); END IF; - RETURN NEXT output; - output.ff_value := NULL; - END LOOP; - END IF; - - END LOOP; - - RETURN; -END; -$func$ LANGUAGE PLPGSQL; - CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$ - SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1) ); + SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE ); $func$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION biblio.marc21_physical_characteristics( rid BIGINT ) RETURNS SETOF biblio.marc21_physical_characteristics AS $func$ - SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1) ); + SELECT id, $1 AS record, ptype, subfield, value FROM vandelay.marc21_physical_characteristics( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE ); $func$ LANGUAGE SQL; CREATE OR REPLACE FUNCTION biblio.extract_quality ( marc TEXT, best_lang TEXT, best_type TEXT ) RETURNS INT AS $func$ diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.function.no-ff-defaults.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.no-ff-defaults.sql new file mode 100644 index 0000000000..3fb03c4b3c --- /dev/null +++ b/Open-ILS/src/sql/Pg/upgrade/XXXX.function.no-ff-defaults.sql @@ -0,0 +1,118 @@ +BEGIN; + +CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field_list( marc TEXT, ff TEXT, use_default BOOL DEFAULT FALSE ) RETURNS TEXT[] AS $func$ +DECLARE + rtype TEXT; + ff_pos RECORD; + tag_data RECORD; + val TEXT; + collection TEXT[] := '{}'::TEXT[]; +BEGIN + rtype := (vandelay.marc21_record_type( marc )).code; + FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP + IF ff_pos.tag = 'ldr' THEN + val := oils_xpath_string('//*[local-name()="leader"]', marc); + IF val IS NOT NULL THEN + val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length ); + collection := collection || val; + END IF; + ELSE + FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP + val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length ); + collection := collection || val; + END LOOP; + END IF; + CONTINUE WHEN NOT use_default; + CONTINUE WHEN ARRAY_UPPER(collection, 1) > 0; + val := REPEAT( ff_pos.default_val, ff_pos.length ); + collection := collection || val; + END LOOP; + + RETURN collection; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.marc21_extract_fixed_field( marc TEXT, ff TEXT, use_default BOOL DEFAULT FALSE ) RETURNS TEXT AS $func$ +DECLARE + rtype TEXT; + ff_pos RECORD; + tag_data RECORD; + val TEXT; +BEGIN + rtype := (vandelay.marc21_record_type( marc )).code; + FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE fixed_field = ff AND rec_type = rtype ORDER BY tag DESC LOOP + IF ff_pos.tag = 'ldr' THEN + val := oils_xpath_string('//*[local-name()="leader"]', marc); + IF val IS NOT NULL THEN + val := SUBSTRING( val, ff_pos.start_pos + 1, ff_pos.length ); + RETURN val; + END IF; + ELSE + FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP + val := SUBSTRING( tag_data.value, ff_pos.start_pos + 1, ff_pos.length ); + RETURN val; + END LOOP; + END IF; + CONTINUE WHEN NOT use_default; + val := REPEAT( ff_pos.default_val, ff_pos.length ); + RETURN val; + END LOOP; + + RETURN NULL; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION vandelay.marc21_extract_all_fixed_fields( marc TEXT, use_default BOOL DEFAULT FALSE ) RETURNS SETOF biblio.record_ff_map AS $func$ +DECLARE + tag_data TEXT; + rtype TEXT; + ff_pos RECORD; + output biblio.record_ff_map%ROWTYPE; +BEGIN + rtype := (vandelay.marc21_record_type( marc )).code; + + FOR ff_pos IN SELECT * FROM config.marc21_ff_pos_map WHERE rec_type = rtype ORDER BY tag DESC LOOP + output.ff_name := ff_pos.fixed_field; + output.ff_value := NULL; + + IF ff_pos.tag = 'ldr' THEN + output.ff_value := oils_xpath_string('//*[local-name()="leader"]', marc); + IF output.ff_value IS NOT NULL THEN + output.ff_value := SUBSTRING( output.ff_value, ff_pos.start_pos + 1, ff_pos.length ); + RETURN NEXT output; + output.ff_value := NULL; + END IF; + ELSE + FOR tag_data IN SELECT value FROM UNNEST( oils_xpath( '//*[@tag="' || UPPER(ff_pos.tag) || '"]/text()', marc ) ) x(value) LOOP + output.ff_value := SUBSTRING( tag_data, ff_pos.start_pos + 1, ff_pos.length ); + CONTINUE WHEN output.ff_value IS NULL AND NOT use_default; + IF output.ff_value IS NULL THEN output.ff_value := REPEAT( ff_pos.default_val, ff_pos.length ); END IF; + RETURN NEXT output; + output.ff_value := NULL; + END LOOP; + END IF; + + END LOOP; + + RETURN; +END; +$func$ LANGUAGE PLPGSQL; + +CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field_list( rid BIGINT, ff TEXT ) RETURNS TEXT[] AS $func$ + SELECT * FROM vandelay.marc21_extract_fixed_field_list( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE ); +$func$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION biblio.marc21_extract_fixed_field( rid BIGINT, ff TEXT ) RETURNS TEXT AS $func$ + SELECT * FROM vandelay.marc21_extract_fixed_field( (SELECT marc FROM biblio.record_entry WHERE id = $1), $2, TRUE ); +$func$ LANGUAGE SQL; + +CREATE OR REPLACE FUNCTION biblio.marc21_extract_all_fixed_fields( rid BIGINT ) RETURNS SETOF biblio.record_ff_map AS $func$ + SELECT $1 AS record, ff_name, ff_value FROM vandelay.marc21_extract_all_fixed_fields( (SELECT marc FROM biblio.record_entry WHERE id = $1), TRUE ); +$func$ LANGUAGE SQL; + +DROP FUNCTION IF EXISTS vandelay.marc21_extract_fixed_field_list( text, text ); +DROP FUNCTION IF EXISTS vandelay.marc21_extract_fixed_field( text, text ); +DROP FUNCTION IF EXISTS vandelay.marc21_extract_all_fixed_fields( text ); + +COMMIT; + -- 2.43.2