LP#1117808: new Pg function to calculate MARC record merges
authorGalen Charlton <gmc@equinoxinitiative.org>
Wed, 30 Nov 2016 14:49:10 +0000 (09:49 -0500)
committerKathy Lussier <klussier@masslnc.org>
Fri, 17 Feb 2017 15:13:36 +0000 (10:13 -0500)
This patch adds a new stored function, vandelay.merge_record_xml_using_profile(),
that when passed two MARCXML strings and the ID of a merge profile.
returns the MARCXML string representing the results of the record merge.

This is meant for situations where we want to know the results
of a merge, but not immediately overlay a record in the database.

pgTAP tests are available in t/marc_merge.pg.

Signed-off-by: Galen Charlton <gmc@equinoxinitiative.org>
Signed-off-by: Kathy Lussier <klussier@masslnc.org>
Open-ILS/src/sql/Pg/012.schema.vandelay.sql
Open-ILS/src/sql/Pg/t/marc_merge.pg [new file with mode: 0644]
Open-ILS/src/sql/Pg/upgrade/XXXX.schema.more_overlay_funcs.sql [new file with mode: 0644]

index ff474cd..d60e01b 100644 (file)
@@ -1452,6 +1452,60 @@ BEGIN
 END;
 $$ LANGUAGE PLPGSQL;
 
+CREATE OR REPLACE FUNCTION vandelay.merge_record_xml_using_profile ( incoming_marc TEXT, existing_marc TEXT, merge_profile_id BIGINT ) RETURNS TEXT AS $$
+DECLARE
+    merge_profile   vandelay.merge_profile%ROWTYPE;
+    dyn_profile     vandelay.compile_profile%ROWTYPE;
+    target_marc     TEXT;
+    source_marc     TEXT;
+    replace_rule    TEXT;
+    match_count     INT;
+BEGIN
+
+    IF existing_marc IS NULL OR incoming_marc IS NULL THEN
+        -- RAISE NOTICE 'no marc for source or target records';
+        RETURN NULL;
+    END IF;
+
+    IF merge_profile_id IS NOT NULL THEN
+        SELECT * INTO merge_profile FROM vandelay.merge_profile WHERE id = merge_profile_id;
+        IF FOUND THEN
+            dyn_profile.add_rule := COALESCE(merge_profile.add_spec,'');
+            dyn_profile.strip_rule := COALESCE(merge_profile.strip_spec,'');
+            dyn_profile.replace_rule := COALESCE(merge_profile.replace_spec,'');
+            dyn_profile.preserve_rule := COALESCE(merge_profile.preserve_spec,'');
+        ELSE
+            -- RAISE NOTICE 'merge profile not found';
+            RETURN NULL;
+        END IF;
+    ELSE
+        -- RAISE NOTICE 'no merge profile specified';
+        RETURN NULL;
+    END IF;
+
+    IF dyn_profile.replace_rule <> '' AND dyn_profile.preserve_rule <> '' THEN
+        -- RAISE NOTICE 'both replace [%] and preserve [%] specified', dyn_profile.replace_rule, dyn_profile.preserve_rule;
+        RETURN NULL;
+    END IF;
+
+    IF dyn_profile.replace_rule = '' AND dyn_profile.preserve_rule = '' AND dyn_profile.add_rule = '' AND dyn_profile.strip_rule = '' THEN
+        -- Since we have nothing to do, just return a target record as is
+        RETURN existing_marc;
+    ELSIF dyn_profile.preserve_rule <> '' THEN
+        source_marc = existing_marc;
+        target_marc = incoming_marc;
+        replace_rule = dyn_profile.preserve_rule;
+    ELSE
+        source_marc = incoming_marc;
+        target_marc = existing_marc;
+        replace_rule = dyn_profile.replace_rule;
+    END IF;
+
+    RETURN vandelay.merge_record_xml( target_marc, source_marc, dyn_profile.add_rule, replace_rule, dyn_profile.strip_rule );
+
+END;
+$$ LANGUAGE PLPGSQL;
+
 CREATE OR REPLACE FUNCTION vandelay.template_overlay_bib_record ( v_marc TEXT, eg_id BIGINT) RETURNS BOOL AS $$
     SELECT vandelay.template_overlay_bib_record( $1, $2, NULL);
 $$ LANGUAGE SQL;
diff --git a/Open-ILS/src/sql/Pg/t/marc_merge.pg b/Open-ILS/src/sql/Pg/t/marc_merge.pg
new file mode 100644 (file)
index 0000000..e790f7a
--- /dev/null
@@ -0,0 +1,128 @@
+BEGIN;
+
+SELECT plan(4);
+
+CREATE OR REPLACE FUNCTION test_marc_merges() RETURNS SETOF TEXT AS $func$
+DECLARE
+    incoming_marc TEXT;
+    existing_marc TEXT;
+    merge_profile_1 BIGINT;
+    merge_profile_2 BIGINT;
+    merge_profile_3 BIGINT;
+    merge_profile_4 BIGINT;
+BEGIN
+    incoming_marc := $$<record    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"    xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00531nam a2200157 a 4500</leader>
+  <controlfield tag="005">20080729170300.0</controlfield>
+  <controlfield tag="008">      t19981999enka              0 eng  </controlfield>
+  <datafield tag="245" ind1="1" ind2="4">
+    <subfield code="a">merge-example-incoming</subfield>
+  </datafield>
+  <datafield tag="505" ind1="1" ind2="4">
+    <subfield code="a">contents-notes-incoming</subfield>
+  </datafield>
+  <datafield tag="856" ind1="1" ind2="4">
+    <subfield code="u">https://example.org/page-incoming</subfield>
+  </datafield>
+</record>$$;
+    existing_marc := $$<record    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"    xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00531nam a2200157 a 4500</leader>
+  <controlfield tag="005">20080729170300.0</controlfield>
+  <controlfield tag="008">      t19981999enka              0 eng  </controlfield>
+  <datafield tag="245" ind1="1" ind2="4">
+    <subfield code="a">merge-example-existing</subfield>
+  </datafield>
+  <datafield tag="505" ind1="1" ind2="4">
+    <subfield code="a">contents-notes-existing</subfield>
+  </datafield>
+  <datafield tag="856" ind1="1" ind2="4">
+    <subfield code="u">https://example.org/page-existing</subfield>
+  </datafield>
+</record>$$;
+
+    INSERT INTO vandelay.merge_profile (owner, name, add_spec, replace_spec, strip_spec, preserve_spec)
+        VALUES (1, 'Test MP 1', '505', NULL, NULL, NULL);
+    SELECT CURRVAL('vandelay.merge_profile_id_seq') INTO merge_profile_1;
+    INSERT INTO vandelay.merge_profile (owner, name, add_spec, replace_spec, strip_spec, preserve_spec)
+        VALUES (1, 'Test MP 2', NULL, '505', NULL, NULL);
+    SELECT CURRVAL('vandelay.merge_profile_id_seq') INTO merge_profile_2;
+    INSERT INTO vandelay.merge_profile (owner, name, add_spec, replace_spec, strip_spec, preserve_spec)
+        VALUES (1, 'Test MP 3', NULL, NULL, NULL, '505');
+    SELECT CURRVAL('vandelay.merge_profile_id_seq') INTO merge_profile_3;
+    INSERT INTO vandelay.merge_profile (owner, name, add_spec, replace_spec, strip_spec, preserve_spec)
+        VALUES (1, 'Test MP 4', '856', '505', NULL, NULL);
+    SELECT CURRVAL('vandelay.merge_profile_id_seq') INTO merge_profile_4;
+
+    -- using evergreen.xml_pretty_print(textval::XML)::TEXT to normalize the
+    -- values we're comparing
+    RETURN NEXT is(evergreen.xml_pretty_print(vandelay.merge_record_xml_using_profile(incoming_marc, existing_marc, merge_profile_1)::XML)::TEXT, evergreen.xml_pretty_print($$<record    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"    xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00531nam a2200157 a 4500</leader>
+  <controlfield tag="005">20080729170300.0</controlfield>
+  <controlfield tag="008">      t19981999enka              0 eng  </controlfield>
+  <datafield tag="245" ind1="1" ind2="4">
+    <subfield code="a">merge-example-existing</subfield>
+  </datafield>
+  <datafield tag="505" ind1="1" ind2="4">
+    <subfield code="a">contents-notes-incoming</subfield>
+  </datafield>
+  <datafield tag="505" ind1="1" ind2="4">
+    <subfield code="a">contents-notes-existing</subfield>
+  </datafield>
+  <datafield tag="856" ind1="1" ind2="4">
+    <subfield code="u">https://example.org/page-existing</subfield>
+  </datafield>
+</record>$$::XML)::TEXT, 'merge with add 505 rule');
+
+    RETURN NEXT is(evergreen.xml_pretty_print(vandelay.merge_record_xml_using_profile(incoming_marc, existing_marc, merge_profile_2)::XML)::TEXT, evergreen.xml_pretty_print($$<record    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"    xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00531nam a2200157 a 4500</leader>
+  <controlfield tag="005">20080729170300.0</controlfield>
+  <controlfield tag="008">      t19981999enka              0 eng  </controlfield>
+  <datafield tag="245" ind1="1" ind2="4">
+    <subfield code="a">merge-example-existing</subfield>
+  </datafield>
+  <datafield tag="505" ind1="1" ind2="4">
+    <subfield code="a">contents-notes-incoming</subfield>
+  </datafield>
+  <datafield tag="856" ind1="1" ind2="4">
+    <subfield code="u">https://example.org/page-existing</subfield>
+  </datafield>
+</record>$$::XML)::TEXT, 'merge with replace 505 rule');
+
+    RETURN NEXT is(evergreen.xml_pretty_print(vandelay.merge_record_xml_using_profile(incoming_marc, existing_marc, merge_profile_3)::XML)::TEXT, evergreen.xml_pretty_print($$<record    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"    xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00531nam a2200157 a 4500</leader>
+  <controlfield tag="005">20080729170300.0</controlfield>
+  <controlfield tag="008">      t19981999enka              0 eng  </controlfield>
+  <datafield tag="245" ind1="1" ind2="4">
+    <subfield code="a">merge-example-incoming</subfield>
+  </datafield>
+  <datafield tag="505" ind1="1" ind2="4">
+    <subfield code="a">contents-notes-existing</subfield>
+  </datafield>
+  <datafield tag="856" ind1="1" ind2="4">
+    <subfield code="u">https://example.org/page-incoming</subfield>
+  </datafield>
+</record>$$::XML)::TEXT, 'merge with preserve 505 rule');
+
+    RETURN NEXT is(evergreen.xml_pretty_print(vandelay.merge_record_xml_using_profile(incoming_marc, existing_marc, merge_profile_4)::XML)::TEXT, evergreen.xml_pretty_print($$<record    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"    xsi:schemaLocation="http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"    xmlns="http://www.loc.gov/MARC21/slim">
+  <leader>00531nam a2200157 a 4500</leader>
+  <controlfield tag="005">20080729170300.0</controlfield>
+  <controlfield tag="008">      t19981999enka              0 eng  </controlfield>
+  <datafield tag="245" ind1="1" ind2="4">
+    <subfield code="a">merge-example-existing</subfield>
+  </datafield>
+  <datafield tag="505" ind1="1" ind2="4">
+    <subfield code="a">contents-notes-incoming</subfield>
+  </datafield>
+  <datafield tag="856" ind1="1" ind2="4">
+    <subfield code="u">https://example.org/page-incoming</subfield>
+  </datafield>
+  <datafield tag="856" ind1="1" ind2="4">
+    <subfield code="u">https://example.org/page-existing</subfield>
+  </datafield>
+</record>$$::XML)::TEXT, 'merge with add 856 and replace 505 rule');
+END;
+$func$ LANGUAGE plpgsql;
+
+SELECT * FROM test_marc_merges();
+
+ROLLBACK;
diff --git a/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.more_overlay_funcs.sql b/Open-ILS/src/sql/Pg/upgrade/XXXX.schema.more_overlay_funcs.sql
new file mode 100644 (file)
index 0000000..e3bfa2e
--- /dev/null
@@ -0,0 +1,59 @@
+BEGIN;
+
+-- SELECT evergreen.upgrade_deps_block_check('XXXX', :eg_version);
+
+CREATE OR REPLACE FUNCTION vandelay.merge_record_xml_using_profile ( incoming_marc TEXT, existing_marc TEXT, merge_profile_id BIGINT ) RETURNS TEXT AS $$
+DECLARE
+    merge_profile   vandelay.merge_profile%ROWTYPE;
+    dyn_profile     vandelay.compile_profile%ROWTYPE;
+    target_marc     TEXT;
+    source_marc     TEXT;
+    replace_rule    TEXT;
+    match_count     INT;
+BEGIN
+
+    IF existing_marc IS NULL OR incoming_marc IS NULL THEN
+        -- RAISE NOTICE 'no marc for source or target records';
+        RETURN NULL;
+    END IF;
+
+    IF merge_profile_id IS NOT NULL THEN
+        SELECT * INTO merge_profile FROM vandelay.merge_profile WHERE id = merge_profile_id;
+        IF FOUND THEN
+            dyn_profile.add_rule := COALESCE(merge_profile.add_spec,'');
+            dyn_profile.strip_rule := COALESCE(merge_profile.strip_spec,'');
+            dyn_profile.replace_rule := COALESCE(merge_profile.replace_spec,'');
+            dyn_profile.preserve_rule := COALESCE(merge_profile.preserve_spec,'');
+        ELSE
+            -- RAISE NOTICE 'merge profile not found';
+            RETURN NULL;
+        END IF;
+    ELSE
+        -- RAISE NOTICE 'no merge profile specified';
+        RETURN NULL;
+    END IF;
+
+    IF dyn_profile.replace_rule <> '' AND dyn_profile.preserve_rule <> '' THEN
+        -- RAISE NOTICE 'both replace [%] and preserve [%] specified', dyn_profile.replace_rule, dyn_profile.preserve_rule;
+        RETURN NULL;
+    END IF;
+
+    IF dyn_profile.replace_rule = '' AND dyn_profile.preserve_rule = '' AND dyn_profile.add_rule = '' AND dyn_profile.strip_rule = '' THEN
+        -- Since we have nothing to do, just return a target record as is
+        RETURN existing_marc;
+    ELSIF dyn_profile.preserve_rule <> '' THEN
+        source_marc = existing_marc;
+        target_marc = incoming_marc;
+        replace_rule = dyn_profile.preserve_rule;
+    ELSE
+        source_marc = incoming_marc;
+        target_marc = existing_marc;
+        replace_rule = dyn_profile.replace_rule;
+    END IF;
+
+    RETURN vandelay.merge_record_xml( target_marc, source_marc, dyn_profile.add_rule, replace_rule, dyn_profile.strip_rule );
+
+END;
+$$ LANGUAGE PLPGSQL;
+
+COMMIT;