Re-apply the changes provided by 0802 for backport
authorMike Rylander <mrylander@gmail.com>
Mon, 25 Nov 2013 15:14:08 +0000 (10:14 -0500)
committerMike Rylander <mrylander@gmail.com>
Mon, 25 Nov 2013 15:14:08 +0000 (10:14 -0500)
Preserve record order of subfields for authority heading extraction

When extracting headings from authority records we currently read the
subfields of a tag in configuration order.  We should, instead, read
them in record order, to preserve the desired sorting properties that
the cataloger has encoded in the record.

Signed-off-by: Mike Rylander <mrylander@gmail.com>
Open-ILS/src/sql/Pg/002.schema.config.sql
Open-ILS/src/sql/Pg/upgrade/0848.function.authority-sf-file-order-again.sql [new file with mode: 0644]

index 069ff96..2744e49 100644 (file)
@@ -91,7 +91,7 @@ CREATE TRIGGER no_overlapping_deps
     BEFORE INSERT OR UPDATE ON config.db_patch_dependencies
     FOR EACH ROW EXECUTE PROCEDURE evergreen.array_overlap_check ('deprecates');
 
-INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0847', :eg_version); -- eeevil/dbwells
+INSERT INTO config.upgrade_log (version, applied_to) VALUES ('0848', :eg_version); -- miker
 
 CREATE TABLE config.bib_source (
        id              SERIAL  PRIMARY KEY,
diff --git a/Open-ILS/src/sql/Pg/upgrade/0848.function.authority-sf-file-order-again.sql b/Open-ILS/src/sql/Pg/upgrade/0848.function.authority-sf-file-order-again.sql
new file mode 100644 (file)
index 0000000..a53bfa4
--- /dev/null
@@ -0,0 +1,170 @@
+BEGIN;
+
+SELECT evergreen.upgrade_deps_block_check('0848', :eg_version);
+
+CREATE OR REPLACE FUNCTION authority.normalize_heading( marcxml TEXT, no_thesaurus BOOL ) RETURNS TEXT AS $func$
+DECLARE
+    acsaf           authority.control_set_authority_field%ROWTYPE;
+    tag_used        TEXT;
+    nfi_used        TEXT;
+    sf              TEXT;
+    sf_node         TEXT;
+    tag_node        TEXT;
+    thes_code       TEXT;
+    cset            INT;
+    heading_text    TEXT;
+    tmp_text        TEXT;
+    first_sf        BOOL;
+    auth_id         INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT;
+BEGIN
+    SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id;
+
+    IF cset IS NULL THEN
+        SELECT  control_set INTO cset
+          FROM  authority.control_set_authority_field
+          WHERE tag IN ( SELECT  UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[]))
+          LIMIT 1;
+    END IF;
+
+    thes_code := vandelay.marc21_extract_fixed_field(marcxml,'Subj');
+    IF thes_code IS NULL THEN
+        thes_code := '|';
+    ELSIF thes_code = 'z' THEN
+        thes_code := COALESCE( oils_xpath_string('//*[@tag="040"]/*[@code="f"][1]', marcxml), '' );
+    END IF;
+
+    heading_text := '';
+    FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset AND main_entry IS NULL LOOP
+        tag_used := acsaf.tag;
+        nfi_used := acsaf.nfi;
+        first_sf := TRUE;
+
+        FOR tag_node IN SELECT unnest(oils_xpath('//*[@tag="'||tag_used||'"]',marcxml)) LOOP
+            FOR sf_node IN SELECT unnest(oils_xpath('//*[contains("'||acsaf.sf_list||'",@code)]',tag_node)) LOOP
+
+                tmp_text := oils_xpath_string('.', sf_node);
+                sf := oils_xpath_string('./@code', sf_node);
+
+                IF first_sf AND tmp_text IS NOT NULL AND nfi_used IS NOT NULL THEN
+
+                    tmp_text := SUBSTRING(
+                        tmp_text FROM
+                        COALESCE(
+                            NULLIF(
+                                REGEXP_REPLACE(
+                                    oils_xpath_string('./@ind'||nfi_used, tag_node),
+                                    $$\D+$$,
+                                    '',
+                                    'g'
+                                ),
+                                ''
+                            )::INT,
+                            0
+                        ) + 1
+                    );
+
+                END IF;
+
+                first_sf := FALSE;
+
+                IF tmp_text IS NOT NULL AND tmp_text <> '' THEN
+                    heading_text := heading_text || E'\u2021' || sf || ' ' || tmp_text;
+                END IF;
+            END LOOP;
+
+            EXIT WHEN heading_text <> '';
+        END LOOP;
+
+        EXIT WHEN heading_text <> '';
+    END LOOP;
+
+    IF heading_text <> '' THEN
+        IF no_thesaurus IS TRUE THEN
+            heading_text := tag_used || ' ' || public.naco_normalize(heading_text);
+        ELSE
+            heading_text := tag_used || '_' || COALESCE(nfi_used,'-') || '_' || thes_code || ' ' || public.naco_normalize(heading_text);
+        END IF;
+    ELSE
+        heading_text := 'NOHEADING_' || thes_code || ' ' || MD5(marcxml);
+    END IF;
+
+    RETURN heading_text;
+END;
+$func$ LANGUAGE PLPGSQL IMMUTABLE;
+
+CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$
+DECLARE
+    res             authority.simple_heading%ROWTYPE;
+    acsaf           authority.control_set_authority_field%ROWTYPE;
+    tag_used        TEXT;
+    nfi_used        TEXT;
+    sf              TEXT;
+    cset            INT;
+    heading_text    TEXT;
+    sort_text       TEXT;
+    tmp_text        TEXT;
+    tmp_xml         TEXT;
+    first_sf        BOOL;
+    auth_id         INT DEFAULT oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml)::INT;
+BEGIN
+
+    res.record := auth_id;
+
+    SELECT  control_set INTO cset
+      FROM  authority.control_set_authority_field
+      WHERE tag IN ( SELECT UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[]) )
+      LIMIT 1;
+
+    FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
+
+        res.atag := acsaf.id;
+        tag_used := acsaf.tag;
+        nfi_used := acsaf.nfi;
+
+        FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)) LOOP
+
+            heading_text := public.naco_normalize(
+                COALESCE(
+                    oils_xpath_string('//*[contains("'||acsaf.sf_list||'",@code)]',tmp_xml::TEXT, ' '),
+                    ''
+                )
+            );
+
+            IF nfi_used IS NOT NULL THEN
+
+                sort_text := SUBSTRING(
+                    heading_text FROM
+                    COALESCE(
+                        NULLIF(
+                            REGEXP_REPLACE(
+                                oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT),
+                                $$\D+$$,
+                                '',
+                                'g'
+                            ),
+                            ''
+                        )::INT,
+                        0
+                    ) + 1
+                );
+
+            ELSE
+                sort_text := heading_text;
+            END IF;
+
+            IF heading_text IS NOT NULL AND heading_text <> '' THEN
+                res.value := heading_text;
+                res.sort_value := sort_text;
+                RETURN NEXT res;
+            END IF;
+
+        END LOOP;
+
+    END LOOP;
+
+    RETURN;
+END;
+$func$ LANGUAGE PLPGSQL IMMUTABLE;
+
+COMMIT;
+