Stamping upgrade script for separating fingerprint components
[Evergreen.git] / Open-ILS / src / sql / Pg / upgrade / 1017.schema.update_fingerprinting.sql
1 BEGIN;
2
3 SELECT evergreen.upgrade_deps_block_check('1017', :eg_version);
4
5 CREATE OR REPLACE FUNCTION biblio.extract_fingerprint ( marc text ) RETURNS TEXT AS $func$
6 DECLARE
7         idx             config.biblio_fingerprint%ROWTYPE;
8         xfrm            config.xml_transform%ROWTYPE;
9         prev_xfrm       TEXT;
10         transformed_xml TEXT;
11         xml_node        TEXT;
12         xml_node_list   TEXT[];
13         raw_text        TEXT;
14     output_text TEXT := '';
15 BEGIN
16
17     IF marc IS NULL OR marc = '' THEN
18         RETURN NULL;
19     END IF;
20
21         -- Loop over the indexing entries
22         FOR idx IN SELECT * FROM config.biblio_fingerprint ORDER BY format, id LOOP
23
24                 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
25
26                 -- See if we can skip the XSLT ... it's expensive
27                 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
28                         -- Can't skip the transform
29                         IF xfrm.xslt <> '---' THEN
30                                 transformed_xml := oils_xslt_process(marc,xfrm.xslt);
31                         ELSE
32                                 transformed_xml := marc;
33                         END IF;
34
35                         prev_xfrm := xfrm.name;
36                 END IF;
37
38                 raw_text := COALESCE(
39             naco_normalize(
40                 ARRAY_TO_STRING(
41                     oils_xpath(
42                         '//text()',
43                         (oils_xpath(
44                             idx.xpath,
45                             transformed_xml,
46                             ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] 
47                         ))[1]
48                     ),
49                     ''
50                 )
51             ),
52             ''
53         );
54
55         raw_text := REGEXP_REPLACE(raw_text, E'\\[.+?\\]', E'');
56         raw_text := REGEXP_REPLACE(raw_text, E'\\mthe\\M|\\man?d?d\\M', E'', 'g'); -- arg! the pain!
57
58         IF idx.first_word IS TRUE THEN
59             raw_text := REGEXP_REPLACE(raw_text, E'^(\\w+).*?$', E'\\1');
60         END IF;
61
62                 output_text := output_text || idx.name || ':' ||
63                                            REGEXP_REPLACE(raw_text, E'\\s+', '', 'g') || ' ';
64
65         END LOOP;
66
67     RETURN BTRIM(output_text);
68
69 END;
70 $func$ LANGUAGE PLPGSQL;
71
72 COMMIT;
73
74 \qecho Recalculating bib fingerprints
75 ALTER TABLE biblio.record_entry DISABLE TRIGGER USER;
76 UPDATE biblio.record_entry SET fingerprint = biblio.extract_fingerprint(marc) WHERE NOT deleted;
77 ALTER TABLE biblio.record_entry ENABLE TRIGGER USER;
78
79 SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
80 FROM biblio.record_entry
81 WHERE NOT deleted;
82
83 \qecho Remapping metarecords
84 SELECT metabib.remap_metarecord_for_bib(id, fingerprint)
85 FROM biblio.record_entry
86 WHERE NOT deleted;