]> git.evergreen-ils.org Git - working/Evergreen.git/blob - Open-ILS/src/sql/Pg/upgrade/0753.functions.url_verify.sql
Stamping upgrade for truncate-to-max-fine
[working/Evergreen.git] / Open-ILS / src / sql / Pg / upgrade / 0753.functions.url_verify.sql
1 BEGIN;
2
3 SELECT evergreen.upgrade_deps_block_check('0753', :eg_version);
4
5 CREATE OR REPLACE FUNCTION url_verify.parse_url (url_in TEXT) RETURNS url_verify.url AS $$
6
7 use Rose::URI;
8
9 my $url_in = shift;
10 my $url = Rose::URI->new($url_in);
11
12 my %parts = map { $_ => $url->$_ } qw/scheme username password host port path query fragment/;
13
14 $parts{full_url} = $url_in;
15 ($parts{domain} = $parts{host}) =~ s/^[^.]+\.//;
16 ($parts{tld} = $parts{domain}) =~ s/(?:[^.]+\.)+//;
17 ($parts{page} = $parts{path}) =~ s#(?:[^/]*/)+##;
18
19 return \%parts;
20
21 $$ LANGUAGE PLPERLU;
22
23 CREATE OR REPLACE FUNCTION url_verify.ingest_url () RETURNS TRIGGER AS $$
24 DECLARE
25     tmp_row url_verify.url%ROWTYPE;
26 BEGIN
27     SELECT * INTO tmp_row FROM url_verify.parse_url(NEW.full_url);
28
29     NEW.scheme          := tmp_row.scheme;
30     NEW.username        := tmp_row.username;
31     NEW.password        := tmp_row.password;
32     NEW.host            := tmp_row.host;
33     NEW.domain          := tmp_row.domain;
34     NEW.tld             := tmp_row.tld;
35     NEW.port            := tmp_row.port;
36     NEW.path            := tmp_row.path;
37     NEW.page            := tmp_row.page;
38     NEW.query           := tmp_row.query;
39     NEW.fragment        := tmp_row.fragment;
40
41     RETURN NEW;
42 END;
43 $$ LANGUAGE PLPGSQL;
44
45 CREATE TRIGGER ingest_url_tgr
46     BEFORE INSERT ON url_verify.url
47     FOR EACH ROW EXECUTE PROCEDURE url_verify.ingest_url(); 
48
49 CREATE OR REPLACE FUNCTION url_verify.extract_urls ( session_id INT, item_id INT ) RETURNS INT AS $$
50 DECLARE
51     last_seen_tag TEXT;
52     current_tag TEXT;
53     current_sf TEXT;
54     current_url TEXT;
55     current_ord INT;
56     current_url_pos INT;
57     current_selector url_verify.url_selector%ROWTYPE;
58 BEGIN
59     current_ord := 1;
60
61     FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP
62         current_url_pos := 1;
63         LOOP
64             SELECT  (XPATH(current_selector.xpath || '/text()', b.marc::XML))[current_url_pos]::TEXT INTO current_url
65               FROM  biblio.record_entry b
66                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
67               WHERE c.id = item_id;
68
69             EXIT WHEN current_url IS NULL;
70
71             SELECT  (XPATH(current_selector.xpath || '/../@tag', b.marc::XML))[current_url_pos]::TEXT INTO current_tag
72               FROM  biblio.record_entry b
73                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
74               WHERE c.id = item_id;
75
76             IF current_tag IS NULL THEN
77                 current_tag := last_seen_tag;
78             ELSE
79                 last_seen_tag := current_tag;
80             END IF;
81
82             SELECT  (XPATH(current_selector.xpath || '/@code', b.marc::XML))[current_url_pos]::TEXT INTO current_sf
83               FROM  biblio.record_entry b
84                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
85               WHERE c.id = item_id;
86
87             INSERT INTO url_verify.url (session, item, url_selector, tag, subfield, ord, full_url)
88               VALUES ( session_id, item_id, current_selector.id, current_tag, current_sf, current_ord, current_url);
89
90             current_url_pos := current_url_pos + 1;
91             current_ord := current_ord + 1;
92         END LOOP;
93     END LOOP;
94
95     RETURN current_ord - 1;
96 END;
97 $$ LANGUAGE PLPGSQL;
98
99 COMMIT;
100