Link checker: DB layer fixes
[working/Evergreen.git] / Open-ILS / src / sql / Pg / upgrade / YYYY.functions.url_verify.sql
1 BEGIN;
2
3 CREATE OR REPLACE FUNCTION url_verify.parse_url (url_in TEXT) RETURNS url_verify.url AS $$
4
5 use Rose::URI;
6
7 my $url_in = shift;
8 my $url = Rose::URI->new($url_in);
9
10 my %parts = map { $_ => $url->$_ } qw/scheme username password host port path query fragment/;
11
12 $parts{full_url} = $url_in;
13 ($parts{domain} = $parts{host}) =~ s/^[^.]+\.//;
14 ($parts{tld} = $parts{domain}) =~ s/(?:[^.]+\.)+//;
15 ($parts{page} = $parts{path}) =~ s#(?:[^/]*/)+##;
16
17 return \%parts;
18
19 $$ LANGUAGE PLPERLU;
20
21 CREATE OR REPLACE FUNCTION url_verify.ingest_url () RETURNS TRIGGER AS $$
22 DECLARE
23     tmp_row url_verify.url%ROWTYPE;
24 BEGIN
25     SELECT * INTO tmp_row FROM url_verify.parse_url(NEW.full_url);
26
27     NEW.scheme          := tmp_row.scheme;
28     NEW.username        := tmp_row.username;
29     NEW.password        := tmp_row.password;
30     NEW.host            := tmp_row.host;
31     NEW.domain          := tmp_row.domain;
32     NEW.tld             := tmp_row.tld;
33     NEW.port            := tmp_row.port;
34     NEW.path            := tmp_row.path;
35     NEW.page            := tmp_row.page;
36     NEW.query           := tmp_row.query;
37     NEW.fragment        := tmp_row.fragment;
38
39     RETURN NEW;
40 END;
41 $$ LANGUAGE PLPGSQL;
42
43 CREATE TRIGGER ingest_url_tgr
44     BEFORE INSERT ON url_verify.url
45     FOR EACH ROW EXECUTE PROCEDURE url_verify.ingest_url(); 
46
47 CREATE OR REPLACE FUNCTION url_verify.extract_urls ( session_id INT, item_id INT ) RETURNS INT AS $$
48 DECLARE
49     current_tag TEXT;
50     current_sf TEXT;
51     current_url TEXT;
52     current_ord INT;
53     current_url_pos INT;
54     current_selector url_verify.url_selector%ROWTYPE;
55 BEGIN
56     current_ord := 1;
57
58     FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP
59         current_url_pos := 1;
60         LOOP
61             SELECT  (XPATH(current_selector.xpath || '/text()', b.marc))[current_url_pos]::TEXT INTO current_url
62               FROM  biblio.record_entry b
63                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
64               WHERE c.id = item_id;
65     
66             EXIT WHEN current_url IS NULL;
67     
68             SELECT  (XPATH(current_selector.xpath || '/../@tag', b.marc))[current_url_pos]::TEXT INTO current_tag
69               FROM  biblio.record_entry b
70                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
71               WHERE c.id = item_id;
72     
73             SELECT  (XPATH(current_selector.xpath || '/@subfield', b.marc))[current_url_pos]::TEXT INTO current_sf
74               FROM  biblio.record_entry b
75                     JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
76               WHERE c.id = item_id;
77     
78             INSERT INTO url_verify.url (item, url_selector, tag, subfield, ord, full_url)
79               VALUES ( item_id, current_selector.id, current_tag, current_sf, current_ord, current_url);
80
81             current_url_pos := current_url_pos + 1;
82             current_ord := current_ord + 1;
83         END LOOP;
84     END LOOP;
85
86     RETURN current_ord - 1;
87 END;
88 $$ LANGUAGE PLPGSQL;
89
90 COMMIT;
91