-- url_verify.parse_url(url_in TEXT) -> url_verify.url
--
-- Splits a URL into its components with Rose::URI and fills in the fields of
-- a url_verify.url row.  The function body is Perl, not PL/pgSQL.
--
-- Fields populated by the statements below:
--   scheme, username, password, host, port, path, query, fragment
--              - copied from the Rose::URI accessors of the same names
--   full_url   - the input string, unchanged
--   domain     - host with its first dot-terminated label removed
--                (www.example.com -> example.com)
--   tld        - domain with every leading "label." removed, i.e. its last label
--   page       - path with every leading "segment/" removed, i.e. whatever
--                follows the final slash (empty when the path ends in "/")
--
-- NOTE(review): domain is derived by unconditionally dropping the first host
-- label, so a two-label host such as example.com ends up with
-- domain = tld = "com".  Confirm that consumers of the domain column expect this.
3 CREATE OR REPLACE FUNCTION url_verify.parse_url (url_in TEXT) RETURNS url_verify.url AS $$
8 my $url = Rose::URI->new($url_in);
10 my %parts = map { $_ => $url->$_ } qw/scheme username password host port path query fragment/;
12 $parts{full_url} = $url_in;
13 ($parts{domain} = $parts{host}) =~ s/^[^.]+\.//;
14 ($parts{tld} = $parts{domain}) =~ s/(?:[^.]+\.)+//;
15 ($parts{page} = $parts{path}) =~ s#(?:[^/]*/)+##;
-- url_verify.ingest_url() -> TRIGGER
--
-- Row-level trigger function for url_verify.url.  It parses NEW.full_url with
-- url_verify.parse_url() and copies every derived component onto the row
-- being written, so callers only have to supply full_url.
--
-- Columns overwritten on NEW: scheme, username, password, host, domain, tld,
-- port, path, page, query, fragment.  Any values the caller supplied for
-- these columns are replaced; full_url and all other columns are left alone.
21 CREATE OR REPLACE FUNCTION url_verify.ingest_url () RETURNS TRIGGER AS $$
-- Scratch row that receives the parsed components.
23 tmp_row url_verify.url%ROWTYPE;
-- parse_url() returns a single url_verify.url row; capture it whole.
25 SELECT * INTO tmp_row FROM url_verify.parse_url(NEW.full_url);
-- Copy each parsed component onto the incoming row.
27 NEW.scheme := tmp_row.scheme;
28 NEW.username := tmp_row.username;
29 NEW.password := tmp_row.password;
30 NEW.host := tmp_row.host;
31 NEW.domain := tmp_row.domain;
32 NEW.tld := tmp_row.tld;
33 NEW.port := tmp_row.port;
34 NEW.path := tmp_row.path;
35 NEW.page := tmp_row.page;
36 NEW.query := tmp_row.query;
37 NEW.fragment := tmp_row.fragment;
-- Runs url_verify.ingest_url() once per row before each INSERT into
-- url_verify.url.  The trigger is declared for INSERT only, so UPDATEs do not
-- invoke it.
43 CREATE TRIGGER ingest_url_tgr
44 BEFORE INSERT ON url_verify.url
45 FOR EACH ROW EXECUTE PROCEDURE url_verify.ingest_url();
-- url_verify.extract_urls(session_id INT, item_id INT) -> INT
--
-- Applies every url_verify.url_selector row belonging to the given session to
-- MARC XML reached through container.biblio_record_entry_bucket_item, and
-- inserts one url_verify.url row per URL found.
--
-- Parameters:
--   session_id - matched against url_selector.session; also stored in the
--                session column of each inserted row
--   item_id    - stored in the item column of each inserted row
--                (presumably also selects the bucket item to read; confirm
--                against the WHERE clauses of the three lookups)
--
-- Returns: current_ord - 1.  NOTE(review): this is the number of rows
-- inserted only if current_ord is initialised to 1 before the loop; confirm.
--
-- Each INSERT fires the BEFORE INSERT trigger on url_verify.url, so only
-- full_url is supplied here and the parsed components are filled in there.
47 CREATE OR REPLACE FUNCTION url_verify.extract_urls ( session_id INT, item_id INT ) RETURNS INT AS $$
-- Selector row currently being applied.
54 current_selector url_verify.url_selector%ROWTYPE;
-- One pass per selector configured for this session.
58 FOR current_selector IN SELECT * FROM url_verify.url_selector s WHERE s.session = session_id LOOP
-- Text of the current_url_pos-th node matched by the selector's XPath.
-- XPATH() returns an array; subscripting past its end yields NULL.
61 SELECT (XPATH(current_selector.xpath || '/text()', b.marc::XML))[current_url_pos]::TEXT INTO current_url
62 FROM biblio.record_entry b
63 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
-- No node at this position (or no row selected): stop scanning.
66 EXIT WHEN current_url IS NULL;
-- "tag" attribute of the parent element of the matched node, same position.
68 SELECT (XPATH(current_selector.xpath || '/../@tag', b.marc::XML))[current_url_pos]::TEXT INTO current_tag
69 FROM biblio.record_entry b
70 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
-- "code" attribute of the matched node itself, same position.
73 SELECT (XPATH(current_selector.xpath || '/@code', b.marc::XML))[current_url_pos]::TEXT INTO current_sf
74 FROM biblio.record_entry b
75 JOIN container.biblio_record_entry_bucket_item c ON (c.target_biblio_record_entry = b.id)
-- Record the URL along with the selector, tag, subfield and ordinal it came from.
78 INSERT INTO url_verify.url (session, item, url_selector, tag, subfield, ord, full_url)
79 VALUES ( session_id, item_id, current_selector.id, current_tag, current_sf, current_ord, current_url);
-- Advance both the XPath result position and the ordinal stored in ord.
81 current_url_pos := current_url_pos + 1;
82 current_ord := current_ord + 1;
86 RETURN current_ord - 1;