3 SELECT evergreen.upgrade_deps_block_check('0909', :eg_version);
5 ALTER TABLE vandelay.authority_match
6 ADD COLUMN match_score INT NOT NULL DEFAULT 0;
8 -- support heading=TRUE match set points
9 ALTER TABLE vandelay.match_set_point
10 ADD COLUMN heading BOOLEAN NOT NULL DEFAULT FALSE,
11 DROP CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_bo,
12 ADD CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_heading_or_a_bo
14 (tag IS NOT NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NULL) OR
15 (tag IS NULL AND svf IS NOT NULL AND heading IS FALSE AND bool_op IS NULL) OR
16 (tag IS NULL AND svf IS NULL AND heading IS TRUE AND bool_op IS NULL) OR
17 (tag IS NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NOT NULL)
20 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
26 root vandelay.match_set_point;
28 SELECT * INTO root FROM vandelay.match_set_point
29 WHERE parent IS NULL AND match_set = match_set_id;
31 RETURN vandelay.get_expr_from_match_set_point(
32 root, tags_rstore, auth_heading);
36 -- backwards compat version so we don't have
37 -- to modify vandelay.match_set_test_marcxml()
38 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
43 RETURN vandelay.get_expr_from_match_set(
44 match_set_id, tags_rstore, NULL);
49 DROP FUNCTION IF EXISTS
50 vandelay.get_expr_from_match_set_point(vandelay.match_set_point, HSTORE);
52 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
53 node vandelay.match_set_point,
62 child vandelay.match_set_point;
64 SELECT ARRAY_AGG(id) INTO children FROM vandelay.match_set_point
65 WHERE parent = node.id;
67 IF ARRAY_LENGTH(children, 1) > 0 THEN
68 this_op := vandelay._get_expr_render_one(node);
71 WHILE children[i] IS NOT NULL LOOP
72 SELECT * INTO child FROM vandelay.match_set_point
73 WHERE id = children[i];
75 q := q || ' ' || this_op || ' ';
78 q := q || vandelay.get_expr_from_match_set_point(
79 child, tags_rstore, auth_heading);
83 ELSIF node.bool_op IS NULL THEN
84 PERFORM vandelay._get_expr_push_qrow(node);
85 PERFORM vandelay._get_expr_push_jrow(node, tags_rstore, auth_heading);
86 RETURN vandelay._get_expr_render_one(node);
94 DROP FUNCTION IF EXISTS
95 vandelay._get_expr_push_jrow(vandelay.match_set_point, HSTORE);
97 CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
98 node vandelay.match_set_point,
113 -- remember $1 is tags_rstore, and $2 is svf_rstore
114 -- a non-NULL auth_heading means we're matching authority records
116 IF auth_heading IS NOT NULL THEN
117 rec_table := 'authority.full_rec';
119 rec_table := 'metabib.full_rec';
123 SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
124 IF jrow_count > 0 THEN
125 my_using := ' USING (record)';
126 my_join := 'FULL OUTER JOIN';
132 IF node.tag IS NOT NULL THEN
133 caseless := (node.tag IN ('020', '022', '024'));
135 IF node.subfield IS NOT NULL THEN
136 tagkey := tagkey || node.subfield;
154 my_alias := 'n' || node.id::TEXT;
156 jrow := my_join || ' (SELECT *, ';
157 IF node.tag IS NOT NULL THEN
158 jrow := jrow || node.quality ||
159 ' AS quality FROM ' || rec_table || ' mfr WHERE mfr.tag = ''' ||
161 IF node.subfield IS NOT NULL THEN
162 jrow := jrow || ' AND mfr.subfield = ''' ||
163 node.subfield || '''';
165 jrow := jrow || ' AND (';
166 jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
167 jrow := jrow || ')) ' || my_alias || my_using || E'\n';
169 IF auth_heading IS NOT NULL THEN -- authority record
170 IF node.heading AND auth_heading <> '' THEN
171 jrow := jrow || 'id AS record, ' || node.quality ||
172 ' AS quality FROM authority.record_entry are ' ||
173 ' WHERE are.heading = ''' || auth_heading || '''';
174 jrow := jrow || ') ' || my_alias || my_using || E'\n';
177 jrow := jrow || 'id AS record, ' || node.quality ||
178 ' AS quality FROM metabib.record_attr_flat mraf WHERE mraf.attr = ''' ||
179 node.svf || ''' AND mraf.value ' || op || ' $2->''' || node.svf || ''') ' ||
180 my_alias || my_using || E'\n';
183 INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
188 CREATE OR REPLACE FUNCTION vandelay.match_set_test_authxml(
189 match_set_id INTEGER, record_xml TEXT
190 ) RETURNS SETOF vandelay.match_set_test_result AS $$
201 tags_rstore := vandelay.flatten_marc_hstore(record_xml);
203 SELECT normalize_heading INTO heading
204 FROM authority.normalize_heading(record_xml);
206 CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
207 CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
209 -- generate the where clause and return that directly (into wq), and as
210 -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
211 wq := vandelay.get_expr_from_match_set(
212 match_set_id, tags_rstore, heading);
214 query_ := 'SELECT DISTINCT(record), ';
216 -- qrows table is for the quality bits we add to the SELECT clause
218 'COALESCE(n' || q::TEXT || '.quality, 0)', ' + '
219 ) INTO coal FROM _vandelay_tmp_qrows;
221 -- our query string so far is the SELECT clause and the inital FROM.
222 -- no JOINs yet nor the WHERE clause
223 query_ := query_ || coal || ' AS quality ' || E'\n';
225 -- jrows table is for the joins we must make (and the real text conditions)
226 SELECT STRING_AGG(j, E'\n') INTO joins
227 FROM _vandelay_tmp_jrows;
229 -- add those joins and the where clause to our query.
230 query_ := query_ || joins || E'\n';
232 query_ := query_ || 'JOIN authority.record_entry are ON (are.id = record) '
233 || 'WHERE ' || wq || ' AND not are.deleted';
235 -- this will return rows of record,quality
236 FOR rec IN EXECUTE query_ USING tags_rstore LOOP
240 DROP TABLE _vandelay_tmp_qrows;
241 DROP TABLE _vandelay_tmp_jrows;
246 CREATE OR REPLACE FUNCTION vandelay.measure_auth_record_quality
247 ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$
251 test vandelay.match_set_quality%ROWTYPE;
254 FOR test IN SELECT * FROM vandelay.match_set_quality
255 WHERE match_set = match_set_id LOOP
256 IF test.tag IS NOT NULL THEN
257 FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml )
258 WHERE tag = test.tag AND subfield = test.subfield LOOP
259 IF test.value = rvalue THEN
260 out_q := out_q + test.quality;
268 $_$ LANGUAGE PLPGSQL;
272 CREATE OR REPLACE FUNCTION vandelay.match_authority_record() RETURNS TRIGGER AS $func$
274 incoming_existing_id TEXT;
275 test_result vandelay.match_set_test_result%ROWTYPE;
279 IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
283 DELETE FROM vandelay.authority_match WHERE queued_record = NEW.id;
285 SELECT q.match_set INTO match_set FROM vandelay.authority_queue q WHERE q.id = NEW.queue;
287 IF match_set IS NOT NULL THEN
288 NEW.quality := vandelay.measure_auth_record_quality( NEW.marc, match_set );
291 -- Perfect matches on 901$c exit early with a match with high quality.
292 incoming_existing_id :=
293 oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc);
295 IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN
296 SELECT id INTO tmp_rec FROM authority.record_entry WHERE id = incoming_existing_id::bigint;
297 IF tmp_rec IS NOT NULL THEN
298 INSERT INTO vandelay.authority_match (queued_record, eg_record, match_score, quality)
303 -- note: no match_set means quality==0
304 vandelay.measure_auth_record_quality( b.marc, match_set )
305 FROM authority.record_entry b
306 WHERE id = incoming_existing_id::bigint;
310 IF match_set IS NULL THEN
314 FOR test_result IN SELECT * FROM
315 vandelay.match_set_test_authxml(match_set, NEW.marc) LOOP
317 INSERT INTO vandelay.authority_match ( queued_record, eg_record, match_score, quality )
322 vandelay.measure_auth_record_quality( b.marc, match_set )
323 FROM authority.record_entry b
324 WHERE id = test_result.record;
330 $func$ LANGUAGE PLPGSQL;
332 CREATE TRIGGER zz_match_auths_trigger
333 BEFORE INSERT OR UPDATE ON vandelay.queued_authority_record
334 FOR EACH ROW EXECUTE PROCEDURE vandelay.match_authority_record();
336 CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_record_with_best ( import_id BIGINT, merge_profile_id INT, lwm_ratio_value_p NUMERIC ) RETURNS BOOL AS $$
339 lwm_ratio_value NUMERIC;
342 lwm_ratio_value := COALESCE(lwm_ratio_value_p, 0.0);
344 PERFORM * FROM vandelay.queued_authority_record WHERE import_time IS NOT NULL AND id = import_id;
347 -- RAISE NOTICE 'already imported, cannot auto-overlay'
351 SELECT m.eg_record INTO eg_id
352 FROM vandelay.authority_match m
353 JOIN vandelay.queued_authority_record qr ON (m.queued_record = qr.id)
354 JOIN vandelay.authority_queue q ON (qr.queue = q.id)
355 JOIN authority.record_entry r ON (r.id = m.eg_record)
356 WHERE m.queued_record = import_id
357 AND qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC >= lwm_ratio_value
358 ORDER BY m.match_score DESC, -- required match score
359 qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC DESC, -- quality tie breaker
360 m.id -- when in doubt, use the first match
363 IF eg_id IS NULL THEN
364 -- RAISE NOTICE 'incoming record is not of high enough quality';
368 RETURN vandelay.overlay_authority_record( import_id, eg_id, merge_profile_id );