3 ALTER TABLE vandelay.authority_match
4 ADD COLUMN match_score INT NOT NULL DEFAULT 0;
6 -- support heading=TRUE match set points
7 ALTER TABLE vandelay.match_set_point
8 ADD COLUMN heading BOOLEAN NOT NULL DEFAULT FALSE,
9 DROP CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_bo,
10 ADD CONSTRAINT vmsp_need_a_tag_or_a_ff_or_a_heading_or_a_bo
12 (tag IS NOT NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NULL) OR
13 (tag IS NULL AND svf IS NOT NULL AND heading IS FALSE AND bool_op IS NULL) OR
14 (tag IS NULL AND svf IS NULL AND heading IS TRUE AND bool_op IS NULL) OR
15 (tag IS NULL AND svf IS NULL AND heading IS FALSE AND bool_op IS NOT NULL)
18 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
24 root vandelay.match_set_point;
26 SELECT * INTO root FROM vandelay.match_set_point
27 WHERE parent IS NULL AND match_set = match_set_id;
29 RETURN vandelay.get_expr_from_match_set_point(
30 root, tags_rstore, auth_heading);
34 -- backwards compat version so we don't have
35 -- to modify vandelay.match_set_test_marcxml()
36 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set(
41 RETURN vandelay.get_expr_from_match_set(
42 match_set_id, tags_rstore, NULL);
47 DROP FUNCTION IF EXISTS
48 vandelay.get_expr_from_match_set_point(vandelay.match_set_point, HSTORE);
50 CREATE OR REPLACE FUNCTION vandelay.get_expr_from_match_set_point(
51 node vandelay.match_set_point,
60 child vandelay.match_set_point;
62 SELECT ARRAY_AGG(id) INTO children FROM vandelay.match_set_point
63 WHERE parent = node.id;
65 IF ARRAY_LENGTH(children, 1) > 0 THEN
66 this_op := vandelay._get_expr_render_one(node);
69 WHILE children[i] IS NOT NULL LOOP
70 SELECT * INTO child FROM vandelay.match_set_point
71 WHERE id = children[i];
73 q := q || ' ' || this_op || ' ';
76 q := q || vandelay.get_expr_from_match_set_point(
77 child, tags_rstore, auth_heading);
81 ELSIF node.bool_op IS NULL THEN
82 PERFORM vandelay._get_expr_push_qrow(node);
83 PERFORM vandelay._get_expr_push_jrow(node, tags_rstore, auth_heading);
84 RETURN vandelay._get_expr_render_one(node);
92 DROP FUNCTION IF EXISTS
93 vandelay._get_expr_push_jrow(vandelay.match_set_point, HSTORE);
95 CREATE OR REPLACE FUNCTION vandelay._get_expr_push_jrow(
96 node vandelay.match_set_point,
111 -- remember $1 is tags_rstore, and $2 is svf_rstore
112 -- a non-NULL auth_heading means we're matching authority records
114 IF auth_heading IS NOT NULL THEN
115 rec_table := 'authority.full_rec';
117 rec_table := 'metabib.full_rec';
121 SELECT COUNT(*) INTO jrow_count FROM _vandelay_tmp_jrows;
122 IF jrow_count > 0 THEN
123 my_using := ' USING (record)';
124 my_join := 'FULL OUTER JOIN';
130 IF node.tag IS NOT NULL THEN
131 caseless := (node.tag IN ('020', '022', '024'));
133 IF node.subfield IS NOT NULL THEN
134 tagkey := tagkey || node.subfield;
152 my_alias := 'n' || node.id::TEXT;
154 jrow := my_join || ' (SELECT *, ';
155 IF node.tag IS NOT NULL THEN
156 jrow := jrow || node.quality ||
157 ' AS quality FROM ' || rec_table || ' mfr WHERE mfr.tag = ''' ||
159 IF node.subfield IS NOT NULL THEN
160 jrow := jrow || ' AND mfr.subfield = ''' ||
161 node.subfield || '''';
163 jrow := jrow || ' AND (';
164 jrow := jrow || vandelay._node_tag_comparisons(caseless, op, tags_rstore, tagkey);
165 jrow := jrow || ')) ' || my_alias || my_using || E'\n';
167 IF auth_heading IS NOT NULL THEN -- authority record
168 IF node.heading AND auth_heading <> '' THEN
169 jrow := jrow || 'id AS record, ' || node.quality ||
170 ' AS quality FROM authority.record_entry are ' ||
171 ' WHERE are.heading = ''' || auth_heading || '''';
172 jrow := jrow || ') ' || my_alias || my_using || E'\n';
175 jrow := jrow || 'id AS record, ' || node.quality ||
176 ' AS quality FROM metabib.record_attr_flat mraf WHERE mraf.attr = ''' ||
177 node.svf || ''' AND mraf.value ' || op || ' $2->''' || node.svf || ''') ' ||
178 my_alias || my_using || E'\n';
181 INSERT INTO _vandelay_tmp_jrows (j) VALUES (jrow);
186 CREATE OR REPLACE FUNCTION vandelay.match_set_test_authxml(
187 match_set_id INTEGER, record_xml TEXT
188 ) RETURNS SETOF vandelay.match_set_test_result AS $$
199 tags_rstore := vandelay.flatten_marc_hstore(record_xml);
201 SELECT normalize_heading INTO heading
202 FROM authority.normalize_heading(record_xml);
204 CREATE TEMPORARY TABLE _vandelay_tmp_qrows (q INTEGER);
205 CREATE TEMPORARY TABLE _vandelay_tmp_jrows (j TEXT);
207 -- generate the where clause and return that directly (into wq), and as
208 -- a side-effect, populate the _vandelay_tmp_[qj]rows tables.
209 wq := vandelay.get_expr_from_match_set(
210 match_set_id, tags_rstore, heading);
212 query_ := 'SELECT DISTINCT(record), ';
214 -- qrows table is for the quality bits we add to the SELECT clause
216 'COALESCE(n' || q::TEXT || '.quality, 0)', ' + '
217 ) INTO coal FROM _vandelay_tmp_qrows;
219 -- our query string so far is the SELECT clause and the inital FROM.
220 -- no JOINs yet nor the WHERE clause
221 query_ := query_ || coal || ' AS quality ' || E'\n';
223 -- jrows table is for the joins we must make (and the real text conditions)
224 SELECT STRING_AGG(j, E'\n') INTO joins
225 FROM _vandelay_tmp_jrows;
227 -- add those joins and the where clause to our query.
228 query_ := query_ || joins || E'\n';
230 query_ := query_ || 'JOIN authority.record_entry are ON (are.id = record) '
231 || 'WHERE ' || wq || ' AND not are.deleted';
233 -- this will return rows of record,quality
234 FOR rec IN EXECUTE query_ USING tags_rstore LOOP
238 DROP TABLE _vandelay_tmp_qrows;
239 DROP TABLE _vandelay_tmp_jrows;
244 CREATE OR REPLACE FUNCTION vandelay.measure_auth_record_quality
245 ( xml TEXT, match_set_id INT ) RETURNS INT AS $_$
249 test vandelay.match_set_quality%ROWTYPE;
252 FOR test IN SELECT * FROM vandelay.match_set_quality
253 WHERE match_set = match_set_id LOOP
254 IF test.tag IS NOT NULL THEN
255 FOR rvalue IN SELECT value FROM vandelay.flatten_marc( xml )
256 WHERE tag = test.tag AND subfield = test.subfield LOOP
257 IF test.value = rvalue THEN
258 out_q := out_q + test.quality;
266 $_$ LANGUAGE PLPGSQL;
270 CREATE OR REPLACE FUNCTION vandelay.match_authority_record() RETURNS TRIGGER AS $func$
272 incoming_existing_id TEXT;
273 test_result vandelay.match_set_test_result%ROWTYPE;
277 IF TG_OP IN ('INSERT','UPDATE') AND NEW.imported_as IS NOT NULL THEN
281 DELETE FROM vandelay.authority_match WHERE queued_record = NEW.id;
283 SELECT q.match_set INTO match_set FROM vandelay.authority_queue q WHERE q.id = NEW.queue;
285 IF match_set IS NOT NULL THEN
286 NEW.quality := vandelay.measure_auth_record_quality( NEW.marc, match_set );
289 -- Perfect matches on 901$c exit early with a match with high quality.
290 incoming_existing_id :=
291 oils_xpath_string('//*[@tag="901"]/*[@code="c"][1]', NEW.marc);
293 IF incoming_existing_id IS NOT NULL AND incoming_existing_id != '' THEN
294 SELECT id INTO tmp_rec FROM authority.record_entry WHERE id = incoming_existing_id::bigint;
295 IF tmp_rec IS NOT NULL THEN
296 INSERT INTO vandelay.authority_match (queued_record, eg_record, match_score, quality)
301 -- note: no match_set means quality==0
302 vandelay.measure_auth_record_quality( b.marc, match_set )
303 FROM authority.record_entry b
304 WHERE id = incoming_existing_id::bigint;
308 IF match_set IS NULL THEN
312 FOR test_result IN SELECT * FROM
313 vandelay.match_set_test_authxml(match_set, NEW.marc) LOOP
315 INSERT INTO vandelay.authority_match ( queued_record, eg_record, match_score, quality )
320 vandelay.measure_auth_record_quality( b.marc, match_set )
321 FROM authority.record_entry b
322 WHERE id = test_result.record;
328 $func$ LANGUAGE PLPGSQL;
330 CREATE TRIGGER zz_match_auths_trigger
331 BEFORE INSERT OR UPDATE ON vandelay.queued_authority_record
332 FOR EACH ROW EXECUTE PROCEDURE vandelay.match_authority_record();
334 CREATE OR REPLACE FUNCTION vandelay.auto_overlay_authority_record_with_best ( import_id BIGINT, merge_profile_id INT, lwm_ratio_value_p NUMERIC ) RETURNS BOOL AS $$
337 lwm_ratio_value NUMERIC;
340 lwm_ratio_value := COALESCE(lwm_ratio_value_p, 0.0);
342 PERFORM * FROM vandelay.queued_authority_record WHERE import_time IS NOT NULL AND id = import_id;
345 -- RAISE NOTICE 'already imported, cannot auto-overlay'
349 SELECT m.eg_record INTO eg_id
350 FROM vandelay.authority_match m
351 JOIN vandelay.queued_authority_record qr ON (m.queued_record = qr.id)
352 JOIN vandelay.authority_queue q ON (qr.queue = q.id)
353 JOIN authority.record_entry r ON (r.id = m.eg_record)
354 WHERE m.queued_record = import_id
355 AND qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC >= lwm_ratio_value
356 ORDER BY m.match_score DESC, -- required match score
357 qr.quality::NUMERIC / COALESCE(NULLIF(m.quality,0),1)::NUMERIC DESC, -- quality tie breaker
358 m.id -- when in doubt, use the first match
361 IF eg_id IS NULL THEN
362 -- RAISE NOTICE 'incoming record is not of high enough quality';
366 RETURN vandelay.overlay_authority_record( import_id, eg_id, merge_profile_id );