3 SELECT evergreen.upgrade_deps_block_check('1069', :eg_version); --gmcharlt/kmlussier
5 -- subset of types listed in https://www.loc.gov/marc/authority/ad1xx3xx.html
6 -- for now, ignoring subdivisions
7 CREATE TYPE authority.heading_type AS ENUM (
17 'medium_of_performance_term'
20 CREATE TYPE authority.variant_heading_type AS ENUM (
29 CREATE TYPE authority.related_heading_type AS ENUM (
32 'parent organization',
39 CREATE TYPE authority.heading_purpose AS ENUM (
45 CREATE TABLE authority.heading_field (
46 id SERIAL PRIMARY KEY,
47 heading_type authority.heading_type NOT NULL,
48 heading_purpose authority.heading_purpose NOT NULL,
50 format TEXT NOT NULL REFERENCES config.xml_transform (name) DEFAULT 'mads21',
51 heading_xpath TEXT NOT NULL,
52 component_xpath TEXT NOT NULL,
53 type_xpath TEXT NULL, -- to extract related or variant type
54 thesaurus_xpath TEXT NULL,
55 thesaurus_override_xpath TEXT NULL,
59 CREATE TABLE authority.heading_field_norm_map (
60 id SERIAL PRIMARY KEY,
61 field INT NOT NULL REFERENCES authority.heading_field (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
62 norm INT NOT NULL REFERENCES config.index_normalizer (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
64 pos INT NOT NULL DEFAULT 0
67 INSERT INTO authority.heading_field(heading_type, heading_purpose, label, heading_xpath, component_xpath, type_xpath, thesaurus_xpath, thesaurus_override_xpath) VALUES
68 ( 'topical_term', 'main', 'Main Topical Term', '/mads21:mads/mads21:authority', '//mads21:topic', NULL, '/mads21:mads/mads21:authority/mads21:topic[1]/@authority', NULL )
69 ,( 'topical_term', 'variant', 'Variant Topical Term', '/mads21:mads/mads21:variant', '//mads21:topic', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:topic[1]/@authority', '//mads21:topic[1]/@authority')
70 ,( 'topical_term', 'related', 'Related Topical Term', '/mads21:mads/mads21:related', '//mads21:topic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:topic[1]/@authority', '//mads21:topic[1]/@authority')
71 ,( 'personal_name', 'main', 'Main Personal Name', '/mads21:mads/mads21:authority', '//mads21:name[@type="personal"]', NULL, NULL, NULL )
72 ,( 'personal_name', 'variant', 'Variant Personal Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="personal"]', NULL, NULL, NULL )
73 ,( 'personal_name', 'related', 'Related Personal Name', '/mads21:mads/mads21:related', '//mads21:name[@type="personal"]', '/mads21:related/@type', NULL, NULL )
74 ,( 'corporate_name', 'main', 'Main Corporate name', '/mads21:mads/mads21:authority', '//mads21:name[@type="corporate"]', NULL, NULL, NULL )
75 ,( 'corporate_name', 'variant', 'Variant Corporate Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="corporate"]', NULL, NULL, NULL )
76 ,( 'corporate_name', 'related', 'Related Corporate Name', '/mads21:mads/mads21:related', '//mads21:name[@type="corporate"]', '/mads21:related/@type', NULL, NULL )
77 ,( 'meeting_name', 'main', 'Main Meeting name', '/mads21:mads/mads21:authority', '//mads21:name[@type="conference"]', NULL, NULL, NULL )
78 ,( 'meeting_name', 'variant', 'Variant Meeting Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="conference"]', NULL, NULL, NULL )
79 ,( 'meeting_name', 'related', 'Related Meeting Name', '/mads21:mads/mads21:related', '//mads21:name[@type="meeting"]', '/mads21:related/@type', NULL, NULL )
80 ,( 'geographic_name', 'main', 'Main Geographic Term', '/mads21:mads/mads21:authority', '//mads21:geographic', NULL, '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', NULL )
81 ,( 'geographic_name', 'variant', 'Variant Geographic Term', '/mads21:mads/mads21:variant', '//mads21:geographic', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority')
82 ,( 'geographic_name', 'related', 'Related Geographic Term', '/mads21:mads/mads21:related', '//mads21:geographic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority')
83 ,( 'genre_form_term', 'main', 'Main Genre/Form Term', '/mads21:mads/mads21:authority', '//mads21:genre', NULL, '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', NULL )
84 ,( 'genre_form_term', 'variant', 'Variant Genre/Form Term', '/mads21:mads/mads21:variant', '//mads21:genre', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', '//mads21:genre[1]/@authority')
85 ,( 'genre_form_term', 'related', 'Related Genre/Form Term', '/mads21:mads/mads21:related', '//mads21:genre', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', '//mads21:genre[1]/@authority')
86 ,( 'chronological_term', 'main', 'Main Chronological Term', '/mads21:mads/mads21:authority', '//mads21:temporal', NULL, '/mads21:mads/mads21:authority/mads21:temporal[1]/@authority', NULL )
87 ,( 'chronological_term', 'variant', 'Variant Chronological Term', '/mads21:mads/mads21:variant', '//mads21:temporal', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:temporal[1]/@authority', '//mads21:temporal[1]/@authority')
88 ,( 'chronological_term', 'related', 'Related Chronological Term', '/mads21:mads/mads21:related', '//mads21:temporal', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:temporal[1]/@authority', '//mads21:temporal[1]/@authority')
89 ,( 'uniform_title', 'main', 'Main Uniform Title', '/mads21:mads/mads21:authority', '//mads21:title', NULL, '/mads21:mads/mads21:authority/mads21:title[1]/@authority', NULL )
90 ,( 'uniform_title', 'variant', 'Variant Uniform Title', '/mads21:mads/mads21:variant', '//mads21:title', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:title[1]/@authority', '//mads21:title[1]/@authority')
91 ,( 'uniform_title', 'related', 'Related Uniform Title', '/mads21:mads/mads21:related', '//mads21:title', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:title[1]/@authority', '//mads21:title[1]/@authority')
94 -- NACO normalize all the things
95 INSERT INTO authority.heading_field_norm_map (field, norm, pos)
97 FROM authority.heading_field;
99 CREATE TYPE authority.heading AS (
101 type authority.heading_type,
102 purpose authority.heading_purpose,
103 variant_type authority.variant_heading_type,
104 related_type authority.related_heading_type,
107 normalized_heading TEXT
110 CREATE OR REPLACE FUNCTION authority.extract_headings(marc TEXT, restrict INT[] DEFAULT NULL) RETURNS SETOF authority.heading AS $func$
112 idx authority.heading_field%ROWTYPE;
113 xfrm config.xml_transform%ROWTYPE;
115 transformed_xml TEXT;
117 heading_node_list TEXT[];
119 component_node_list TEXT[];
121 normalized_text TEXT;
126 base_thesaurus TEXT := NULL;
127 output_row authority.heading;
130 -- Loop over the indexing entries
131 FOR idx IN SELECT * FROM authority.heading_field WHERE restrict IS NULL OR id = ANY (restrict) ORDER BY format LOOP
133 output_row.field := idx.id;
134 output_row.type := idx.heading_type;
135 output_row.purpose := idx.heading_purpose;
137 joiner := COALESCE(idx.joiner, ' ');
139 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
141 -- See if we can skip the XSLT ... it's expensive
142 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
143 -- Can't skip the transform
144 IF xfrm.xslt <> '---' THEN
145 transformed_xml := oils_xslt_process(marc, xfrm.xslt);
147 transformed_xml := marc;
150 prev_xfrm := xfrm.name;
153 IF idx.thesaurus_xpath IS NOT NULL THEN
154 base_thesaurus := ARRAY_TO_STRING(oils_xpath(idx.thesaurus_xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
157 heading_node_list := oils_xpath( idx.heading_xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
159 FOR heading_node IN SELECT x FROM unnest(heading_node_list) AS x LOOP
161 CONTINUE WHEN heading_node !~ E'^\\s*<';
163 output_row.variant_type := NULL;
164 output_row.related_type := NULL;
165 output_row.thesaurus := NULL;
166 output_row.heading := NULL;
168 IF idx.heading_purpose = 'variant' AND idx.type_xpath IS NOT NULL THEN
169 type_value := ARRAY_TO_STRING(oils_xpath(idx.type_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
171 output_row.variant_type := type_value;
172 EXCEPTION WHEN invalid_text_representation THEN
173 RAISE NOTICE 'Do not recognize variant heading type %', type_value;
176 IF idx.heading_purpose = 'related' AND idx.type_xpath IS NOT NULL THEN
177 type_value := ARRAY_TO_STRING(oils_xpath(idx.type_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
179 output_row.related_type := type_value;
180 EXCEPTION WHEN invalid_text_representation THEN
181 RAISE NOTICE 'Do not recognize related heading type %', type_value;
185 IF idx.thesaurus_override_xpath IS NOT NULL THEN
186 output_row.thesaurus := ARRAY_TO_STRING(oils_xpath(idx.thesaurus_override_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
188 IF output_row.thesaurus IS NULL THEN
189 output_row.thesaurus := base_thesaurus;
194 -- now iterate over components of heading
195 component_node_list := oils_xpath( idx.component_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
196 FOR component_node IN SELECT x FROM unnest(component_node_list) AS x LOOP
197 -- XXX much of this should be moved into oils_xpath_string...
198 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
199 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
200 REGEXP_REPLACE( component_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
201 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
205 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
207 IF raw_text IS NOT NULL THEN
208 raw_text := raw_text || joiner;
211 raw_text := COALESCE(raw_text,'') || curr_text;
214 IF raw_text IS NOT NULL THEN
215 output_row.heading := raw_text;
216 normalized_text := raw_text;
219 SELECT n.func AS func,
220 n.param_count AS param_count,
222 FROM config.index_normalizer n
223 JOIN authority.heading_field_norm_map m ON (m.norm = n.id)
224 WHERE m.field = idx.id
227 EXECUTE 'SELECT ' || normalizer.func || '(' ||
228 quote_literal( normalized_text ) ||
230 WHEN normalizer.param_count > 0
231 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
234 ')' INTO normalized_text;
238 output_row.normalized_heading := normalized_text;
240 RETURN NEXT output_row;
246 $func$ LANGUAGE PLPGSQL;
248 CREATE OR REPLACE FUNCTION authority.extract_headings(rid BIGINT, restrict INT[] DEFAULT NULL) RETURNS SETOF authority.heading AS $func$
250 auth authority.record_entry%ROWTYPE;
251 output_row authority.heading;
254 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
256 RETURN QUERY SELECT * FROM authority.extract_headings(auth.marc, restrict);
258 $func$ LANGUAGE PLPGSQL;
260 CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$
262 res authority.simple_heading%ROWTYPE;
263 acsaf authority.control_set_authority_field%ROWTYPE;
264 heading_row authority.heading%ROWTYPE;
275 auth_id INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT;
278 SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id;
281 SELECT control_set INTO cset
282 FROM authority.control_set_authority_field
283 WHERE tag IN ( SELECT UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[]))
287 res.record := auth_id;
288 res.thesaurus := authority.extract_thesaurus(marcxml);
290 FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
291 res.atag := acsaf.id;
293 IF acsaf.heading_field IS NULL THEN
294 tag_used := acsaf.tag;
295 nfi_used := acsaf.nfi;
296 joiner_text := COALESCE(acsaf.joiner, ' ');
298 FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP
300 heading_text := COALESCE(
301 oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text),
305 IF nfi_used IS NOT NULL THEN
307 sort_text := SUBSTRING(
312 oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT),
324 sort_text := heading_text;
327 IF heading_text IS NOT NULL AND heading_text <> '' THEN
328 res.value := heading_text;
329 res.sort_value := public.naco_normalize(sort_text);
330 res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
336 FOR heading_row IN SELECT * FROM authority.extract_headings(marcxml, ARRAY[acsaf.heading_field]) LOOP
337 res.value := heading_row.heading;
338 res.sort_value := heading_row.normalized_heading;
339 res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
347 $func$ LANGUAGE PLPGSQL STABLE STRICT;
349 ALTER TABLE authority.control_set_authority_field ADD COLUMN heading_field INTEGER REFERENCES authority.heading_field(id);
351 UPDATE authority.control_set_authority_field acsaf
352 SET heading_field = ahf.id
353 FROM authority.heading_field ahf
356 AND ahf.heading_purpose = 'main'
357 AND ahf.heading_type = 'personal_name';
358 UPDATE authority.control_set_authority_field acsaf
359 SET heading_field = ahf.id
360 FROM authority.heading_field ahf
363 AND ahf.heading_purpose = 'variant'
364 AND ahf.heading_type = 'personal_name';
365 UPDATE authority.control_set_authority_field acsaf
366 SET heading_field = ahf.id
367 FROM authority.heading_field ahf
370 AND ahf.heading_purpose = 'related'
371 AND ahf.heading_type = 'personal_name';
373 UPDATE authority.control_set_authority_field acsaf
374 SET heading_field = ahf.id
375 FROM authority.heading_field ahf
378 AND ahf.heading_purpose = 'main'
379 AND ahf.heading_type = 'corporate_name';
380 UPDATE authority.control_set_authority_field acsaf
381 SET heading_field = ahf.id
382 FROM authority.heading_field ahf
385 AND ahf.heading_purpose = 'variant'
386 AND ahf.heading_type = 'corporate_name';
387 UPDATE authority.control_set_authority_field acsaf
388 SET heading_field = ahf.id
389 FROM authority.heading_field ahf
392 AND ahf.heading_purpose = 'related'
393 AND ahf.heading_type = 'corporate_name';
395 UPDATE authority.control_set_authority_field acsaf
396 SET heading_field = ahf.id
397 FROM authority.heading_field ahf
400 AND ahf.heading_purpose = 'main'
401 AND ahf.heading_type = 'meeting_name';
402 UPDATE authority.control_set_authority_field acsaf
403 SET heading_field = ahf.id
404 FROM authority.heading_field ahf
407 AND ahf.heading_purpose = 'variant'
408 AND ahf.heading_type = 'meeting_name';
409 UPDATE authority.control_set_authority_field acsaf
410 SET heading_field = ahf.id
411 FROM authority.heading_field ahf
414 AND ahf.heading_purpose = 'related'
415 AND ahf.heading_type = 'meeting_name';
417 UPDATE authority.control_set_authority_field acsaf
418 SET heading_field = ahf.id
419 FROM authority.heading_field ahf
422 AND ahf.heading_purpose = 'main'
423 AND ahf.heading_type = 'uniform_title';
424 UPDATE authority.control_set_authority_field acsaf
425 SET heading_field = ahf.id
426 FROM authority.heading_field ahf
429 AND ahf.heading_purpose = 'variant'
430 AND ahf.heading_type = 'uniform_title';
431 UPDATE authority.control_set_authority_field acsaf
432 SET heading_field = ahf.id
433 FROM authority.heading_field ahf
436 AND ahf.heading_purpose = 'related'
437 AND ahf.heading_type = 'uniform_title';
439 UPDATE authority.control_set_authority_field acsaf
440 SET heading_field = ahf.id
441 FROM authority.heading_field ahf
444 AND ahf.heading_purpose = 'main'
445 AND ahf.heading_type = 'topical_term';
446 UPDATE authority.control_set_authority_field acsaf
447 SET heading_field = ahf.id
448 FROM authority.heading_field ahf
451 AND ahf.heading_purpose = 'variant'
452 AND ahf.heading_type = 'topical_term';
453 UPDATE authority.control_set_authority_field acsaf
454 SET heading_field = ahf.id
455 FROM authority.heading_field ahf
458 AND ahf.heading_purpose = 'related'
459 AND ahf.heading_type = 'topical_term';
461 UPDATE authority.control_set_authority_field acsaf
462 SET heading_field = ahf.id
463 FROM authority.heading_field ahf
466 AND ahf.heading_purpose = 'main'
467 AND ahf.heading_type = 'geographic_name';
468 UPDATE authority.control_set_authority_field acsaf
469 SET heading_field = ahf.id
470 FROM authority.heading_field ahf
473 AND ahf.heading_purpose = 'variant'
474 AND ahf.heading_type = 'geographic_name';
475 UPDATE authority.control_set_authority_field acsaf
476 SET heading_field = ahf.id
477 FROM authority.heading_field ahf
480 AND ahf.heading_purpose = 'related'
481 AND ahf.heading_type = 'geographic_name';
483 UPDATE authority.control_set_authority_field acsaf
484 SET heading_field = ahf.id
485 FROM authority.heading_field ahf
488 AND ahf.heading_purpose = 'main'
489 AND ahf.heading_type = 'genre_form_term';
490 UPDATE authority.control_set_authority_field acsaf
491 SET heading_field = ahf.id
492 FROM authority.heading_field ahf
495 AND ahf.heading_purpose = 'variant'
496 AND ahf.heading_type = 'genre_form_term';
497 UPDATE authority.control_set_authority_field acsaf
498 SET heading_field = ahf.id
499 FROM authority.heading_field ahf
502 AND ahf.heading_purpose = 'related'
503 AND ahf.heading_type = 'genre_form_term';