3 -- subset of types listed in https://www.loc.gov/marc/authority/ad1xx3xx.html
4 -- for now, ignoring subdivisions
5 CREATE TYPE authority.heading_type AS ENUM (
15 'medium_of_performance_term'
18 CREATE TYPE authority.variant_heading_type AS ENUM (
27 CREATE TYPE authority.related_heading_type AS ENUM (
30 'parent organization',
37 CREATE TYPE authority.heading_purpose AS ENUM (
43 CREATE TABLE authority.heading_field (
44 id SERIAL PRIMARY KEY,
45 heading_type authority.heading_type NOT NULL,
46 heading_purpose authority.heading_purpose NOT NULL,
48 format TEXT NOT NULL REFERENCES config.xml_transform (name) DEFAULT 'mads21',
49 heading_xpath TEXT NOT NULL,
50 component_xpath TEXT NOT NULL,
51 type_xpath TEXT NULL, -- to extract related or variant type
52 thesaurus_xpath TEXT NULL,
53 thesaurus_override_xpath TEXT NULL,
57 CREATE TABLE authority.heading_field_norm_map (
58 id SERIAL PRIMARY KEY,
59 field INT NOT NULL REFERENCES authority.heading_field (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
60 norm INT NOT NULL REFERENCES config.index_normalizer (id) ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED,
62 pos INT NOT NULL DEFAULT 0
65 INSERT INTO authority.heading_field(heading_type, heading_purpose, label, heading_xpath, component_xpath, type_xpath, thesaurus_xpath, thesaurus_override_xpath) VALUES
66 ( 'topical_term', 'main', 'Main Topical Term', '/mads21:mads/mads21:authority', '//mads21:topic', NULL, '/mads21:mads/mads21:authority/mads21:topic[1]/@authority', NULL )
67 ,( 'topical_term', 'variant', 'Variant Topical Term', '/mads21:mads/mads21:variant', '//mads21:topic', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:topic[1]/@authority', '//mads21:topic[1]/@authority')
68 ,( 'topical_term', 'related', 'Related Topical Term', '/mads21:mads/mads21:related', '//mads21:topic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:topic[1]/@authority', '//mads21:topic[1]/@authority')
69 ,( 'personal_name', 'main', 'Main Personal Name', '/mads21:mads/mads21:authority', '//mads21:name[@type="personal"]', NULL, NULL, NULL )
70 ,( 'personal_name', 'variant', 'Variant Personal Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="personal"]', NULL, NULL, NULL )
71 ,( 'personal_name', 'related', 'Related Personal Name', '/mads21:mads/mads21:related', '//mads21:name[@type="personal"]', '/mads21:related/@type', NULL, NULL )
72 ,( 'corporate_name', 'main', 'Main Corporate name', '/mads21:mads/mads21:authority', '//mads21:name[@type="corporate"]', NULL, NULL, NULL )
73 ,( 'corporate_name', 'variant', 'Variant Corporate Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="corporate"]', NULL, NULL, NULL )
74 ,( 'corporate_name', 'related', 'Related Corporate Name', '/mads21:mads/mads21:related', '//mads21:name[@type="corporate"]', '/mads21:related/@type', NULL, NULL )
75 ,( 'meeting_name', 'main', 'Main Meeting name', '/mads21:mads/mads21:authority', '//mads21:name[@type="conference"]', NULL, NULL, NULL )
76 ,( 'meeting_name', 'variant', 'Variant Meeting Name', '/mads21:mads/mads21:variant', '//mads21:name[@type="conference"]', NULL, NULL, NULL )
77 ,( 'meeting_name', 'related', 'Related Meeting Name', '/mads21:mads/mads21:related', '//mads21:name[@type="meeting"]', '/mads21:related/@type', NULL, NULL )
78 ,( 'geographic_name', 'main', 'Main Geographic Term', '/mads21:mads/mads21:authority', '//mads21:geographic', NULL, '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', NULL )
79 ,( 'geographic_name', 'variant', 'Variant Geographic Term', '/mads21:mads/mads21:variant', '//mads21:geographic', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority')
80 ,( 'geographic_name', 'related', 'Related Geographic Term', '/mads21:mads/mads21:related', '//mads21:geographic', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:geographic[1]/@authority', '//mads21:geographic[1]/@authority')
81 ,( 'genre_form_term', 'main', 'Main Genre/Form Term', '/mads21:mads/mads21:authority', '//mads21:genre', NULL, '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', NULL )
82 ,( 'genre_form_term', 'variant', 'Variant Genre/Form Term', '/mads21:mads/mads21:variant', '//mads21:genre', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', '//mads21:genre[1]/@authority')
83 ,( 'genre_form_term', 'related', 'Related Genre/Form Term', '/mads21:mads/mads21:related', '//mads21:genre', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:genre[1]/@authority', '//mads21:genre[1]/@authority')
84 ,( 'chronological_term', 'main', 'Main Chronological Term', '/mads21:mads/mads21:authority', '//mads21:temporal', NULL, '/mads21:mads/mads21:authority/mads21:temporal[1]/@authority', NULL )
85 ,( 'chronological_term', 'variant', 'Variant Chronological Term', '/mads21:mads/mads21:variant', '//mads21:temporal', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:temporal[1]/@authority', '//mads21:temporal[1]/@authority')
86 ,( 'chronological_term', 'related', 'Related Chronological Term', '/mads21:mads/mads21:related', '//mads21:temporal', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:temporal[1]/@authority', '//mads21:temporal[1]/@authority')
87 ,( 'uniform_title', 'main', 'Main Uniform Title', '/mads21:mads/mads21:authority', '//mads21:title', NULL, '/mads21:mads/mads21:authority/mads21:title[1]/@authority', NULL )
88 ,( 'uniform_title', 'variant', 'Variant Uniform Title', '/mads21:mads/mads21:variant', '//mads21:title', '/mads21:variant/@type', '/mads21:mads/mads21:authority/mads21:title[1]/@authority', '//mads21:title[1]/@authority')
89 ,( 'uniform_title', 'related', 'Related Uniform Title', '/mads21:mads/mads21:related', '//mads21:title', '/mads21:related/@type', '/mads21:mads/mads21:authority/mads21:title[1]/@authority', '//mads21:title[1]/@authority')
92 -- NACO normalize all the things
93 INSERT INTO authority.heading_field_norm_map (field, norm, pos)
95 FROM authority.heading_field;
97 CREATE TYPE authority.heading AS (
99 type authority.heading_type,
100 purpose authority.heading_purpose,
101 variant_type authority.variant_heading_type,
102 related_type authority.related_heading_type,
105 normalized_heading TEXT
108 CREATE OR REPLACE FUNCTION authority.extract_headings(marc TEXT, restrict INT[] DEFAULT NULL) RETURNS SETOF authority.heading AS $func$
110 idx authority.heading_field%ROWTYPE;
111 xfrm config.xml_transform%ROWTYPE;
113 transformed_xml TEXT;
115 heading_node_list TEXT[];
117 component_node_list TEXT[];
119 normalized_text TEXT;
124 base_thesaurus TEXT := NULL;
125 output_row authority.heading;
128 -- Loop over the indexing entries
129 FOR idx IN SELECT * FROM authority.heading_field WHERE restrict IS NULL OR id = ANY (restrict) ORDER BY format LOOP
131 output_row.field := idx.id;
132 output_row.type := idx.heading_type;
133 output_row.purpose := idx.heading_purpose;
135 joiner := COALESCE(idx.joiner, ' ');
137 SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;
139 -- See if we can skip the XSLT ... it's expensive
140 IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
141 -- Can't skip the transform
142 IF xfrm.xslt <> '---' THEN
143 transformed_xml := oils_xslt_process(marc, xfrm.xslt);
145 transformed_xml := marc;
148 prev_xfrm := xfrm.name;
151 IF idx.thesaurus_xpath IS NOT NULL THEN
152 base_thesaurus := ARRAY_TO_STRING(oils_xpath(idx.thesaurus_xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
155 heading_node_list := oils_xpath( idx.heading_xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
157 FOR heading_node IN SELECT x FROM unnest(heading_node_list) AS x LOOP
159 CONTINUE WHEN heading_node !~ E'^\\s*<';
161 output_row.variant_type := NULL;
162 output_row.related_type := NULL;
163 output_row.thesaurus := NULL;
164 output_row.heading := NULL;
166 IF idx.heading_purpose = 'variant' AND idx.type_xpath IS NOT NULL THEN
167 type_value := ARRAY_TO_STRING(oils_xpath(idx.type_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
169 output_row.variant_type := type_value;
170 EXCEPTION WHEN invalid_text_representation THEN
171 RAISE NOTICE 'Do not recognize variant heading type %', type_value;
174 IF idx.heading_purpose = 'related' AND idx.type_xpath IS NOT NULL THEN
175 type_value := ARRAY_TO_STRING(oils_xpath(idx.type_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
177 output_row.related_type := type_value;
178 EXCEPTION WHEN invalid_text_representation THEN
179 RAISE NOTICE 'Do not recognize related heading type %', type_value;
183 IF idx.thesaurus_override_xpath IS NOT NULL THEN
184 output_row.thesaurus := ARRAY_TO_STRING(oils_xpath(idx.thesaurus_override_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]), '');
186 IF output_row.thesaurus IS NULL THEN
187 output_row.thesaurus := base_thesaurus;
192 -- now iterate over components of heading
193 component_node_list := oils_xpath( idx.component_xpath, heading_node, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
194 FOR component_node IN SELECT x FROM unnest(component_node_list) AS x LOOP
195 -- XXX much of this should be moved into oils_xpath_string...
196 curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
197 oils_xpath( '//text()', -- get the content of all the nodes within the main selected node
198 REGEXP_REPLACE( component_node, E'\\s+', ' ', 'g' ) -- Translate adjacent whitespace to a single space
199 ), ' '), ''), -- throw away morally empty (bankrupt?) strings
203 CONTINUE WHEN curr_text IS NULL OR curr_text = '';
205 IF raw_text IS NOT NULL THEN
206 raw_text := raw_text || joiner;
209 raw_text := COALESCE(raw_text,'') || curr_text;
212 IF raw_text IS NOT NULL THEN
213 output_row.heading := raw_text;
214 normalized_text := raw_text;
217 SELECT n.func AS func,
218 n.param_count AS param_count,
220 FROM config.index_normalizer n
221 JOIN authority.heading_field_norm_map m ON (m.norm = n.id)
222 WHERE m.field = idx.id
225 EXECUTE 'SELECT ' || normalizer.func || '(' ||
226 quote_literal( normalized_text ) ||
228 WHEN normalizer.param_count > 0
229 THEN ',' || REPLACE(REPLACE(BTRIM(normalizer.params,'[]'),E'\'',E'\\\''),E'"',E'\'')
232 ')' INTO normalized_text;
236 output_row.normalized_heading := normalized_text;
238 RETURN NEXT output_row;
244 $func$ LANGUAGE PLPGSQL;
246 CREATE OR REPLACE FUNCTION authority.extract_headings(rid BIGINT, restrict INT[] DEFAULT NULL) RETURNS SETOF authority.heading AS $func$
248 auth authority.record_entry%ROWTYPE;
249 output_row authority.heading;
252 SELECT INTO auth * FROM authority.record_entry WHERE id = rid;
254 RETURN QUERY SELECT * FROM authority.extract_headings(auth.marc, restrict);
256 $func$ LANGUAGE PLPGSQL;
258 CREATE OR REPLACE FUNCTION authority.simple_heading_set( marcxml TEXT ) RETURNS SETOF authority.simple_heading AS $func$
260 res authority.simple_heading%ROWTYPE;
261 acsaf authority.control_set_authority_field%ROWTYPE;
262 heading_row authority.heading%ROWTYPE;
273 auth_id INT DEFAULT COALESCE(NULLIF(oils_xpath_string('//*[@tag="901"]/*[local-name()="subfield" and @code="c"]', marcxml), ''), '0')::INT;
276 SELECT control_set INTO cset FROM authority.record_entry WHERE id = auth_id;
279 SELECT control_set INTO cset
280 FROM authority.control_set_authority_field
281 WHERE tag IN ( SELECT UNNEST(XPATH('//*[starts-with(@tag,"1")]/@tag',marcxml::XML)::TEXT[]))
285 res.record := auth_id;
286 res.thesaurus := authority.extract_thesaurus(marcxml);
288 FOR acsaf IN SELECT * FROM authority.control_set_authority_field WHERE control_set = cset LOOP
289 res.atag := acsaf.id;
291 IF acsaf.heading_field IS NULL THEN
292 tag_used := acsaf.tag;
293 nfi_used := acsaf.nfi;
294 joiner_text := COALESCE(acsaf.joiner, ' ');
296 FOR tmp_xml IN SELECT UNNEST(XPATH('//*[@tag="'||tag_used||'"]', marcxml::XML)::TEXT[]) LOOP
298 heading_text := COALESCE(
299 oils_xpath_string('./*[contains("'||acsaf.display_sf_list||'",@code)]', tmp_xml, joiner_text),
303 IF nfi_used IS NOT NULL THEN
305 sort_text := SUBSTRING(
310 oils_xpath_string('./@ind'||nfi_used, tmp_xml::TEXT),
322 sort_text := heading_text;
325 IF heading_text IS NOT NULL AND heading_text <> '' THEN
326 res.value := heading_text;
327 res.sort_value := public.naco_normalize(sort_text);
328 res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
334 FOR heading_row IN SELECT * FROM authority.extract_headings(marcxml, ARRAY[acsaf.heading_field]) LOOP
335 res.value := heading_row.heading;
336 res.sort_value := heading_row.normalized_heading;
337 res.index_vector = to_tsvector('keyword'::regconfig, res.sort_value);
345 $func$ LANGUAGE PLPGSQL STABLE STRICT;
347 ALTER TABLE authority.control_set_authority_field ADD COLUMN heading_field INTEGER REFERENCES authority.heading_field(id);
349 UPDATE authority.control_set_authority_field acsaf
350 SET heading_field = ahf.id
351 FROM authority.heading_field ahf
354 AND ahf.heading_purpose = 'main'
355 AND ahf.heading_type = 'personal_name';
356 UPDATE authority.control_set_authority_field acsaf
357 SET heading_field = ahf.id
358 FROM authority.heading_field ahf
361 AND ahf.heading_purpose = 'variant'
362 AND ahf.heading_type = 'personal_name';
363 UPDATE authority.control_set_authority_field acsaf
364 SET heading_field = ahf.id
365 FROM authority.heading_field ahf
368 AND ahf.heading_purpose = 'related'
369 AND ahf.heading_type = 'personal_name';
371 UPDATE authority.control_set_authority_field acsaf
372 SET heading_field = ahf.id
373 FROM authority.heading_field ahf
376 AND ahf.heading_purpose = 'main'
377 AND ahf.heading_type = 'corporate_name';
378 UPDATE authority.control_set_authority_field acsaf
379 SET heading_field = ahf.id
380 FROM authority.heading_field ahf
383 AND ahf.heading_purpose = 'variant'
384 AND ahf.heading_type = 'corporate_name';
385 UPDATE authority.control_set_authority_field acsaf
386 SET heading_field = ahf.id
387 FROM authority.heading_field ahf
390 AND ahf.heading_purpose = 'related'
391 AND ahf.heading_type = 'corporate_name';
393 UPDATE authority.control_set_authority_field acsaf
394 SET heading_field = ahf.id
395 FROM authority.heading_field ahf
398 AND ahf.heading_purpose = 'main'
399 AND ahf.heading_type = 'meeting_name';
400 UPDATE authority.control_set_authority_field acsaf
401 SET heading_field = ahf.id
402 FROM authority.heading_field ahf
405 AND ahf.heading_purpose = 'variant'
406 AND ahf.heading_type = 'meeting_name';
407 UPDATE authority.control_set_authority_field acsaf
408 SET heading_field = ahf.id
409 FROM authority.heading_field ahf
412 AND ahf.heading_purpose = 'related'
413 AND ahf.heading_type = 'meeting_name';
415 UPDATE authority.control_set_authority_field acsaf
416 SET heading_field = ahf.id
417 FROM authority.heading_field ahf
420 AND ahf.heading_purpose = 'main'
421 AND ahf.heading_type = 'uniform_title';
422 UPDATE authority.control_set_authority_field acsaf
423 SET heading_field = ahf.id
424 FROM authority.heading_field ahf
427 AND ahf.heading_purpose = 'variant'
428 AND ahf.heading_type = 'uniform_title';
429 UPDATE authority.control_set_authority_field acsaf
430 SET heading_field = ahf.id
431 FROM authority.heading_field ahf
434 AND ahf.heading_purpose = 'related'
435 AND ahf.heading_type = 'uniform_title';
437 UPDATE authority.control_set_authority_field acsaf
438 SET heading_field = ahf.id
439 FROM authority.heading_field ahf
442 AND ahf.heading_purpose = 'main'
443 AND ahf.heading_type = 'topical_term';
444 UPDATE authority.control_set_authority_field acsaf
445 SET heading_field = ahf.id
446 FROM authority.heading_field ahf
449 AND ahf.heading_purpose = 'variant'
450 AND ahf.heading_type = 'topical_term';
451 UPDATE authority.control_set_authority_field acsaf
452 SET heading_field = ahf.id
453 FROM authority.heading_field ahf
456 AND ahf.heading_purpose = 'related'
457 AND ahf.heading_type = 'topical_term';
459 UPDATE authority.control_set_authority_field acsaf
460 SET heading_field = ahf.id
461 FROM authority.heading_field ahf
464 AND ahf.heading_purpose = 'main'
465 AND ahf.heading_type = 'geographic_name';
466 UPDATE authority.control_set_authority_field acsaf
467 SET heading_field = ahf.id
468 FROM authority.heading_field ahf
471 AND ahf.heading_purpose = 'variant'
472 AND ahf.heading_type = 'geographic_name';
473 UPDATE authority.control_set_authority_field acsaf
474 SET heading_field = ahf.id
475 FROM authority.heading_field ahf
478 AND ahf.heading_purpose = 'related'
479 AND ahf.heading_type = 'geographic_name';
481 UPDATE authority.control_set_authority_field acsaf
482 SET heading_field = ahf.id
483 FROM authority.heading_field ahf
486 AND ahf.heading_purpose = 'main'
487 AND ahf.heading_type = 'genre_form_term';
488 UPDATE authority.control_set_authority_field acsaf
489 SET heading_field = ahf.id
490 FROM authority.heading_field ahf
493 AND ahf.heading_purpose = 'variant'
494 AND ahf.heading_type = 'genre_form_term';
495 UPDATE authority.control_set_authority_field acsaf
496 SET heading_field = ahf.id
497 FROM authority.heading_field ahf
500 AND ahf.heading_purpose = 'related'
501 AND ahf.heading_type = 'genre_form_term';