BEGIN;

-- Make sure upgrade patch 0844 can be applied before touching anything else
SELECT
    evergreen.upgrade_deps_block_check('0844', :eg_version);
-- 953.data.MODS32-xsl.sql
--
-- Overwrites the stylesheet text stored in config.xml_transform.xslt for the
-- row whose name is 'mods32'. The new value is the dollar-quoted literal that
-- starts on the UPDATE line and ends on the WHERE line.
--
-- NOTE(review): the literal below contains no XML or XSLT markup at all, only
-- bare text fragments (subfield code lists, genre labels, script names). It
-- looks as though the element tags were stripped from the stylesheet at some
-- point. Confirm against the upstream MODS 3.2 stylesheet before running this
-- statement. The literal is left exactly as found.
UPDATE config.xml_transform SET xslt=$$
BK
SE
BK
MM
CF
MP
VM
MU
b
afgk
abfgk
b
b
afgk
abfgk
abfgk
a
a
af
ah
creator
creator
creator
personal
yes
yes
text
cartographic
notated music
sound recording-nonmusical
sound recording-musical
still image
moving image
three dimensional object
software, multimedia
mixed material
globe
remote sensing image
map
atlas
database
loose-leaf
series
newspaper
periodical
web site
abstract or summary
bibliography
catalog
dictionary
encyclopedia
handbook
legal article
index
discography
legislation
theses
survey of literature
review
programmed text
filmography
directory
statistics
technical report
legal case and case notes
law report or digest
treaty
conference publication
numeric data
database
font
game
patent
festschrift
biography
essay
drama
comic strip
fiction
humor, satire
letter
novel
short story
speech
biography
conference publication
drama
essay
fiction
folktale
history
humor, satire
memoir
poetry
rehearsal
reporting
sound
speech
art original
kit
art reproduction
diorama
filmstrip
legal article
picture
graphic
technical drawing
motion picture
chart
flash card
microscope slide
model
realia
slide
transparency
videorecording
toy
abvxyz
-
code
marccountry
code
iso3166
text
:,;/
monographic
continuing
ab
reformatted digital
digitized microfilm
digitized other analog
access
preservation
replacement
abce
ab
agrt
ab
adolescent
adult
general
juvenile
preschool
specialized
defg
marcgac
iso3166
ab
abx
ab
ab
av
av
abcx3
tfklmorsv
t
g
aq
t
g
tfklmorsv
t
dg
c
t
dgn
tfklsv
t
g
aqdc
t
gn
adfgklmorsv
tfklmorsv
t
g
aq
t
g
tfklmorsv
t
dg
c
t
dgn
tfklsv
t
g
aqdc
t
gn
adfgklmorsv
isbn
isrc
ismn
sici
ab
issn
lccn
issue number
matrix number
music plate
music publisher
videorecording identifier
ba
ab
ab
doi
hdl
uri
y3z
y3
z
abje
abcd35
abcde35
n
n
fgkdlmor
p
p
fgkdlmor
g
g
pst
p
p
fgkdlmor
cdn
aq
:,;/
acdeq
constituent
code
marcgac
lcsh
lcshac
mesh
nal
csh
rvm
aq
cdnp
abcdeqnp
adfhklor
abcd
bc
yes
Arabic
Latin
Chinese, Japanese, Korean
Cyrillic
Hebrew
Greek
summary or subtitle
sung or spoken text
libretto
table of contents
accompanying material
translation
summary or subtitle
sung or spoken text
libretto
table of contents
accompanying material
translation
.:,;/
$$ WHERE name = 'mods32';
-- biblio.extract_metabib_field_entry( rid, default_joiner )
--
-- Produces the index rows for one bibliographic record.
--
--   rid            : id of the biblio.record_entry row to index
--   default_joiner : text placed between node values when the index
--                    definition (config.metabib_field.joiner) supplies none
--
-- Returns a set of metabib.field_entry_template rows. For every row in
-- config.metabib_field, and for every XML node its xpath matches:
--   * a browse row (browse_field = TRUE, plus search_field = TRUE when the
--     definition is also a search field) if the definition is a browse field;
--   * a facet row (facet_field = TRUE, field = -1 * definition id) if the
--     definition is a facet field.
-- After all nodes of a definition have been visited, one search row
-- (search_field = TRUE) carrying the joined text of all nodes is returned
-- if the definition is a search field.
CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
DECLARE
    bib             biblio.record_entry%ROWTYPE;
    idx             config.metabib_field%ROWTYPE;
    xfrm            config.xml_transform%ROWTYPE;
    prev_xfrm       TEXT;
    transformed_xml TEXT;
    xml_node        TEXT;
    xml_node_list   TEXT[];
    facet_text      TEXT;
    browse_text     TEXT;
    sort_value      TEXT;
    raw_text        TEXT;
    curr_text       TEXT;
    joiner          TEXT := default_joiner; -- XXX will index defs supply a joiner?
    authority_text  TEXT;
    authority_link  BIGINT;
    output_row      metabib.field_entry_template%ROWTYPE;
BEGIN

    -- Start out with no field-use bools set
    output_row.browse_field := FALSE;
    output_row.facet_field := FALSE;
    output_row.search_field := FALSE;

    -- Get the record
    SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;

    -- Loop over the indexing entries. Ordering by format keeps definitions
    -- that share a transform adjacent, so the transform result can be reused.
    FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP

        joiner := COALESCE(idx.joiner, default_joiner);

        SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;

        -- See if we can skip the XSLT ... it's expensive
        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
            -- Can't skip the transform
            IF xfrm.xslt <> '---' THEN
                transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
            ELSE
                -- An xslt value of '---' means: index the MARC XML as it is
                transformed_xml := bib.marc;
            END IF;

            prev_xfrm := xfrm.name;
        END IF;

        xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );

        raw_text := NULL;
        FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
            -- Only element nodes (serialized text starting with '<') are indexed
            CONTINUE WHEN xml_node !~ E'^\\s*<';

            -- XXX much of this should be moved into oils_xpath_string...
            -- Re-escape the node so it parses as XML again, collapse runs of
            -- whitespace, pull out every text() node, drop the ' ' and ''
            -- entries, and join what is left with the joiner.
            curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
                oils_xpath( '//text()',
                    REGEXP_REPLACE(
                        REGEXP_REPLACE( -- This escapes all &s not followed by "amp;". Data is returned from oils_xpath (above) in UTF-8, not entity encoded
                            REGEXP_REPLACE( -- This escapes embedded '<' characters found inside text content
                                xml_node,
                                $re$(>[^<]+)(<)([^>]+<)$re$,
                                E'\\1&lt;\\3',
                                'g'
                            ),
                            '&(?!amp;)',
                            '&amp;',
                            'g'
                        ),
                        E'\\s+',
                        ' ',
                        'g'
                    )
                ), ' '), ''),
                joiner
            );

            CONTINUE WHEN curr_text IS NULL OR curr_text = '';

            -- Accumulate the combined text used for the search row below
            IF raw_text IS NOT NULL THEN
                raw_text := raw_text || joiner;
            END IF;

            raw_text := COALESCE(raw_text,'') || curr_text;

            -- autosuggest/metabib.browse_entry
            IF idx.browse_field THEN

                IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
                    browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
                ELSE
                    browse_text := curr_text;
                END IF;

                -- Sort value comes from browse_sort_xpath when configured,
                -- otherwise it is the browse text itself
                IF idx.browse_sort_xpath IS NOT NULL AND
                    idx.browse_sort_xpath <> '' THEN

                    sort_value := oils_xpath_string(
                        idx.browse_sort_xpath, xml_node, joiner,
                        ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
                    );
                ELSE
                    sort_value := browse_text;
                END IF;

                output_row.field_class := idx.field_class;
                output_row.field := idx.id;
                output_row.source := rid;
                output_row.value := BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
                output_row.sort_value :=
                    public.naco_normalize(sort_value);

                output_row.authority := NULL;

                IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
                    authority_text := oils_xpath_string(
                        idx.authority_xpath, xml_node, joiner,
                        ARRAY[
                            ARRAY[xfrm.prefix, xfrm.namespace_uri],
                            ARRAY['xlink','http://www.w3.org/1999/xlink']
                        ]
                    );

                    -- Link only when the value is all digits and names an
                    -- existing authority record
                    IF authority_text ~ '^\d+$' THEN
                        authority_link := authority_text::BIGINT;
                        PERFORM * FROM authority.record_entry WHERE id = authority_link;
                        IF FOUND THEN
                            output_row.authority := authority_link;
                        END IF;
                    END IF;

                END IF;

                output_row.browse_field := TRUE;
                -- Returning browse rows with search_field = true for search+browse
                -- configs allows us to retain granularity of being able to search
                -- browse fields with "starts with" type operators (for example, for
                -- titles of songs in music albums)
                IF idx.search_field THEN
                    output_row.search_field := TRUE;
                END IF;
                RETURN NEXT output_row;
                output_row.browse_field := FALSE;
                output_row.search_field := FALSE;
                output_row.sort_value := NULL;
            END IF;

            -- insert raw node text for faceting
            IF idx.facet_field THEN

                IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
                    facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
                ELSE
                    facet_text := curr_text;
                END IF;

                output_row.field_class := idx.field_class;
                -- Facet rows are returned with a negated definition id
                output_row.field := -1 * idx.id;
                output_row.source := rid;
                output_row.value := BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));

                output_row.facet_field := TRUE;
                RETURN NEXT output_row;
                output_row.facet_field := FALSE;
            END IF;

        END LOOP;

        CONTINUE WHEN raw_text IS NULL OR raw_text = '';

        -- insert combined node text for searching
        IF idx.search_field THEN
            output_row.field_class := idx.field_class;
            output_row.field := idx.id;
            output_row.source := rid;
            output_row.value := BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));

            output_row.search_field := TRUE;
            RETURN NEXT output_row;
            output_row.search_field := FALSE;
        END IF;

    END LOOP;

END;
$func$ LANGUAGE PLPGSQL;
-- metabib.reingest_metabib_field_entries( bib_id, skip_facet, skip_browse, skip_search )
--
-- Rebuilds the facet, browse and search index rows of one bib record from
-- the rows returned by biblio.extract_metabib_field_entry( bib_id ).
--
--   bib_id      : id of the bib record to reingest
--   skip_facet  : TRUE leaves metabib.facet_entry untouched
--   skip_browse : TRUE leaves metabib.browse_entry_def_map untouched
--   skip_search : TRUE leaves the per-class *_field_entry tables untouched
--
-- A skip argument that is FALSE (or NULL) falls back to the matching
-- config.internal_flag ('ingest.skip_facet_indexing',
-- 'ingest.skip_browse_indexing', 'ingest.skip_search_indexing'): the step is
-- skipped when that flag exists and is enabled.
--
-- Existing rows for the record are deleted first, unless the internal flag
-- 'ingest.assume_inserts_only' is enabled.
CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
DECLARE
    fclass          RECORD;
    ind_data        metabib.field_entry_template%ROWTYPE;
    mbe_id          BIGINT;
    dup_marker      INT;     -- non-NULL when an identical search row already exists
    b_skip_facet    BOOL;
    b_skip_browse   BOOL;
    b_skip_search   BOOL;
    value_prepped   TEXT;
BEGIN

    -- NULLIF turns an explicit FALSE into NULL so that COALESCE consults the flag
    SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
    SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
    SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name = 'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;

    PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
    IF NOT FOUND THEN
        IF NOT b_skip_search THEN
            FOR fclass IN SELECT * FROM config.metabib_class LOOP
                -- RAISE NOTICE 'Emptying out %', fclass.name;
                -- The table name has to be spliced in; the record id is bound
                EXECUTE 'DELETE FROM metabib.' || fclass.name || '_field_entry WHERE source = $1'
                    USING bib_id;
            END LOOP;
        END IF;
        IF NOT b_skip_facet THEN
            DELETE FROM metabib.facet_entry WHERE source = bib_id;
        END IF;
        IF NOT b_skip_browse THEN
            DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
        END IF;
    END IF;

    FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP

        -- Negative field ids are flipped back to the positive definition id
        IF ind_data.field < 0 THEN
            ind_data.field := -1 * ind_data.field;
        END IF;

        IF ind_data.facet_field AND NOT b_skip_facet THEN
            INSERT INTO metabib.facet_entry (field, source, value)
                VALUES (ind_data.field, ind_data.source, ind_data.value);
        END IF;

        IF ind_data.browse_field AND NOT b_skip_browse THEN
            -- A caveat about this SELECT: this should take care of replacing
            -- old mbe rows when data changes, but not if normalization (by
            -- which I mean specifically the output of
            -- evergreen.oils_tsearch2()) changes. It may or may not be
            -- expensive to add a comparison of index_vector to index_vector
            -- to the WHERE clause below.

            value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);

            -- Reuse an existing browse entry with the same value and sort
            -- value; otherwise create one and take its id from the INSERT
            SELECT id INTO mbe_id FROM metabib.browse_entry
                WHERE value = value_prepped AND sort_value = ind_data.sort_value;

            IF NOT FOUND THEN
                INSERT INTO metabib.browse_entry
                    ( value, sort_value ) VALUES
                    ( value_prepped, ind_data.sort_value )
                    RETURNING id INTO mbe_id;
            END IF;

            INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
                VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
        END IF;

        IF ind_data.search_field AND NOT b_skip_search THEN
            -- Avoid inserting duplicate rows
            EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
                '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
                INTO dup_marker USING ind_data.field, ind_data.source, ind_data.value;
            -- RAISE NOTICE 'Search for an already matching row returned %', dup_marker;
            IF dup_marker IS NULL THEN
                -- Values are bound the same way as in the duplicate check above
                EXECUTE 'INSERT INTO metabib.' || ind_data.field_class ||
                    '_field_entry (field, source, value) VALUES ($1, $2, $3)'
                    USING ind_data.field, ind_data.source, ind_data.value;
            END IF;
        END IF;

    END LOOP;

    IF NOT b_skip_search THEN
        PERFORM metabib.update_combined_index_vectors(bib_id);
    END IF;

    RETURN;
END;
$func$ LANGUAGE PLPGSQL;
-- The Title Proper search field (id 6) no longer doubles as a browse field
UPDATE config.metabib_field
   SET browse_field = FALSE,
       browse_xpath = NULL,
       browse_sort_xpath = NULL
 WHERE id = 6;

-- Add a separate Title Proper definition (id 31) that is browse-only
INSERT INTO config.metabib_field (
    id,
    field_class,
    name,
    label,
    format,
    xpath,
    search_field,
    authority_xpath,
    browse_field,
    browse_sort_xpath
) VALUES (
    31,
    'title',
    'browse',
    oils_i18n_gettext(31, 'Title Proper (Browse)', 'cmf', 'label'),
    'mods32',
    $$//mods32:mods/mods32:titleBrowse$$,
    FALSE,
    '//@xlink:href',
    TRUE,
    $$*[local-name() != "nonSort"]$$
);

COMMIT;
\qecho This is a browse-only reingest of your bib records. It may take a while.
\qecho You may cancel now without losing the effect of the rest of the
\qecho upgrade script, and arrange the reingest later.
\qecho .

-- Runs after COMMIT, one call per bib record.
-- Arguments after id: skip_facet = TRUE, skip_browse = FALSE, skip_search = TRUE
SELECT
    metabib.reingest_metabib_field_entries(id, TRUE, FALSE, TRUE)
FROM
    biblio.record_entry;