BEGIN; -- check whether patch can be applied SELECT evergreen.upgrade_deps_block_check('0844', :eg_version); -- 953.data.MODS32-xsl.sql UPDATE config.xml_transform SET xslt=$$ BK SE BK MM CF MP VM MU b afgk abfgk <xsl:value-of select="substring($titleChop,@ind2+1)"/> <xsl:value-of select="$titleChop"/> b b afgk <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="subfieldSelect"> <xsl:with-param name="codes">abfgk</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> abfgk <xsl:value-of select="substring($titleBrowseChop,@ind2+1)"/> <xsl:value-of select="$titleBrowseChop"/> <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="subfieldSelect"> <xsl:with-param name="codes">a</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> a <xsl:value-of select="$titleChop" /> <xsl:value-of select="substring($titleChop,@ind2+1)"/> <xsl:value-of select="$titleChop" /> <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="subfieldSelect"> <!-- 1/04 removed $h, $b --> <xsl:with-param name="codes">af</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> <xsl:value-of select="$titleChop"/> <xsl:value-of select="substring($titleChop,$nfi+1)"/> <xsl:value-of select="$titleChop"/> ah <xsl:value-of select="$titleChop" /> <xsl:value-of select="substring($titleChop,@ind1+1)"/> <xsl:value-of select="$titleChop" /> creator creator creator personal yes yes text cartographic notated music sound recording-nonmusical sound recording-musical still image moving image three dimensional object software, multimedia mixed material globe remote sensing image map atlas database loose-leaf series newspaper periodical web site abstract or summary bibliography catalog dictionary encyclopedia handbook legal article index discography legislation theses survey of literature review programmed 
text filmography directory statistics technical report legal case and case notes law report or digest treaty conference publication numeric data database font game patent festschrift biography essay drama comic strip fiction humor, satire letter novel short story speech biography conference publication drama essay fiction folktale history humor, satire memoir poetry rehearsal reporting sound speech art original kit art reproduction diorama filmstrip legal article picture graphic technical drawing motion picture chart flash card microscope slide model realia slide transparency videorecording toy abvxyz - code marccountry code iso3166 text :,;/ monographic continuing ab reformatted digital digitized microfilm digitized other analog
braille
print
electronic
microfiche
microfilm
access preservation replacement
chip cartridge
computer optical disc cartridge
magnetic disc
magneto-optical disc
optical disc
remote
tape cartridge
tape cassette
tape reel
celestial globe
earth moon globe
planetary or lunar globe
terrestrial globe
kit
atlas
diagram
map
model
profile
remote-sensing image
section
view
aperture card
microfiche
microfiche cassette
microfilm cartridge
microfilm cassette
microfilm reel
microopaque
film cartridge
film cassette
film reel
chart
collage
drawing
flash card
painting
photomechanical print
photonegative
photoprint
picture
print
technical drawing
notated music
filmslip
filmstrip cartridge
filmstrip roll
other filmstrip type
slide
transparency
remote-sensing image
cylinder
roll
sound cartridge
sound cassette
sound disc
sound-tape reel
sound-track film
wire recording
braille
combination
moon
tactile, with no writing system
braille
large print
regular print
text in looseleaf binder
videocartridge
videocassette
videodisc
videoreel
abce
ab agrt ab adolescent adult general juvenile preschool specialized defg marcgac iso3166 ab abx ab ab av <xsl:value-of select="$titleChop" /> <xsl:value-of select="substring($titleChop,@ind2+1)"/> <xsl:value-of select="$titleChop" /> <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="subfieldSelect"> <xsl:with-param name="codes">av</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> abcx3 <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="specialSubfieldSelect"> <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> <xsl:with-param name="axis">t</xsl:with-param> <xsl:with-param name="afterCodes">g</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> aq t g <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="specialSubfieldSelect"> <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> <xsl:with-param name="axis">t</xsl:with-param> <xsl:with-param name="afterCodes">dg</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> c t dgn <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="specialSubfieldSelect"> <xsl:with-param name="anyCodes">tfklsv</xsl:with-param> <xsl:with-param name="axis">t</xsl:with-param> <xsl:with-param name="afterCodes">g</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> aqdc t gn <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="subfieldSelect"> <xsl:with-param name="codes">adfgklmorsv</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> <xsl:value-of select="$titleChop" /> <xsl:value-of select="substring($titleChop,@ind1+1)"/> <xsl:value-of select="$titleChop" /> <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template 
name="specialSubfieldSelect"> <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> <xsl:with-param name="axis">t</xsl:with-param> <xsl:with-param name="afterCodes">g</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> aq t g <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="specialSubfieldSelect"> <xsl:with-param name="anyCodes">tfklmorsv</xsl:with-param> <xsl:with-param name="axis">t</xsl:with-param> <xsl:with-param name="afterCodes">dg</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> c t dgn <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:call-template name="specialSubfieldSelect"> <xsl:with-param name="anyCodes">tfklsv</xsl:with-param> <xsl:with-param name="axis">t</xsl:with-param> <xsl:with-param name="afterCodes">g</xsl:with-param> </xsl:call-template> </xsl:with-param> </xsl:call-template> aqdc t gn adfgklmorsv <xsl:value-of select="$titleChop" /> <xsl:value-of select="substring($titleChop,@ind2+1)"/> <xsl:value-of select="$titleChop" /> isbn isrc ismn sici ab issn lccn issue number matrix number music plate music publisher videorecording identifier ba ab ab doi hdl uri y3z y3 z abje abcd35 abcde35
n n fgkdlmor p p fgkdlmor g g pst p p fgkdlmor
cdn aq :,;/ acdeq constituent <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:value-of select="."></xsl:value-of> </xsl:with-param> </xsl:call-template> <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:value-of select="."></xsl:value-of> </xsl:with-param> </xsl:call-template> <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:value-of select="."></xsl:value-of> </xsl:with-param> </xsl:call-template> <xsl:call-template name="chopPunctuation"> <xsl:with-param name="chopString"> <xsl:value-of select="."></xsl:value-of> </xsl:with-param> </xsl:call-template> code marcgac lcsh lcshac mesh nal csh rvm aq cdnp abcdeqnp adfhklor <xsl:value-of select="$titleChop" /> <xsl:value-of select="substring($titleChop,@ind1+1)"/> <xsl:value-of select="$titleChop" /> abcd bc yes Arabic Latin Chinese, Japanese, Korean Cyrillic Hebrew Greek summary or subtitle sung or spoken text libretto table of contents accompanying material translation summary or subtitle sung or spoken text libretto table of contents accompanying material translation .:,;/
$$ WHERE name = 'mods32';

-- biblio.extract_metabib_field_entry(rid, default_joiner)
--
-- Produces the index rows for one bibliographic record.
--
-- For every row of config.metabib_field (visited in "format" order so the
-- XSLT transform is only re-run when the format changes) the definition's
-- xpath is applied to the record's MARC -- transformed through the matching
-- config.xml_transform stylesheet unless that stylesheet is the literal
-- '---' -- and rows of metabib.field_entry_template are returned:
--
--   * browse: one row per matching node when the definition has
--     browse_field set; search_field is also TRUE on that row when the
--     definition is a search field as well.
--   * facet:  one row per matching node when the definition has facet_field
--     set; "field" is returned NEGATED (-1 * idx.id) on these rows.
--   * search: one row per definition, holding the text of all matching
--     nodes joined with the joiner, when the definition has search_field set.
--
-- Parameters:
--   rid            id of the biblio.record_entry row to extract from
--   default_joiner separator used when the definition's own joiner is NULL
CREATE OR REPLACE FUNCTION biblio.extract_metabib_field_entry ( rid BIGINT, default_joiner TEXT ) RETURNS SETOF metabib.field_entry_template AS $func$
DECLARE
    bib             biblio.record_entry%ROWTYPE;
    idx             config.metabib_field%ROWTYPE;
    xfrm            config.xml_transform%ROWTYPE;
    prev_xfrm       TEXT;
    transformed_xml TEXT;
    xml_node        TEXT;
    xml_node_list   TEXT[];
    facet_text      TEXT;
    browse_text     TEXT;
    sort_value      TEXT;
    raw_text        TEXT;
    curr_text       TEXT;
    joiner          TEXT := default_joiner; -- XXX will index defs supply a joiner?
    authority_text  TEXT;
    authority_link  BIGINT;
    output_row      metabib.field_entry_template%ROWTYPE;
BEGIN

    -- Start out with no field-use bools set
    output_row.browse_field = FALSE;
    output_row.facet_field = FALSE;
    output_row.search_field = FALSE;

    -- Get the record
    SELECT INTO bib * FROM biblio.record_entry WHERE id = rid;

    -- Loop over the indexing entries
    FOR idx IN SELECT * FROM config.metabib_field ORDER BY format LOOP

        joiner := COALESCE(idx.joiner, default_joiner);

        SELECT INTO xfrm * from config.xml_transform WHERE name = idx.format;

        -- See if we can skip the XSLT ... it's expensive
        IF prev_xfrm IS NULL OR prev_xfrm <> xfrm.name THEN
            -- Can't skip the transform
            IF xfrm.xslt <> '---' THEN
                transformed_xml := oils_xslt_process(bib.marc,xfrm.xslt);
            ELSE
                -- '---' marks a transform with no stylesheet: use the MARC as-is
                transformed_xml := bib.marc;
            END IF;

            prev_xfrm := xfrm.name;
        END IF;

        xml_node_list := oils_xpath( idx.xpath, transformed_xml, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );

        raw_text := NULL;
        FOR xml_node IN SELECT x FROM unnest(xml_node_list) AS x LOOP
            -- Only element nodes (text starting with '<') are processed
            CONTINUE WHEN xml_node !~ E'^\\s*<';

            -- XXX much of this should be moved into oils_xpath_string...
            --
            -- The node text is re-escaped before being handed back to
            -- oils_xpath, innermost step first:
            --   1. a '<' embedded in text content is replaced by the
            --      less-than entity (ampersand, "lt", semicolon);
            --   2. every '&' not followed by "amp;" is replaced by the
            --      ampersand entity (ampersand, "amp", semicolon) -- data
            --      is returned from oils_xpath (above) in UTF-8, not
            --      entity encoded;
            --   3. runs of whitespace are collapsed to one space.
            -- Text nodes that are a single space or empty are then dropped
            -- and the remainder joined with the joiner.
            curr_text := ARRAY_TO_STRING(evergreen.array_remove_item_by_value(evergreen.array_remove_item_by_value(
                oils_xpath( '//text()',
                    REGEXP_REPLACE(
                        REGEXP_REPLACE( -- This escapes all &s not followed by "amp;"
                            REGEXP_REPLACE( -- This escapes embedded less-than signs
                                xml_node,
                                $re$(>[^<]+)(<)([^>]+<)$re$,
                                E'\\1&lt;\\3',
                                'g'
                            ),
                            '&(?!amp;)',
                            '&amp;',
                            'g'
                        ),
                        E'\\s+',
                        ' ',
                        'g'
                    )
                ), ' '), ''),
                joiner
            );

            CONTINUE WHEN curr_text IS NULL OR curr_text = '';

            IF raw_text IS NOT NULL THEN
                raw_text := raw_text || joiner;
            END IF;

            raw_text := COALESCE(raw_text,'') || curr_text;

            -- autosuggest/metabib.browse_entry
            IF idx.browse_field THEN

                IF idx.browse_xpath IS NOT NULL AND idx.browse_xpath <> '' THEN
                    browse_text := oils_xpath_string( idx.browse_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
                ELSE
                    browse_text := curr_text;
                END IF;

                -- Sort on a dedicated xpath when one is configured,
                -- otherwise on the browse text itself
                IF idx.browse_sort_xpath IS NOT NULL AND
                    idx.browse_sort_xpath <> '' THEN

                    sort_value := oils_xpath_string(
                        idx.browse_sort_xpath, xml_node, joiner,
                        ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]]
                    );
                ELSE
                    sort_value := browse_text;
                END IF;

                output_row.field_class = idx.field_class;
                output_row.field = idx.id;
                output_row.source = rid;
                output_row.value = BTRIM(REGEXP_REPLACE(browse_text, E'\\s+', ' ', 'g'));
                output_row.sort_value :=
                    public.naco_normalize(sort_value);

                output_row.authority := NULL;

                IF idx.authority_xpath IS NOT NULL AND idx.authority_xpath <> '' THEN
                    authority_text := oils_xpath_string(
                        idx.authority_xpath, xml_node, joiner,
                        ARRAY[
                            ARRAY[xfrm.prefix, xfrm.namespace_uri],
                            ARRAY['xlink','http://www.w3.org/1999/xlink']
                        ]
                    );

                    -- Only link when the extracted text is all digits and
                    -- an authority record with that id actually exists
                    IF authority_text ~ '^\d+$' THEN
                        authority_link := authority_text::BIGINT;
                        PERFORM * FROM authority.record_entry WHERE id = authority_link;
                        IF FOUND THEN
                            output_row.authority := authority_link;
                        END IF;
                    END IF;

                END IF;

                output_row.browse_field = TRUE;
                -- Returning browse rows with search_field = true for search+browse
                -- configs allows us to retain granularity of being able to search
                -- browse fields with "starts with" type operators (for example, for
                -- titles of songs in music albums)
                IF idx.search_field THEN
                    output_row.search_field = TRUE;
                END IF;
                RETURN NEXT output_row;
                output_row.browse_field = FALSE;
                output_row.search_field = FALSE;
                output_row.sort_value := NULL;
            END IF;

            -- insert raw node text for faceting
            IF idx.facet_field THEN

                IF idx.facet_xpath IS NOT NULL AND idx.facet_xpath <> '' THEN
                    facet_text := oils_xpath_string( idx.facet_xpath, xml_node, joiner, ARRAY[ARRAY[xfrm.prefix, xfrm.namespace_uri]] );
                ELSE
                    facet_text := curr_text;
                END IF;

                output_row.field_class = idx.field_class;
                -- Facet rows carry the field id negated
                output_row.field = -1 * idx.id;
                output_row.source = rid;
                output_row.value = BTRIM(REGEXP_REPLACE(facet_text, E'\\s+', ' ', 'g'));

                output_row.facet_field = TRUE;
                RETURN NEXT output_row;
                output_row.facet_field = FALSE;
            END IF;

        END LOOP;

        CONTINUE WHEN raw_text IS NULL OR raw_text = '';

        -- insert combined node text for searching
        IF idx.search_field THEN
            output_row.field_class = idx.field_class;
            output_row.field = idx.id;
            output_row.source = rid;
            output_row.value = BTRIM(REGEXP_REPLACE(raw_text, E'\\s+', ' ', 'g'));

            output_row.search_field = TRUE;
            RETURN NEXT output_row;
            output_row.search_field = FALSE;
        END IF;

    END LOOP;

END;

$func$ LANGUAGE PLPGSQL;

-- metabib.reingest_metabib_field_entries(bib_id, skip_facet, skip_browse, skip_search)
--
-- Rebuilds the search, facet and browse index rows of one bib record from
-- the output of biblio.extract_metabib_field_entry.
--
-- Parameters:
--   bib_id      id of the bib record to reingest
--   skip_facet  TRUE to leave metabib.facet_entry untouched
--   skip_browse TRUE to leave the browse entry mapping untouched
--   skip_search TRUE to leave the per-class field_entry tables untouched
--
-- A FALSE (or NULL) skip argument is overridden to TRUE when the matching
-- config.internal_flag ('ingest.skip_facet_indexing',
-- 'ingest.skip_browse_indexing', 'ingest.skip_search_indexing') is enabled.
-- Existing rows for the record are deleted first unless the
-- 'ingest.assume_inserts_only' internal flag is enabled.
CREATE OR REPLACE FUNCTION metabib.reingest_metabib_field_entries( bib_id BIGINT, skip_facet BOOL DEFAULT FALSE, skip_browse BOOL DEFAULT FALSE, skip_search BOOL DEFAULT FALSE ) RETURNS VOID AS $func$
DECLARE
    fclass          RECORD;
    ind_data        metabib.field_entry_template%ROWTYPE;
    mbe_row         metabib.browse_entry%ROWTYPE;
    mbe_id          BIGINT;
    b_skip_facet    BOOL;
    b_skip_browse   BOOL;
    b_skip_search   BOOL;
    value_prepped   TEXT;
BEGIN

    -- NULLIF turns an explicit FALSE into NULL so COALESCE falls through to
    -- the internal-flag lookup; an explicit TRUE always wins.
    SELECT COALESCE(NULLIF(skip_facet, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name =  'ingest.skip_facet_indexing' AND enabled)) INTO b_skip_facet;
    SELECT COALESCE(NULLIF(skip_browse, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name =  'ingest.skip_browse_indexing' AND enabled)) INTO b_skip_browse;
    SELECT COALESCE(NULLIF(skip_search, FALSE), EXISTS (SELECT enabled FROM config.internal_flag WHERE name =  'ingest.skip_search_indexing' AND enabled)) INTO b_skip_search;

    PERFORM * FROM config.internal_flag WHERE name = 'ingest.assume_inserts_only' AND enabled;
    IF NOT FOUND THEN
        IF NOT b_skip_search THEN
            FOR fclass IN SELECT * FROM config.metabib_class LOOP
                -- RAISE NOTICE 'Emptying out %', fclass.name;
                -- The table name has to be spliced in; the record id is bound.
                EXECUTE 'DELETE FROM metabib.' || fclass.name || '_field_entry WHERE source = $1'
                    USING bib_id;
            END LOOP;
        END IF;
        IF NOT b_skip_facet THEN
            DELETE FROM metabib.facet_entry WHERE source = bib_id;
        END IF;
        IF NOT b_skip_browse THEN
            DELETE FROM metabib.browse_entry_def_map WHERE source = bib_id;
        END IF;
    END IF;

    -- NOTE(review): this is the single-argument form of the extract
    -- function, which is not defined in this script -- presumably an
    -- existing overload that supplies the default joiner; confirm.
    FOR ind_data IN SELECT * FROM biblio.extract_metabib_field_entry( bib_id ) LOOP
        -- Facet rows arrive with a negated field id; restore the real id
        IF ind_data.field < 0 THEN
            ind_data.field = -1 * ind_data.field;
        END IF;

        IF ind_data.facet_field AND NOT b_skip_facet THEN
            INSERT INTO metabib.facet_entry (field, source, value)
                VALUES (ind_data.field, ind_data.source, ind_data.value);
        END IF;

        IF ind_data.browse_field AND NOT b_skip_browse THEN
            -- A caveat about this SELECT: this should take care of replacing
            -- old mbe rows when data changes, but not if normalization (by
            -- which I mean specifically the output of
            -- evergreen.oils_tsearch2()) changes.  It may or may not be
            -- expensive to add a comparison of index_vector to index_vector
            -- to the WHERE clause below.

            value_prepped := metabib.browse_normalize(ind_data.value, ind_data.field);
            SELECT INTO mbe_row * FROM metabib.browse_entry
                WHERE value = value_prepped AND sort_value = ind_data.sort_value;

            IF FOUND THEN
                mbe_id := mbe_row.id;
            ELSE
                -- Reuse-or-create: take the new row's id straight from the
                -- INSERT rather than asking the sequence afterwards
                INSERT INTO metabib.browse_entry
                    ( value, sort_value ) VALUES
                    ( value_prepped, ind_data.sort_value )
                    RETURNING id INTO mbe_id;
            END IF;

            INSERT INTO metabib.browse_entry_def_map (entry, def, source, authority)
                VALUES (mbe_id, ind_data.field, ind_data.source, ind_data.authority);
        END IF;

        IF ind_data.search_field AND NOT b_skip_search THEN
            -- Avoid inserting duplicate rows
            -- (mbe_id is reused as a scratch variable: EXECUTE ... INTO
            -- leaves it NULL when no matching row exists)
            EXECUTE 'SELECT 1 FROM metabib.' || ind_data.field_class ||
                '_field_entry WHERE field = $1 AND source = $2 AND value = $3'
                INTO mbe_id USING ind_data.field, ind_data.source, ind_data.value;
                -- RAISE NOTICE 'Search for an already matching row returned %', mbe_id;
            IF mbe_id IS NULL THEN
                -- Values are bound, matching the SELECT above; only the
                -- class-specific table name is concatenated
                EXECUTE 'INSERT INTO metabib.' || ind_data.field_class ||
                    '_field_entry (field, source, value) VALUES ($1, $2, $3)'
                    USING ind_data.field, ind_data.source, ind_data.value;
            END IF;
        END IF;

    END LOOP;

    IF NOT b_skip_search THEN
        PERFORM metabib.update_combined_index_vectors(bib_id);
    END IF;

    RETURN;
END;
$func$ LANGUAGE PLPGSQL;

-- Don't use Title Proper search field as the browse field
UPDATE config.metabib_field
    SET browse_field = FALSE,
        browse_xpath = NULL,
        browse_sort_xpath = NULL
    WHERE id = 6;

-- Create a new Title Proper browse config
INSERT INTO config.metabib_field ( id, field_class, name, label, format, xpath, search_field, authority_xpath, browse_field, browse_sort_xpath ) VALUES
    (31, 'title', 'browse', oils_i18n_gettext(31, 'Title Proper (Browse)', 'cmf', 'label'), 'mods32', $$//mods32:mods/mods32:titleBrowse$$, FALSE, '//@xlink:href', TRUE, $$*[local-name() != "nonSort"]$$ );

COMMIT;

-- The reingest below runs outside the transaction committed above, so
-- cancelling it does not roll back the schema and config changes.
\qecho This is a browse-only reingest of your bib records. It may take a while.
\qecho You may cancel now without losing the effect of the rest of the
\qecho upgrade script, and arrange the reingest later.
\qecho .

-- skip_facet = TRUE, skip_browse = FALSE, skip_search = TRUE: browse only
SELECT metabib.reingest_metabib_field_entries(id, TRUE, FALSE, TRUE)
    FROM biblio.record_entry;