-- Test helper: returns its argument in Unicode Normalization Form KD
-- (compatibility decomposition). The Korean-text assertion in this script
-- uses it to derive its expected value.
--   $1       text to normalize
--   returns  the NFKD form of $1, or NULL when $1 is NULL
CREATE FUNCTION nfkd(TEXT) RETURNS TEXT AS $$
    use strict;
    use warnings;
    use Unicode::Normalize;

    # PL/Perl hands the SQL arguments to the body in @_; a SQL NULL arrives
    # as undef and is passed straight back as NULL instead of being normalized.
    my ($text) = @_;
    return undef unless defined $text;

    # The decomposed text is the function's result and must be returned
    # explicitly; loading the module alone produces no value.
    return NFKD($text);
$$ LANGUAGE PLPERLU STABLE;
-- Baseline expectations: lower-case ASCII comes back unchanged, upper case
-- comes back lower-cased, accented and special Latin letters come back as
-- plain ASCII letters, and typographic quotation marks are dropped.
SELECT is(
    public.naco_normalize('abc'),
    'abc',
    'regular text'
);
SELECT is(
    public.naco_normalize('ABC'),
    'abc',
    'regular text'
);
SELECT is(
    public.naco_normalize('åbçdéñœöîøæÇıÂÅÍÎÏÔÔÒÚÆŒè'),
    'abcdenoeoioaeciaaiiiooouaeoee',
    'European diacritics'
);
SELECT is(
    public.naco_normalize('“‘„«quotes»’”'),
    'quotes',
    'special quotes'
);
-- The text between U+0098 and U+009C ('abc') is expected to be dropped.
-- The two C1 controls are written as escapes because the raw characters are
-- invisible and easily mangled by editors and text tooling; each escape is
-- kept in its own literal so the hex digits cannot run into the letters
-- that follow.
SELECT is(
    public.naco_normalize(E'\u0098' || 'abc' || E'\u009C' || ' def'),
    'def',
    'special non-filing characters designation'
);
-- A lone control character with no partner: the remaining text is expected
-- to come back intact.
-- NOTE(review): the label says "start of string" but the character used here
-- is U+009C (STRING TERMINATOR) — confirm which control was intended.
SELECT is(
    public.naco_normalize(E'\u009C' || 'abcdef'),
    'abcdef',
    'unpaired start of string'
);
-- Letters with special expected expansions, plus punctuation and spacing.
SELECT is(
    public.naco_normalize('ß'),
    'ss',
    'sharp S (eszett)'
);
-- Input is the three ligature code points U+FB02 (fl), U+FB01 (fi) and
-- U+FB00 (ff), written as escapes so they cannot be silently flattened to
-- plain ASCII, which would make this assertion pass trivially.
SELECT is(
    public.naco_normalize(E'\uFB02\uFB01\uFB00'),
    'flfiff',
    'ligatures'
);
SELECT is(
    public.naco_normalize('ƠơƯư²IJij'),
    'oouu2ijij',
    'NFKD applied correctly'
);
SELECT is(
    public.naco_normalize('ÆØÞæðøþĐđıŁłŒœʻʼℓ'),
    'aeothaedothddilloeoel',
    'part 3.6'
);
SELECT is(
    public.naco_normalize('Ð'),
    'd',
    'uppercase eth (missing from 3.6?)'
);
SELECT is(
    public.naco_normalize('ıİ'),
    'ii',
    'Turkish I'
);
-- The doubled apostrophe is SQL quoting for a single ' inside the literal.
SELECT is(
    public.naco_normalize('[book''s cover]'),
    'books cover',
    'square brackets and apostrophe'
);
SELECT is(
    public.naco_normalize(' grue food '),
    'grue food',
    'trim spaces'
);
-- Non-Latin scripts.
-- For Korean the expected value is not typed literally: it is the same text
-- run through the nfkd() helper defined earlier in this script.
SELECT is(
    public.naco_normalize('한국어 조선말'),
    nfkd('한국어 조선말'),
    'Korean text'
);
SELECT is(
    public.naco_normalize('普通話 / 普通话'),
    '普通話 普通话',
    'Chinese text'
);
SELECT is(
    public.naco_normalize('العربية'),
    'العربية',
    'Arabic text'
);
SELECT is(
    public.naco_normalize('ქართული ენა'),
    'ქართული ენა',
    'Georgian text'
);
-- The expected text has и where the input has й.
SELECT is(
    public.naco_normalize('русский язык'),
    'русскии язык',
    'Russian text'
);

-- Whitespace, punctuation, digits and symbols.
SELECT is(
    public.naco_normalize(E'\r\npa\tper\f'),
    'paper',
    'other whitespace'
);
SELECT is(
    public.naco_normalize('#1: ∃ C++, @ home & abroad'),
    '#1 c++ @ home & abroad',
    'other punctuation'
);
SELECT is(
    public.naco_normalize('٠١٢٣٤٥'),
    '012345',
    'other decimal digits'
);
SELECT is(
    public.naco_normalize('²³¹'),
    '231',
    'superscript numbers'
);
SELECT is(
    public.naco_normalize('♭©®♯'),
    '♭ ♯',
    'other symbols'
);
-- Two-argument form: with 'a' as the second argument the expected output
-- keeps the comma after "smith", while the later commas and the full stop
-- are gone.
SELECT is(
    public.naco_normalize('Smith, Jane. Poet, painter, and author', 'a'),
    'smith, jane poet painter and author',
    'retain first comma'
);

-- Close out the test run.
SELECT * FROM finish();