4 use List::MoreUtils qw/uniq/;
# Main dictionary-building pass: read the input one line at a time via the
# diamond operator and fold every word into %dict.
18 while (my $data = <>) {
# Drop the trailing newline and lowercase the whole line before tokenizing.
21 chomp($data); $data=lc($data);
# Tokenize: each match is a run of word characters, optionally followed by
# apostrophes and further word characters (so "don't" is a single token).
24 while( $data =~ m/([\w\d]+'*[\w\d]*)/g ) {
# Handle each distinct word of the line only once.
# NOTE(review): @words is populated on lines not shown here -- presumably
# from the capture group of the match above; confirm.
28 for my $raw (uniq @words) {
# Each %dict value is a two-element array ref: [count, [suggestion words]].
# NOTE(review): $key is assigned before this point on a line not shown here.
30 $dict{$key} //= [0,[]];
# NOTE(review): the count is presumably incremented between the line above and
# this test (not shown), so == 1 means this is the first occurrence.
33 if ($dict{$key}[0] == 1) { # first time we've seen it, need to generate prefix keys
# The word is recorded as a suggestion under its own key.
34 push @{$dict{$key}[1]}, $raw;
# Words longer than the prefix length $plen are also filed under their
# $plen-character prefix, which replaces $key from here on.
36 if (length($raw) > $plen) {
37 $key = substr($raw,0,$plen);
38 $dict{$key} //= [0,[]];
39 push @{$dict{$key}[1]}, $raw;
# File the word under every edit variant of $key as well; the second argument
# (1) is the starting edit distance passed to symspell_generate_edits.
42 for my $edit (symspell_generate_edits($key, 1)) {
43 $dict{$edit} //= [0,[]];
44 push @{$dict{$edit}[1]}, $raw;
# Progress report on STDERR whenever $line is a multiple of 10000.
# NOTE(review): $line and $etime are updated on lines not shown here.
49 unless ($line % 10000) {
51 $secs = $etime - $stime;
52 warn "$line lines consumed from input in $secs seconds...\n";
# Timing report for the whole dictionary build.
# NOTE(review): presumably reached only after the input loop has finished;
# the closing braces are not shown here -- confirm.
57 $secs = $etime - $stime;
58 warn "Dictionary built in $secs seconds, writing...\n";
64 CREATE UNLOGGED TABLE search.symspell_dictionary_partial_$class (
67 ${class}_suggestions TEXT[]
70 COPY search.symspell_dictionary_partial_$class FROM STDIN;
# Emit one COPY data row per dictionary entry, in whatever order each() yields.
73 while ( my ($key, $cl_dict) = each %dict ) {
# Row layout is tab-separated: key, count, suggestions. The suggestions are
# de-duplicated and written as a {a,b,c} array literal; an entry with no
# suggestions is written as \N (the single-quoted '\N' is a literal backslash
# followed by N).
75 print join( "\t", $key, $$cl_dict[0], (scalar(@{$$cl_dict[1]}) ? '{'.join(',', uniq @{$$cl_dict[1]}).'}' : '\N')) . "\n";
82 INSERT INTO search.symspell_dictionary (prefix_key, ${class}_count, ${class}_suggestions)
83 SELECT * FROM search.symspell_dictionary_partial_$class
84 ON CONFLICT (prefix_key) DO UPDATE
85 SET ${class}_count = EXCLUDED.${class}_count,
86 ${class}_suggestions = EXCLUDED.${class}_suggestions;
# Final timing report for the write phase, sent to STDERR.
# NOTE(review): $counter and $etime are updated on lines not shown here.
91 $secs = $etime - $stime;
92 warn "$counter dictionary prefix key entries written in $secs seconds.\n";
# symspell_generate_edits -- build the deletion variants of a word.
#
# Each variant is the word with one character removed. While the current
# distance $dist is below the limit $maxed, every variant is expanded again
# recursively at distance $dist + 1.
#
# Called as symspell_generate_edits($word, $dist); callers in this file start
# with a distance of 1.
# NOTE(review): the argument unpacking, the origin of $maxed, the
# initialisation and increment of $c, and the return statement are on lines
# not shown here -- confirm before relying on the details above.
94 sub symspell_generate_edits {
100 my $len = length($word);
# $c walks the character positions of $word up to and including $len.
# NOTE(review): this only makes sense if $c starts at 1 -- confirm.
102 while ( $c <= $len ) {
# Remove one character: keep the first $c - 1 characters, then everything from
# offset $c onward, which skips the character at zero-based offset $c - 1.
103 my $item = substr($word, 0, $c - 1) . substr($word, $c);
# Still below the maximum edit distance: also collect the deletions of $item.
105 if ($dist < $maxed) {
106 push @sublist, symspell_generate_edits($item, $dist + 1);
# Merge the recursively generated variants into the result list.
111 push @list, @sublist;
114 #warn join(', ', uniq @list) . "\n";