Nuspell
spell checker
Loading...
Searching...
No Matches
aff_data.hxx
1/* Copyright 2016-2022 Dimitrij Mijoski
2 *
3 * This file is part of Nuspell.
4 *
5 * Nuspell is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * Nuspell is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with Nuspell. If not, see <http://www.gnu.org/licenses/>.
17 */
18
19#ifndef NUSPELL_AFF_DATA_HXX
20#define NUSPELL_AFF_DATA_HXX
21
22#include "nuspell_export.h"
23#include "structures.hxx"
24
25#include <iosfwd>
26#include <unicode/locid.h>
27
28namespace nuspell {
29inline namespace v5 {
30
31class Encoding {
32 std::string name;
33
34 NUSPELL_EXPORT auto normalize_name() -> void;
35
36 public:
37 enum Enc_Type { SINGLEBYTE = false, UTF8 = true };
38
39 Encoding() = default;
40 explicit Encoding(const std::string& e) : name(e) { normalize_name(); }
41 explicit Encoding(std::string&& e) : name(move(e)) { normalize_name(); }
42 explicit Encoding(const char* e) : name(e) { normalize_name(); }
43 auto& operator=(const std::string& e)
44 {
45 name = e;
46 normalize_name();
47 return *this;
48 }
49 auto& operator=(std::string&& e)
50 {
51 name = move(e);
52 normalize_name();
53 return *this;
54 }
55 auto& operator=(const char* e)
56 {
57 name = e;
58 normalize_name();
59 return *this;
60 }
61 auto empty() const { return name.empty(); }
62 auto& value() const { return name; }
63 auto is_utf8() const { return name == "UTF-8"; }
64 auto value_or_default() const -> std::string
65 {
66 if (name.empty())
67 return "ISO8859-1";
68 else
69 return name;
70 }
71 operator Enc_Type() const { return is_utf8() ? UTF8 : SINGLEBYTE; }
72};
73
/**
 * @brief Selects one of four flag representations.
 *
 * NOTE(review): presumably mirrors the affix-file FLAG option (single
 * character, two characters, decimal number, UTF-8 character) -- confirm
 * against the parser.
 */
enum class Flag_Type {
    SINGLE_CHAR,
    DOUBLE_CHAR,
    NUMBER,
    UTF8
};
75
86using Word_List = Hash_Multimap<std::string, Flag_Set>;
87
88struct Aff_Data {
89 static constexpr auto HIDDEN_HOMONYM_FLAG = char16_t(-1);
90 static constexpr auto MAX_SUGGESTIONS = size_t(16);
91
92 // spell checking options
93 Word_List words;
94 Prefix_Table prefixes;
95 Suffix_Table suffixes;
96
97 bool complex_prefixes;
98 bool fullstrip;
99 bool checksharps;
100 bool forbid_warn;
101 char16_t compound_onlyin_flag;
102 char16_t circumfix_flag;
103 char16_t forbiddenword_flag;
104 char16_t keepcase_flag;
105 char16_t need_affix_flag;
106 char16_t warn_flag;
107
108 // compounding options
109 char16_t compound_flag;
110 char16_t compound_begin_flag;
111 char16_t compound_last_flag;
112 char16_t compound_middle_flag;
113 Compound_Rule_Table compound_rules;
114
115 // spell checking options
116 Break_Table break_table;
117 Substr_Replacer input_substr_replacer;
118 std::string ignored_chars;
119 icu::Locale icu_locale;
120 Substr_Replacer output_substr_replacer;
121
122 // suggestion options
123 Replacement_Table replacements;
124 std::vector<Similarity_Group> similarities;
125 std::string keyboard_closeness;
126 std::string try_chars;
127 // Phonetic_Table phonetic_table;
128
129 char16_t nosuggest_flag;
130 char16_t substandard_flag;
131 unsigned short max_compound_suggestions;
132 unsigned short max_ngram_suggestions;
133 unsigned short max_diff_factor;
134 bool only_max_diff;
135 bool no_split_suggestions;
136 bool suggest_with_dots;
137
138 // compounding options
139 unsigned short compound_min_length;
140 unsigned short compound_max_word_count;
141 char16_t compound_permit_flag;
142 char16_t compound_forbid_flag;
143 char16_t compound_root_flag;
144 char16_t compound_force_uppercase;
145 bool compound_more_suffixes;
146 bool compound_check_duplicate;
147 bool compound_check_rep;
148 bool compound_check_case;
149 bool compound_check_triple;
150 bool compound_simplified_triple;
151 bool compound_syllable_num;
152 unsigned short compound_syllable_max;
153 std::string compound_syllable_vowels;
154 std::vector<Compound_Pattern> compound_patterns;
155
156 // data members used only while parsing
157 Flag_Type flag_type;
158 Encoding encoding;
159 std::vector<Flag_Set> flag_aliases;
160 std::string wordchars; // deprecated?
161
162 auto parse_aff(std::istream& in, std::ostream& err_msg) -> bool;
163 auto parse_dic(std::istream& in, std::ostream& err_msg) -> bool;
164 auto parse_aff_dic(std::istream& aff, std::istream& dic,
165 std::ostream& err_msg)
166 {
167 if (parse_aff(aff, err_msg))
168 return parse_dic(dic, err_msg);
169 return false;
170 }
171};
172} // namespace v5
173} // namespace nuspell
174#endif // NUSPELL_AFF_DATA_HXX
Definition structures.hxx:440
Definition structures.hxx:1382
Definition aff_data.hxx:31
Definition structures.hxx:1227
Definition structures.hxx:1520
Definition structures.hxx:315
Definition structures.hxx:1279
Library main namespace with version number attached.
Definition aff_data.cxx:42
Library main namespace.
Definition aff_data.cxx:33
Definition aff_data.hxx:88