Line data Source code
1 : /**
2 : Copyright (c) 2016-2022 Roman Katuntsev <sbkarr@stappler.org>
3 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
4 :
5 : Permission is hereby granted, free of charge, to any person obtaining a copy
6 : of this software and associated documentation files (the "Software"), to deal
7 : in the Software without restriction, including without limitation the rights
8 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 : copies of the Software, and to permit persons to whom the Software is
10 : furnished to do so, subject to the following conditions:
11 :
12 : The above copyright notice and this permission notice shall be included in
13 : all copies or substantial portions of the Software.
14 :
15 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 : THE SOFTWARE.
22 : **/
23 :
24 : #ifndef STAPPLER_CORE_STRING_SPCHARMATCHING_H_
25 : #define STAPPLER_CORE_STRING_SPCHARMATCHING_H_
26 :
27 : #include "SPCore.h"
28 :
29 : #define SPCHARMATCHING_LOG(...)
30 :
31 : namespace STAPPLER_VERSIONIZED stappler {
32 :
/**
 * Bit flags that identify the named character groups.
 *
 * Every enumerator except None occupies a distinct single bit, which lets
 * several groups be combined into one mask value. Bit 0 is not assigned.
 */
enum class CharGroupId : uint32_t {
	None = 0,

	// displayable groups
	PunctuationBasic = uint32_t(1) << 1,
	Numbers = uint32_t(1) << 2,
	Latin = uint32_t(1) << 3,
	Cyrillic = uint32_t(1) << 4,
	Currency = uint32_t(1) << 5,
	GreekBasic = uint32_t(1) << 6,
	Math = uint32_t(1) << 7,
	Arrows = uint32_t(1) << 8,
	Fractionals = uint32_t(1) << 9,
	LatinSuppl1 = uint32_t(1) << 10,
	PunctuationAdvanced = uint32_t(1) << 11,
	GreekAdvanced = uint32_t(1) << 12,

	// non-displayable groups
	WhiteSpace = uint32_t(1) << 13,
	Controls = uint32_t(1) << 14,
	NonPrintable = uint32_t(1) << 15,

	// case-specific Latin subsets
	LatinLowercase = uint32_t(1) << 16,
	LatinUppercase = uint32_t(1) << 17,

	// encoding-oriented groups
	Alphanumeric = uint32_t(1) << 18,
	Hexadecimial = uint32_t(1) << 19,
	Base64 = uint32_t(1) << 20,

	// NOTE(review): the names suggest text-layout use; confirm with callers
	BreakableWhiteSpace = uint32_t(1) << 21,
	OpticalAlignmentSpecial = uint32_t(1) << 22,
	OpticalAlignmentBullet = uint32_t(1) << 23,

	TextPunctuation = uint32_t(1) << 24,
};
67 :
68 : SP_DEFINE_ENUM_AS_MASK(CharGroupId)
69 :
70 : bool inCharGroup(CharGroupId mask, char16_t);
71 : bool inCharGroupMask(CharGroupId mask, char16_t);
72 : // WideString getCharGroup(CharGroupId mask);
73 :
74 : }
75 :
76 :
77 : namespace STAPPLER_VERSIONIZED stappler::chars {
78 :
// Character classification helpers, templated on the character type.
// They are declared up front; the definitions at the end of this header
// forward to the matching CharGroup<CharType, ...>::match.

template <typename CharType> bool isupper(CharType c);

template <typename CharType> bool islower(CharType c);

template <typename CharType> bool isdigit(CharType c);

template <typename CharType> bool isxdigit(CharType c);

template <typename CharType> bool isspace(CharType c);
93 :
94 : /* Inlined templates for char-matching
95 : *
96 : * Chars < variable-length-char-list > - matches every character in the list
97 : * Range < first-char, last-char > - matches all characters in the given range (inclusive)
98 : * CharGroup < GroupId > - matches a specific named char group
99 : *
100 : * Compose < Chars|Range|CharGroup variable length list >
101 : *
102 : */
103 :
104 : using GroupId = CharGroupId;
105 :
/** Matcher for bytes whose high bit (0x80) is set, i.e. values outside the 7-bit range. */
struct UniChar {
	static inline bool match(char c) {
		// Work on the unsigned byte value so the result does not depend on
		// whether plain char is signed.
		const auto byte = static_cast<uint8_t>(c);
		return (byte & 0x80) != 0;
	}
};
109 :
110 : template <typename CharType, CharType ... Args>
111 : struct Chars {
112 : static SPINLINE bool match(CharType c);
113 :
114 : template <typename Func>
115 : static SPINLINE void foreach(const Func &);
116 : };
117 :
118 : template <typename CharType, CharType First, CharType Last>
119 : struct Range {
120 : static inline bool match(CharType c) SPINLINE;
121 :
122 : template <typename Func>
123 : static inline void foreach(const Func &) SPINLINE;
124 : };
125 :
126 : template <typename CharType, typename ...Args>
127 : struct Compose {
128 : static inline bool match(CharType c) SPINLINE;
129 :
130 : template <typename Func>
131 : static inline void foreach(const Func &) SPINLINE;
132 : };
133 :
134 :
135 : template <typename CharType, GroupId Group>
136 : struct CharGroup;
137 :
138 : template <>
139 : struct CharGroup<char, GroupId::PunctuationBasic> : Compose<char,
140 : Range<char, '\x21', '\x2F'>, Range<char, '\x3A', '\x40'>, Range<char, '\x5B', '\x7F'>
141 : > {
142 : static bool match(char c);
143 : };
144 :
145 : template <>
146 : struct CharGroup<char, GroupId::Numbers> : Compose<char, Range<char, '0', '9'> > {
147 : static bool match(char c);
148 : };
149 :
150 : template <>
151 : struct CharGroup<char, GroupId::Latin> : Compose<char, Range<char, 'A', 'Z'>, Range<char, 'a', 'z'> > {
152 : static bool match(char c);
153 : };
154 :
155 : template <>
156 : struct CharGroup<char, GroupId::WhiteSpace> : Compose<char, Range<char, '\x09', '\x0D'>, Chars<char, '\x20'> > {
157 : static bool match(char c);
158 : };
159 :
160 : template <>
161 : struct CharGroup<char, GroupId::Controls> : Compose<char, Range<char, '\x01', '\x20'> > { };
162 :
163 : template <>
164 : struct CharGroup<char, GroupId::NonPrintable> : Compose<char,
165 : Range<char, '\x01', '\x20'>, Chars<char, '\x20'>
166 : > { };
167 :
168 : template <>
169 : struct CharGroup<char, GroupId::LatinLowercase> : Compose<char, Range<char, 'a', 'z'> > {
170 : static bool match(char c);
171 : };
172 :
173 : template <>
174 : struct CharGroup<char, GroupId::LatinUppercase> : Compose<char, Range<char, 'A', 'Z'> > {
175 : static bool match(char c);
176 : };
177 :
178 : template <>
179 : struct CharGroup<char, GroupId::Alphanumeric> : Compose<char,
180 : Range<char, '0', '9'>, Range<char, 'A', 'Z'>, Range<char, 'a', 'z'>
181 : > {
182 : static bool match(char c);
183 : };
184 :
185 : template <>
186 : struct CharGroup<char, GroupId::Hexadecimial> : Compose<char,
187 : Range<char, '0', '9'>, Range<char, 'A', 'F'>, Range<char, 'a', 'f'>
188 : > {
189 : static bool match(char c);
190 : };
191 :
192 : template <>
193 : struct CharGroup<char, GroupId::Base64> : Compose<char,
194 : Range<char, '0', '9'>, Range<char, 'A', 'Z'>, Range<char, 'a', 'z'>, Chars<char, '=', '/', '+', '_', '-'>
195 : > {
196 : static bool match(char c);
197 : };
198 :
199 : template <>
200 : struct CharGroup<char, GroupId::TextPunctuation> : Compose<char,
201 : Chars<char, '=', '/', '(', ')', '.', ',', '-', '\'', '"', ':', ';', '?', '!', '@', '#', '$', '%', '^', '*', '\\', '_', '+', '[', ']'>
202 : > {
203 : static bool match(char c);
204 : };
205 :
206 :
207 : template <>
208 : struct CharGroup<char16_t, GroupId::PunctuationBasic> : Compose<char16_t,
209 : Range<char16_t, u'\u0021', u'\u002F'>,
210 : Range<char16_t, u'\u003A', u'\u0040'>,
211 : Range<char16_t, u'\u005B', u'\u0060'>,
212 : Range<char16_t, u'\u007B', u'\u007E'>,
213 : Range<char16_t, u'\u00A1', u'\u00BF'>,
214 : Chars<char16_t, u'\u00AD', u'\u2013', u'\u2014', u'\u2019', u'\u201c', u'\u201d', u'\u2116'>
215 : > { };
216 :
217 : template <>
218 : struct CharGroup<char16_t, GroupId::Numbers> : Compose<char16_t, Range<char16_t, u'0', u'9'> > { };
219 :
220 : template <>
221 : struct CharGroup<char16_t, GroupId::Latin> : Compose<char16_t,
222 : Range<char16_t, u'A', u'Z'>,
223 : Range<char16_t, u'a', u'z'>
224 : > { };
225 :
226 : template <>
227 : struct CharGroup<char16_t, GroupId::Cyrillic> : Compose<char16_t,
228 : Range<char16_t, u'А', u'Я'>,
229 : Range<char16_t, u'а', u'я'>,
230 : Chars<char16_t, u'Ё', u'ё'>
231 : > { };
232 :
233 : template <>
234 : struct CharGroup<char16_t, GroupId::Currency> : Compose<char16_t, Range<char16_t, u'\u20A0', u'\u20BE'> > { };
235 :
236 : template <>
237 : struct CharGroup<char16_t, GroupId::GreekBasic> : Compose<char16_t,
238 : Range<char16_t, u'\u0391', u'\u03AB'>,
239 : Range<char16_t, u'\u03B1', u'\u03CB'>
240 : > { };
241 :
242 : template <>
243 : struct CharGroup<char16_t, GroupId::Math> : Compose<char16_t, Range<char16_t, u'\u2200', u'\u22FF'> > { };
244 :
245 : template <>
246 : struct CharGroup<char16_t, GroupId::Arrows> : Compose<char16_t, Range<char16_t, u'\u2190', u'\u21FF'> > { };
247 :
248 : template <>
249 : struct CharGroup<char16_t, GroupId::Fractionals> : Compose<char16_t, Range<char16_t, u'\u2150', u'\u215F'> > { };
250 :
251 : template <>
252 : struct CharGroup<char16_t, GroupId::LatinSuppl1> : Compose<char16_t, Range<char16_t, u'\u00C0', u'\u00FF'> > { };
253 :
254 : template <>
255 : struct CharGroup<char16_t, GroupId::PunctuationAdvanced> : Compose<char16_t,
256 : Range<char16_t, u'\u0021', u'\u002F'>,
257 : Range<char16_t, u'\u003A', u'\u0040'>,
258 : Range<char16_t, u'\u005B', u'\u0060'>,
259 : Range<char16_t, u'\u007B', u'\u007F'>,
260 : Range<char16_t, u'\u00A1', u'\u00BF'>,
261 : Range<char16_t, u'\u2010', u'\u201F'>,
262 : Range<char16_t, u'\u2024', u'\u2027'>,
263 : Range<char16_t, u'\u2030', u'\u203D'>,
264 : Chars<char16_t, u'\u2013', u'\u2014', u'\u2019', u'\u201c', u'\u201d', u'\u2116'>
265 : > { };
266 :
267 : template <>
268 : struct CharGroup<char16_t, GroupId::GreekAdvanced> : Compose<char16_t,
269 : Range<char16_t, u'\u0391', u'\u03AB'>,
270 : Range<char16_t, u'\u03B1', u'\u03CB'>,
271 : Range<char16_t, u'\u0370', u'\u0377'>,
272 : Range<char16_t, u'\u037A', u'\u037F'>,
273 : Range<char16_t, u'\u0384', u'\u038A'>,
274 : Range<char16_t, u'\u038E', u'\u0390'>,
275 : Range<char16_t, u'\u03AC', u'\u03B0'>,
276 : Range<char16_t, u'\u03CC', u'\u03FF'>,
277 : Chars<char16_t, u'\u038C'>
278 : > { };
279 :
280 : template <>
281 : struct CharGroup<char16_t, GroupId::WhiteSpace> : Compose<char16_t,
282 : Range<char16_t, u'\u0009', u'\u000D'>,
283 : Range<char16_t, u'\u2000', u'\u200D'>,
284 : Chars<char16_t, u'\u0020', u'\u0085', u'\u00A0', u'\u1680', u'\u2028', u'\u2029',
285 : u'\u202F', u'\u205F', u'\u2060', u'\u3000', u'\uFEFF', u'\uFFFF'>
286 : > { };
287 :
288 : template <>
289 : struct CharGroup<char16_t, GroupId::Controls> : Compose<char16_t, Range<char16_t, u'\u0001', u'\u0020'> > { };
290 :
291 : template <>
292 : struct CharGroup<char16_t, GroupId::NonPrintable> : Compose<char16_t,
293 : Range<char16_t, u'\u0001', u'\u0020'>,
294 : Range<char16_t, u'\u2000', u'\u200D'>,
295 : Chars<char16_t, u'\u0020', u'\u0085', u'\u00A0', u'\u1680', u'\u2028', u'\u2029',
296 : u'\u202F', u'\u205F', u'\u2060', u'\u3000', u'\uFEFF', u'\uFFFF'>
297 : > { };
298 :
299 : template <>
300 : struct CharGroup<char16_t, GroupId::LatinLowercase> : Compose<char16_t, Range<char16_t, u'a', u'z'> > { };
301 :
302 : template <>
303 : struct CharGroup<char16_t, GroupId::LatinUppercase> : Compose<char16_t, Range<char16_t, u'A', u'Z'> > { };
304 :
305 : template <>
306 : struct CharGroup<char16_t, GroupId::Alphanumeric> : Compose<char16_t,
307 : Range<char16_t, u'0', u'9'>,
308 : Range<char16_t, u'A', u'Z'>,
309 : Range<char16_t, u'a', u'z'>
310 : > { };
311 :
312 : template <>
313 : struct CharGroup<char16_t, GroupId::Hexadecimial> : Compose<char16_t,
314 : Range<char16_t, u'0', u'9'>,
315 : Range<char16_t, u'A', u'F'>,
316 : Range<char16_t, u'a', u'f'>
317 : > { };
318 :
319 : template <>
320 : struct CharGroup<char16_t, GroupId::Base64> : Compose<char16_t,
321 : Range<char16_t, u'0', u'9'>,
322 : Range<char16_t, u'A', u'Z'>,
323 : Range<char16_t, u'a', u'z'>,
324 : Chars<char16_t, u'=', u'/', u'+', u'-', u'_'>
325 : > { };
326 :
327 : template <>
328 : struct CharGroup<char16_t, GroupId::BreakableWhiteSpace> : Compose<char16_t,
329 : Range<char16_t, u'\u0009', u'\u000D'>,
330 : Range<char16_t, u'\u2000', u'\u200D'>,
331 : Chars<char16_t, u'\u0020', u'\u0085', u'\u1680', u'\u2028', u'\u2029',
332 : u'\u205F', u'\u2060', u'\u3000', u'\uFEFF'>
333 : > { };
334 :
335 : template <>
336 : struct CharGroup<char16_t, GroupId::OpticalAlignmentSpecial> : Compose<char16_t,
337 : Chars<char16_t, u'(', u'[', u'{', u'"', u'\'', u'\\', u'<', u'«', u'„', u'.', u',', u'\u00AD', u'-'>
338 : > { };
339 :
340 : template <>
341 : struct CharGroup<char16_t, GroupId::OpticalAlignmentBullet> : Compose<char16_t,
342 : Range<char16_t, u'0', u'9'>,
343 : Chars<char16_t, u'—', u'–', u'―', u'•', u'‣', u'⁃', u'-', u'*', u'◦', u'■', u'.', u',', u')'>
344 : > { };
345 :
346 : template <>
347 : struct CharGroup<char16_t, GroupId::TextPunctuation> : Compose<char16_t,
348 : Chars<char16_t, u'=', u'/', u'(', u')', u'.', u',', u'-', u'\'', u'"'
349 : , u':', u';', u'?', u'!', u'@', u'#', u'$', u'%', u'^', u'*', u'\\'
350 : , u'_', u'+', u'[', u']', u'«', u'»'>
351 : > { };
352 :
353 : // char32_t
354 :
355 : template <>
356 : struct CharGroup<char32_t, GroupId::PunctuationBasic> : Compose<char32_t,
357 : Range<char32_t, U'\u0021', U'\u002F'>,
358 : Range<char32_t, U'\u003A', U'\u0040'>,
359 : Range<char32_t, U'\u005B', U'\u0060'>,
360 : Range<char32_t, U'\u007B', U'\u007E'>,
361 : Range<char32_t, U'\u00A1', U'\u00BF'>,
362 : Chars<char32_t, U'\u00AD', U'\u2013', U'\u2014', U'\u2019', U'\u201c', U'\u201d', U'\u2116'>
363 : > { };
364 :
365 : template <>
366 : struct CharGroup<char32_t, GroupId::Numbers> : Compose<char32_t, Range<char32_t, U'0', U'9'> > { };
367 :
368 : template <>
369 : struct CharGroup<char32_t, GroupId::Latin> : Compose<char32_t,
370 : Range<char32_t, U'A', U'Z'>,
371 : Range<char32_t, U'a', U'z'>
372 : > { };
373 :
374 : template <>
375 : struct CharGroup<char32_t, GroupId::Cyrillic> : Compose<char32_t,
376 : Range<char32_t, U'А', U'Я'>,
377 : Range<char32_t, U'а', U'я'>,
378 : Chars<char32_t, U'Ё', U'ё'>
379 : > { };
380 :
381 : template <>
382 : struct CharGroup<char32_t, GroupId::Currency> : Compose<char32_t, Range<char32_t, U'\u20A0', U'\u20BE'> > { };
383 :
384 : template <>
385 : struct CharGroup<char32_t, GroupId::GreekBasic> : Compose<char32_t,
386 : Range<char32_t, U'\u0391', U'\u03AB'>,
387 : Range<char32_t, U'\u03B1', U'\u03CB'>
388 : > { };
389 :
390 : template <>
391 : struct CharGroup<char32_t, GroupId::Math> : Compose<char32_t, Range<char32_t, U'\u2200', U'\u22FF'> > { };
392 :
393 : template <>
394 : struct CharGroup<char32_t, GroupId::Arrows> : Compose<char32_t, Range<char32_t, U'\u2190', U'\u21FF'> > { };
395 :
396 : template <>
397 : struct CharGroup<char32_t, GroupId::Fractionals> : Compose<char32_t, Range<char32_t, U'\u2150', U'\u215F'> > { };
398 :
399 : template <>
400 : struct CharGroup<char32_t, GroupId::LatinSuppl1> : Compose<char32_t, Range<char32_t, U'\u00C0', U'\u00FF'> > { };
401 :
402 : template <>
403 : struct CharGroup<char32_t, GroupId::PunctuationAdvanced> : Compose<char32_t,
404 : Range<char32_t, U'\u0021', U'\u002F'>,
405 : Range<char32_t, U'\u003A', U'\u0040'>,
406 : Range<char32_t, U'\u005B', U'\u0060'>,
407 : Range<char32_t, U'\u007B', U'\u007F'>,
408 : Range<char32_t, U'\u00A1', U'\u00BF'>,
409 : Range<char32_t, U'\u2010', U'\u201F'>,
410 : Range<char32_t, U'\u2024', U'\u2027'>,
411 : Range<char32_t, U'\u2030', U'\u203D'>,
412 : Chars<char32_t, U'\u2013', U'\u2014', U'\u2019', U'\u201c', U'\u201d', U'\u2116'>
413 : > { };
414 :
415 : template <>
416 : struct CharGroup<char32_t, GroupId::GreekAdvanced> : Compose<char32_t,
417 : Range<char32_t, U'\u0391', U'\u03AB'>,
418 : Range<char32_t, U'\u03B1', U'\u03CB'>,
419 : Range<char32_t, U'\u0370', U'\u0377'>,
420 : Range<char32_t, U'\u037A', U'\u037F'>,
421 : Range<char32_t, U'\u0384', U'\u038A'>,
422 : Range<char32_t, U'\u038E', U'\u0390'>,
423 : Range<char32_t, U'\u03AC', U'\u03B0'>,
424 : Range<char32_t, U'\u03CC', U'\u03FF'>,
425 : Chars<char32_t, U'\u038C'>
426 : > { };
427 :
428 : template <>
429 : struct CharGroup<char32_t, GroupId::WhiteSpace> : Compose<char32_t,
430 : Range<char32_t, U'\u0009', U'\u000D'>,
431 : Range<char32_t, U'\u2000', U'\u200D'>,
432 : Chars<char32_t, U'\u0020', U'\u0085', U'\u00A0', U'\u1680', U'\u2028', U'\u2029',
433 : U'\u202F', U'\u205F', U'\u2060', U'\u3000', U'\uFEFF', U'\uFFFF'>
434 : > { };
435 :
436 : template <>
437 : struct CharGroup<char32_t, GroupId::Controls> : Compose<char32_t, Range<char32_t, U'\u0001', U'\u0020'> > { };
438 :
439 : template <>
440 : struct CharGroup<char32_t, GroupId::NonPrintable> : Compose<char32_t,
441 : Range<char32_t, U'\u0001', U'\u0020'>,
442 : Range<char32_t, U'\u2000', U'\u200D'>,
443 : Chars<char32_t, U'\u0020', U'\u0085', U'\u00A0', U'\u1680', U'\u2028', U'\u2029',
444 : U'\u202F', U'\u205F', U'\u2060', U'\u3000', U'\uFEFF', U'\uFFFF'>
445 : > { };
446 :
447 : template <>
448 : struct CharGroup<char32_t, GroupId::LatinLowercase> : Compose<char32_t, Range<char32_t, U'a', U'z'> > { };
449 :
450 : template <>
451 : struct CharGroup<char32_t, GroupId::LatinUppercase> : Compose<char32_t, Range<char32_t, U'A', U'Z'> > { };
452 :
453 : template <>
454 : struct CharGroup<char32_t, GroupId::Alphanumeric> : Compose<char32_t,
455 : Range<char32_t, U'0', U'9'>,
456 : Range<char32_t, U'A', U'Z'>,
457 : Range<char32_t, U'a', U'z'>
458 : > { };
459 :
460 : template <>
461 : struct CharGroup<char32_t, GroupId::Hexadecimial> : Compose<char32_t,
462 : Range<char32_t, U'0', U'9'>,
463 : Range<char32_t, U'A', U'F'>,
464 : Range<char32_t, U'a', U'f'>
465 : > { };
466 :
467 : template <>
468 : struct CharGroup<char32_t, GroupId::Base64> : Compose<char32_t,
469 : Range<char32_t, U'0', U'9'>,
470 : Range<char32_t, U'A', U'Z'>,
471 : Range<char32_t, U'a', U'z'>,
472 : Chars<char32_t, U'=', U'/', U'+', U'-', U'_'>
473 : > { };
474 :
475 : template <>
476 : struct CharGroup<char32_t, GroupId::BreakableWhiteSpace> : Compose<char32_t,
477 : Range<char32_t, U'\u0009', U'\u000D'>,
478 : Range<char32_t, U'\u2000', U'\u200D'>,
479 : Chars<char32_t, U'\u0020', U'\u0085', U'\u1680', U'\u2028', U'\u2029',
480 : U'\u205F', U'\u2060', U'\u3000', U'\uFEFF'>
481 : > { };
482 :
483 : template <>
484 : struct CharGroup<char32_t, GroupId::OpticalAlignmentSpecial> : Compose<char32_t,
485 : Chars<char32_t, U'(', U'[', U'{', U'"', U'\'', U'\\', U'<', U'«', U'„', U'.', U',', U'\u00AD', U'-'>
486 : > { };
487 :
488 : template <>
489 : struct CharGroup<char32_t, GroupId::OpticalAlignmentBullet> : Compose<char32_t,
490 : Range<char32_t, U'0', U'9'>,
491 : Chars<char32_t, U'—', U'–', U'―', U'•', U'‣', U'⁃', U'-', U'*', U'◦', U'■', U'.', U',', U')'>
492 : > { };
493 :
494 : template <>
495 : struct CharGroup<char32_t, GroupId::TextPunctuation> : Compose<char32_t,
496 : Chars<char32_t, U'=', U'/', U'(', U')', U'.', U',', U'-', U'\'', U'"'
497 : , U':', U';', U'?', U'!', U'@', U'#', U'$', U'%', U'^', U'*', U'\\'
498 : , U'_', U'+', U'[', U']', U'«', U'»'>
499 : > { };
500 :
501 :
502 : class MatchTraits {
503 : public:
504 : template <typename CharType, CharType ... Args>
505 : static inline bool matchChar(CharType c) SPINLINE;
506 :
507 : template <typename CharType, CharType First, CharType Last>
508 : static inline bool matchPair(CharType c) SPINLINE;
509 :
510 : template <typename CharType, typename ...Args>
511 : static inline bool matchCompose(CharType c) SPINLINE;
512 :
513 :
514 : template <typename CharType, typename Func, CharType ... Args>
515 : static inline void foreachChar(const Func &) SPINLINE;
516 :
517 : template <typename CharType, typename Func, CharType First, CharType Last>
518 : static inline void foreachPair(const Func &) SPINLINE;
519 :
520 : template <typename CharType, typename Func, typename ...Args>
521 : static inline void foreachCompose(const Func &) SPINLINE;
522 :
523 : private:
524 : template <typename CharType, CharType T>
525 : static inline bool _matchChar(CharType c) SPINLINE;
526 :
527 : template <typename CharType, CharType T, CharType T1, CharType ... Args>
528 : static inline bool _matchChar(CharType c) SPINLINE;
529 :
530 : template <typename CharType, typename T>
531 : static inline bool _matchCompose(CharType c) SPINLINE;
532 :
533 : template <typename CharType, typename T, typename T1, typename ... Args>
534 : static inline bool _matchCompose(CharType c) SPINLINE;
535 :
536 :
537 : template <typename CharType, typename Func, CharType T>
538 : static inline void _foreachChar(const Func &) SPINLINE;
539 :
540 : template <typename CharType, typename Func, CharType T, CharType T1, CharType ... Args>
541 : static inline void _foreachChar(const Func &) SPINLINE;
542 :
543 : template <typename CharType, typename Func, typename T>
544 : static inline void _foreachCompose(const Func &) SPINLINE;
545 :
546 : template <typename CharType, typename Func, typename T, typename T1, typename ... Args>
547 : static inline void _foreachCompose(const Func &) SPINLINE;
548 : };
549 :
550 : template <typename CharType, CharType ... Args>
551 : inline bool Chars<CharType, Args...>::match(CharType c) {
552 2922118753 : return MatchTraits::matchChar<CharType, Args...>(c);
553 : }
554 :
555 : template <typename CharType, CharType ... Args>
556 : template <typename Func>
557 : inline void Chars<CharType, Args...>::foreach(const Func &f) {
558 : MatchTraits::foreachChar<CharType, Func, Args...>(f);
559 25 : }
560 :
561 : template <typename CharType, CharType First, CharType Last>
562 : inline bool Range<CharType, First, Last>::match(CharType c) {
563 17776841 : return MatchTraits::matchPair<CharType, First, Last>(c);
564 : }
565 :
566 : template <typename CharType, CharType First, CharType Last>
567 : template <typename Func>
568 : inline void Range<CharType, First, Last>::foreach(const Func &f) {
569 : MatchTraits::foreachPair<CharType, Func, First, Last>(f);
570 75 : }
571 :
572 : template <typename CharType, typename ...Args>
573 : inline bool Compose<CharType, Args...>::match(CharType c) {
574 2988063754 : return MatchTraits::matchCompose<CharType, Args...>(c);
575 : }
576 :
577 : template <typename CharType, typename ... Args>
578 : template <typename Func>
579 : inline void Compose<CharType, Args...>::foreach(const Func &f) {
580 : MatchTraits::foreachCompose<CharType, Func, Args...>(f);
581 50 : }
582 :
583 : template <typename CharType, CharType ... Args>
584 : inline bool MatchTraits::matchChar(CharType c) {
585 2922118753 : return _matchChar<CharType, Args...>(c);
586 : }
587 :
588 : template <typename CharType, CharType First, CharType Last>
589 : inline bool MatchTraits::matchPair(CharType c) {
590 : SPCHARMATCHING_LOG("Match range %d - %d : %d %d", First, Last, c, First <= c && c <= Last);
591 17773691 : return First <= c && c <= Last;
592 : }
593 :
594 : template <typename CharType, typename ... Args>
595 : inline bool MatchTraits::matchCompose(CharType c) {
596 : SPCHARMATCHING_LOG("begin compose %d", c);
597 5972887461 : auto ret = _matchCompose<CharType, Args...>(c);
598 : SPCHARMATCHING_LOG("end compose %d %d", c, ret);
599 2988063754 : return ret;
600 : }
601 :
602 : template <typename CharType, typename Func, CharType ... Args>
603 : inline void MatchTraits::foreachChar(const Func &f) {
604 25 : return _foreachChar<CharType, Func, Args...>(f);
605 : }
606 :
607 : template <typename CharType, typename Func, CharType First, CharType Last>
608 : inline void MatchTraits::foreachPair(const Func &f) {
609 1550 : for (CharType c = First; c >= 0 && c <= Last; c++) {
610 1475 : f(c);
611 : }
612 75 : }
613 :
614 : template <typename CharType, typename Func, typename ... Args>
615 : inline void MatchTraits::foreachCompose(const Func &f) {
616 : _foreachCompose<CharType, Func, Args...>(f);
617 50 : }
618 :
619 : template <typename CharType, CharType C>
620 : inline bool MatchTraits::_matchChar(CharType c) {
621 : SPCHARMATCHING_LOG("Match char %d %d %d", C, c, C == c);
622 5928372524 : return C == c;
623 : }
624 :
625 : template <typename CharType, CharType T, CharType T2, CharType ... Args>
626 : inline bool MatchTraits::_matchChar(CharType c) {
627 6321597817 : return _matchChar<CharType, T>(c) || _matchChar<CharType, T2, Args...>(c);
628 : }
629 :
630 : template <typename CharType, typename C>
631 : inline bool MatchTraits::_matchCompose(CharType c) {
632 5921708192 : return C::match(c);
633 : }
634 :
635 : template <typename CharType, typename T, typename T1, typename ... Args>
636 : inline bool MatchTraits::_matchCompose(CharType c) {
637 39603505 : return _matchCompose<CharType, T>(c) || _matchCompose<CharType, T1, Args...>(c);
638 : }
639 :
640 :
641 : template <typename CharType, typename Func, CharType T>
642 : inline void MatchTraits::_foreachChar(const Func &f) {
643 600 : f(T);
644 600 : }
645 :
646 : template <typename CharType, typename Func, CharType T, CharType T1, CharType ... Args>
647 : inline void MatchTraits::_foreachChar(const Func &f) {
648 : _foreachChar<CharType, Func, T>(f);
649 : _foreachChar<CharType, Func, T1, Args...>(f);
650 25 : }
651 :
652 : template <typename CharType, typename Func, typename T>
653 : inline void MatchTraits::_foreachCompose(const Func &f) {
654 : T::foreach(f);
655 100 : }
656 :
657 : template <typename CharType, typename Func, typename T, typename T1, typename ... Args>
658 : inline void MatchTraits::_foreachCompose(const Func &f) {
659 : _foreachCompose<CharType, Func, T>(f);
660 : _foreachCompose<CharType, Func, T1, Args...>(f);
661 25 : }
662 :
663 : template <typename CharType>
664 1600 : inline bool isupper(CharType c) {
665 1600 : return CharGroup<CharType, GroupId::LatinUppercase>::match(c);
666 : }
667 :
668 : template <typename CharType>
669 3150 : inline bool islower(CharType c) {
670 3150 : return CharGroup<CharType, GroupId::LatinLowercase>::match(c);
671 : }
672 :
673 : template <typename CharType>
674 14000 : inline bool isdigit(CharType c) {
675 14000 : return CharGroup<CharType, GroupId::Numbers>::match(c);
676 : }
677 :
678 : template <typename CharType>
679 25 : inline bool isxdigit(CharType c) {
680 25 : return CharGroup<CharType, GroupId::Hexadecimial>::match(c);
681 : }
682 :
683 : template <typename CharType>
684 4426094 : bool isspace(CharType c) {
685 8852188 : return CharGroup<CharType, GroupId::WhiteSpace>::match(c);
686 : }
687 :
688 : }
689 :
690 : #endif /* STAPPLER_CORE_STRING_SPCHARMATCHING_H_ */
|