Line data Source code
1 : /**
2 : Copyright (c) 2016-2022 Roman Katuntsev <sbkarr@stappler.org>
3 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
4 :
5 : Permission is hereby granted, free of charge, to any person obtaining a copy
6 : of this software and associated documentation files (the "Software"), to deal
7 : in the Software without restriction, including without limitation the rights
8 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 : copies of the Software, and to permit persons to whom the Software is
10 : furnished to do so, subject to the following conditions:
11 :
12 : The above copyright notice and this permission notice shall be included in
13 : all copies or substantial portions of the Software.
14 :
15 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 : THE SOFTWARE.
22 : **/
23 :
24 : #include "SPCharGroup.h"
25 :
26 : namespace STAPPLER_VERSIONIZED stappler {
27 :
28 5242880050 : bool inCharGroup(CharGroupId mask, char16_t c) {
29 5242880050 : switch (mask) {
30 25 : case CharGroupId::None: return false; break;
31 100 : case CharGroupId::PunctuationBasic: return chars::CharGroup<char16_t, CharGroupId::PunctuationBasic>::match(c); break;
32 100 : case CharGroupId::Numbers: return chars::CharGroup<char16_t, CharGroupId::Numbers>::match(c); break;
33 100 : case CharGroupId::Latin: return chars::CharGroup<char16_t, CharGroupId::Latin>::match(c); break;
34 100 : case CharGroupId::Cyrillic: return chars::CharGroup<char16_t, CharGroupId::Cyrillic>::match(c); break;
35 100 : case CharGroupId::Currency: return chars::CharGroup<char16_t, CharGroupId::Currency>::match(c); break;
36 100 : case CharGroupId::GreekBasic: return chars::CharGroup<char16_t, CharGroupId::GreekBasic>::match(c); break;
37 100 : case CharGroupId::Math: return chars::CharGroup<char16_t, CharGroupId::Math>::match(c); break;
38 100 : case CharGroupId::Arrows: return chars::CharGroup<char16_t, CharGroupId::Arrows>::match(c); break;
39 100 : case CharGroupId::Fractionals: return chars::CharGroup<char16_t, CharGroupId::Fractionals>::match(c); break;
40 100 : case CharGroupId::LatinSuppl1: return chars::CharGroup<char16_t, CharGroupId::LatinSuppl1>::match(c); break;
41 100 : case CharGroupId::PunctuationAdvanced: return chars::CharGroup<char16_t, CharGroupId::PunctuationAdvanced>::match(c); break;
42 100 : case CharGroupId::GreekAdvanced: return chars::CharGroup<char16_t, CharGroupId::GreekAdvanced>::match(c); break;
43 100 : case CharGroupId::WhiteSpace: return chars::CharGroup<char16_t, CharGroupId::WhiteSpace>::match(c); break;
44 100 : case CharGroupId::Controls: return chars::CharGroup<char16_t, CharGroupId::Controls>::match(c); break;
45 100 : case CharGroupId::NonPrintable: return chars::CharGroup<char16_t, CharGroupId::NonPrintable>::match(c); break;
46 100 : case CharGroupId::LatinLowercase: return chars::CharGroup<char16_t, CharGroupId::LatinLowercase>::match(c); break;
47 100 : case CharGroupId::LatinUppercase: return chars::CharGroup<char16_t, CharGroupId::LatinUppercase>::match(c); break;
48 100 : case CharGroupId::Alphanumeric: return chars::CharGroup<char16_t, CharGroupId::Alphanumeric>::match(c); break;
49 100 : case CharGroupId::Hexadecimial: return chars::CharGroup<char16_t, CharGroupId::Hexadecimial>::match(c); break;
50 100 : case CharGroupId::Base64: return chars::CharGroup<char16_t, CharGroupId::Base64>::match(c); break;
51 100 : case CharGroupId::BreakableWhiteSpace: return chars::CharGroup<char16_t, CharGroupId::BreakableWhiteSpace>::match(c); break;
52 100 : case CharGroupId::OpticalAlignmentSpecial: return chars::CharGroup<char16_t, CharGroupId::OpticalAlignmentSpecial>::match(c); break;
53 100 : case CharGroupId::OpticalAlignmentBullet: return chars::CharGroup<char16_t, CharGroupId::OpticalAlignmentBullet>::match(c); break;
54 100 : case CharGroupId::TextPunctuation: return chars::CharGroup<char16_t, CharGroupId::TextPunctuation>::match(c); break;
55 : }
56 5242878825 : return false;
57 : }
58 :
59 419430425 : bool inCharGroupMask(CharGroupId mask, char16_t c) {
60 13421772300 : for (size_t i = 1; i < sizeof(CharGroupId) * 8; i++) {
61 13002341925 : CharGroupId val = CharGroupId(1 << i);
62 13002341925 : if ((mask & val) != CharGroupId::None) {
63 4823449625 : if (inCharGroup(mask, c)) {
64 50 : return true;
65 : }
66 : }
67 : }
68 419430375 : return false;
69 : }
70 :
71 : namespace chars {
72 :
// Bit flags stored in each smart_lookup_table entry; one bit per character
// class, so a table byte can describe membership in up to eight classes.
enum class SmartType : uint8_t {
	PunctuationBasic = 0x01, // bit 0
	Numbers = 0x02, // bit 1
	WhiteSpace = 0x04, // bit 2
	LatinLowercase = 0x08, // bit 3
	LatinUppercase = 0x10, // bit 4
	Hexadecimial = 0x20, // bit 5
	Base64 = 0x40, // bit 6
	TextPunctuation = 0x80 // bit 7
};
83 :
// Per-byte classification table: entry N is the bitwise OR of the SmartType
// flags that apply to the byte value N. Entries 0x80-0xFF are all zero, so
// bytes outside the 7-bit range match no class.
//
// The table is only ever read, so it is declared constexpr to keep it
// immutable and constant-initialized.
//
// NOTE(review): entries for 'a'..'z' (0x61-0x7A) and for 0x7F carry bit 0
// (PunctuationBasic), while 'A'..'Z' do not — confirm whether that is intended.
static constexpr uint8_t smart_lookup_table[256] = {
	0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 0, 0, // 0x00-0x0F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10-0x1F
	4, 129, 129, 129, 129, 129, 1, 129, 129, 129, 129, 193, 129, 193, 129, 193, // 0x20-0x2F
	98, 98, 98, 98, 98, 98, 98, 98, 98, 98, 129, 129, 1, 193, 1, 129, // 0x30-0x3F
	129, 112, 112, 112, 112, 112, 112, 80, 80, 80, 80, 80, 80, 80, 80, 80, // 0x40-0x4F
	80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 129, 129, 129, 129, 193, // 0x50-0x5F
	1, 105, 105, 105, 105, 105, 105, 73, 73, 73, 73, 73, 73, 73, 73, 73, // 0x60-0x6F
	73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 1, 1, 1, 1, 1, // 0x70-0x7F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80-0x8F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x90-0x9F
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xA0-0xAF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xB0-0xBF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xC0-0xCF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xD0-0xDF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xE0-0xEF
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0xF0-0xFF
};
102 :
103 25 : bool CharGroup<char, GroupId::PunctuationBasic>::match(char c) {
104 25 : return smart_lookup_table[((const uint8_t *)&c)[0]] & toInt(SmartType::PunctuationBasic);
105 : }
106 :
107 53407298 : bool CharGroup<char, GroupId::Numbers>::match(char c) {
108 53407298 : return smart_lookup_table[((const uint8_t *)&c)[0]] & toInt(SmartType::Numbers);
109 : }
110 :
111 449755 : bool CharGroup<char, GroupId::Latin>::match(char c) {
112 449755 : return smart_lookup_table[((const uint8_t *)&c)[0]] & (toInt(SmartType::LatinLowercase) | toInt(SmartType::LatinUppercase));
113 : }
114 :
115 6517245 : bool CharGroup<char, GroupId::WhiteSpace>::match(char c) {
116 6517245 : return smart_lookup_table[((const uint8_t *)&c)[0]] & toInt(SmartType::WhiteSpace);
117 : }
118 :
119 3150 : bool CharGroup<char, GroupId::LatinLowercase>::match(char c) {
120 3150 : return smart_lookup_table[((const uint8_t *)&c)[0]] & toInt(SmartType::LatinLowercase);
121 : }
122 :
123 30700 : bool CharGroup<char, GroupId::LatinUppercase>::match(char c) {
124 30700 : return smart_lookup_table[((const uint8_t *)&c)[0]] & toInt(SmartType::LatinUppercase);
125 : }
126 :
127 573025 : bool CharGroup<char, GroupId::Alphanumeric>::match(char c) {
128 573025 : return smart_lookup_table[((const uint8_t *)&c)[0]] & (toInt(SmartType::LatinLowercase) | toInt(SmartType::LatinUppercase) | toInt(SmartType::Numbers));
129 : }
130 :
131 3825 : bool CharGroup<char, GroupId::Hexadecimial>::match(char c) {
132 3825 : return smart_lookup_table[((const uint8_t *)&c)[0]] & toInt(SmartType::Hexadecimial);
133 : }
134 :
135 131600 : bool CharGroup<char, GroupId::Base64>::match(char c) {
136 131600 : return smart_lookup_table[((const uint8_t *)&c)[0]] & toInt(SmartType::Base64);
137 : }
138 :
139 25 : bool CharGroup<char, GroupId::TextPunctuation>::match(char c) {
140 25 : return smart_lookup_table[((const uint8_t *)&c)[0]] & toInt(SmartType::TextPunctuation);
141 : }
142 :
143 : }
144 :
145 : }
|