Line data Source code
1 : /**
2 : Copyright (c) 2016-2022 Roman Katuntsev <sbkarr@stappler.org>
3 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
4 :
5 : Permission is hereby granted, free of charge, to any person obtaining a copy
6 : of this software and associated documentation files (the "Software"), to deal
7 : in the Software without restriction, including without limitation the rights
8 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 : copies of the Software, and to permit persons to whom the Software is
10 : furnished to do so, subject to the following conditions:
11 :
12 : The above copyright notice and this permission notice shall be included in
13 : all copies or substantial portions of the Software.
14 :
15 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 : THE SOFTWARE.
22 : **/
23 :
24 : #include "SPData.h"
25 : #include "SPString.h"
26 : #include "SPStringView.h"
27 :
28 : #ifdef MODULE_STAPPLER_FILESYSTEM
29 : #include "SPFilesystem.h"
30 : #endif
31 :
32 : #define LZ4_HC_STATIC_LINKING_ONLY 1
33 : #include "lz4/lib/lz4hc.h"
34 :
35 : #ifdef MODULE_STAPPLER_BROTLI_LIB
36 : #include "brotli/encode.h"
37 : #include "brotli/decode.h"
38 : #else
39 : #warning Module 'stappler_brotli_lib' is not enabled, data::Value built without Brotli compression support
40 : #endif
41 :
42 : namespace STAPPLER_VERSIONIZED stappler::data {
43 :
44 : EncodeFormat EncodeFormat::CborCompressed(EncodeFormat::Cbor, EncodeFormat::LZ4HCCompression);
45 : EncodeFormat EncodeFormat::JsonCompressed(EncodeFormat::Json, EncodeFormat::LZ4HCCompression);
46 :
47 : int EncodeFormat::EncodeStreamIndex = std::ios_base::xalloc();
48 :
namespace serenity {

/**
 * Tells whether the byte `c` is flagged in the lookup table below.
 *
 * Flagged (returns true): control bytes 0x00-0x1F, every byte >= 0x7F, and the
 * ASCII characters space " # % & ( ) , : ; < = > [ \ ] ^ ` { | } ~
 * Not flagged (returns false): ASCII letters, digits and ! $ ' * + - . / ? @ _
 *
 * @param c byte to classify
 * @return true when the table entry for `c` is non-zero
 */
bool shouldEncodePercent(char c) {
// Local shorthand for a row of sixteen flagged entries; undefined right after the table
// so it does not leak into the rest of the translation unit.
#define V16 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
	static constexpr uint8_t s_decTable[256] = {
		V16, V16, // 0-1, 0-F
		1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, // 2, 0-F
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, // 3, 0-F
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 4, 0-F
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, // 5, 0-F
		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 6, 0-F
		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, // 7, 0-F
		V16, V16, V16, V16, V16, V16, V16, V16,
	};
#undef V16

	// Index through an unsigned byte so that a negative `char` selects the upper half
	// of the table instead of reading before its start.
	return s_decTable[static_cast<uint8_t>(c)] != 0;
}

}
68 :
69 : template <>
70 : template <>
71 0 : auto ValueTemplate<memory::PoolInterface>::convert<memory::PoolInterface>() const -> ValueTemplate<memory::PoolInterface> {
72 0 : return ValueTemplate<memory::PoolInterface>(*this);
73 : }
74 :
75 : template <>
76 : template <>
77 0 : auto ValueTemplate<memory::StandartInterface>::convert<memory::StandartInterface>() const -> ValueTemplate<memory::StandartInterface> {
78 0 : return ValueTemplate<memory::StandartInterface>(*this);
79 : }
80 :
81 : template <>
82 : template <>
83 0 : auto ValueTemplate<memory::PoolInterface>::convert<memory::StandartInterface>() const -> ValueTemplate<memory::StandartInterface> {
84 0 : switch (_type) {
85 0 : case Type::INTEGER: return ValueTemplate<memory::StandartInterface>(intVal); break;
86 0 : case Type::DOUBLE: return ValueTemplate<memory::StandartInterface>(doubleVal); break;
87 0 : case Type::BOOLEAN: return ValueTemplate<memory::StandartInterface>(boolVal); break;
88 0 : case Type::CHARSTRING:
89 0 : return ValueTemplate<memory::StandartInterface>(memory::StandartInterface::StringType(strVal->data(), strVal->size()));
90 : break;
91 0 : case Type::BYTESTRING:
92 0 : return ValueTemplate<memory::StandartInterface>(memory::StandartInterface::BytesType(bytesVal->data(), bytesVal->data() + bytesVal->size()));
93 : break;
94 0 : case Type::ARRAY: {
95 0 : ValueTemplate<memory::StandartInterface> ret(ValueTemplate<memory::StandartInterface>::Type::ARRAY);
96 0 : auto &arr = ret.asArray();
97 0 : arr.reserve(arrayVal->size());
98 0 : for (auto &it : *arrayVal) {
99 0 : arr.emplace_back(it.convert<memory::StandartInterface>());
100 : }
101 0 : return ret;
102 : break;
103 0 : }
104 0 : case Type::DICTIONARY: {
105 0 : ValueTemplate<memory::StandartInterface> ret(ValueTemplate<memory::StandartInterface>::Type::DICTIONARY);
106 0 : auto &dict = ret.asDict();
107 0 : for (auto &it : *dictVal) {
108 0 : dict.emplace(StringView(it.first).str<memory::StandartInterface>(), it.second.convert<memory::StandartInterface>());
109 : }
110 0 : return ret;
111 : break;
112 0 : }
113 0 : default:
114 0 : break;
115 : }
116 0 : return ValueTemplate<memory::StandartInterface>();
117 : }
118 :
119 : template <>
120 : template <>
121 0 : auto ValueTemplate<memory::StandartInterface>::convert<memory::PoolInterface>() const -> ValueTemplate<memory::PoolInterface> {
122 0 : switch (_type) {
123 0 : case Type::INTEGER: return ValueTemplate<memory::PoolInterface>(intVal); break;
124 0 : case Type::DOUBLE: return ValueTemplate<memory::PoolInterface>(doubleVal); break;
125 0 : case Type::BOOLEAN: return ValueTemplate<memory::PoolInterface>(boolVal); break;
126 0 : case Type::CHARSTRING:
127 0 : return ValueTemplate<memory::PoolInterface>(memory::PoolInterface::StringType(strVal->data(), strVal->size()));
128 : break;
129 0 : case Type::BYTESTRING:
130 0 : return ValueTemplate<memory::PoolInterface>(memory::PoolInterface::BytesType(bytesVal->data(), bytesVal->data() + bytesVal->size()));
131 : break;
132 0 : case Type::ARRAY: {
133 0 : ValueTemplate<memory::PoolInterface> ret(ValueTemplate<memory::PoolInterface>::Type::ARRAY);
134 0 : auto &arr = ret.asArray();
135 0 : arr.reserve(arrayVal->size());
136 0 : for (auto &it : *arrayVal) {
137 0 : arr.emplace_back(it.convert<memory::PoolInterface>());
138 : }
139 0 : return ret;
140 : break;
141 0 : }
142 0 : case Type::DICTIONARY: {
143 0 : ValueTemplate<memory::PoolInterface> ret(ValueTemplate<memory::PoolInterface>::Type::DICTIONARY);
144 0 : auto &dict = ret.asDict();
145 0 : dict.reserve(dictVal->size());
146 0 : for (auto &it : *dictVal) {
147 0 : dict.emplace(StringView(it.first).str<memory::PoolInterface>(), it.second.convert<memory::PoolInterface>());
148 : }
149 0 : return ret;
150 : break;
151 0 : }
152 0 : default:
153 0 : break;
154 : }
155 0 : return ValueTemplate<memory::PoolInterface>();
156 : }
157 :
158 4575 : size_t getCompressBounds(size_t size, EncodeFormat::Compression c) {
159 4575 : switch (c) {
160 3750 : case EncodeFormat::LZ4Compression:
161 : case EncodeFormat::LZ4HCCompression: {
162 3750 : if (size < LZ4_MAX_INPUT_SIZE) {
163 3750 : return LZ4_compressBound(size) + ((size <= 0xFFFF) ? 2 : 4);
164 : }
165 0 : return 0;
166 : break;
167 : }
168 : #ifdef MODULE_STAPPLER_BROTLI_LIB
169 825 : case EncodeFormat::Brotli:
170 825 : if (size < LZ4_MAX_INPUT_SIZE) {
171 825 : return BrotliEncoderMaxCompressedSize(size) + ((size <= 0xFFFF) ? 2 : 4);
172 : }
173 0 : return 0;
174 : break;
175 : #endif
176 0 : case EncodeFormat::NoCompression:
177 0 : break;
178 : }
179 0 : return 0;
180 : }
181 :
182 : thread_local uint8_t tl_lz4HCEncodeState[std::max(sizeof(LZ4_streamHC_t), sizeof(LZ4_stream_t))];
183 : thread_local uint8_t tl_compressBuffer[128_KiB];
184 :
185 0 : uint8_t *getLZ4EncodeState() {
186 0 : return tl_lz4HCEncodeState;
187 : }
188 :
189 4575 : size_t compressData(const uint8_t *src, size_t srcSize, uint8_t *dest, size_t destSize, EncodeFormat::Compression c) {
190 4575 : switch (c) {
191 800 : case EncodeFormat::LZ4Compression: {
192 800 : const int offSize = ((srcSize <= 0xFFFF) ? 2 : 4);
193 800 : const int ret = LZ4_compress_fast_extState(tl_lz4HCEncodeState, (const char *)src, (char *)dest + offSize, srcSize, destSize - offSize, 1);
194 800 : if (ret > 0) {
195 800 : if (srcSize <= 0xFFFF) {
196 0 : uint16_t sz = srcSize;
197 0 : memcpy(dest, &sz, sizeof(sz));
198 : } else {
199 800 : uint32_t sz = srcSize;
200 800 : memcpy(dest, &sz, sizeof(sz));
201 : }
202 800 : return ret + offSize;
203 : }
204 0 : break;
205 : }
206 2950 : case EncodeFormat::LZ4HCCompression: {
207 2950 : const int offSize = ((srcSize <= 0xFFFF) ? 2 : 4);
208 2950 : const int ret = LZ4_compress_HC_extStateHC(tl_lz4HCEncodeState, (const char *)src, (char *)dest + offSize, srcSize, destSize - offSize, LZ4HC_CLEVEL_MAX);
209 2950 : if (ret > 0) {
210 2950 : if (srcSize <= 0xFFFF) {
211 2150 : uint16_t sz = srcSize;
212 2150 : memcpy(dest, &sz, sizeof(sz));
213 : } else {
214 800 : uint32_t sz = srcSize;
215 800 : memcpy(dest, &sz, sizeof(sz));
216 : }
217 2950 : return ret + offSize;
218 : }
219 0 : break;
220 : }
221 : #ifdef MODULE_STAPPLER_BROTLI_LIB
222 825 : case EncodeFormat::Brotli: {
223 825 : const int offSize = ((srcSize <= 0xFFFF) ? 2 : 4);
224 825 : size_t ret = destSize - offSize;
225 1650 : if (BrotliEncoderCompress(10, BROTLI_MAX_WINDOW_BITS, BROTLI_DEFAULT_MODE,
226 825 : srcSize, (const uint8_t *)src, &ret,dest + offSize) == BROTLI_TRUE) {
227 825 : if (srcSize <= 0xFFFF) {
228 25 : uint16_t sz = srcSize;
229 25 : memcpy(dest, &sz, sizeof(sz));
230 : } else {
231 800 : uint32_t sz = srcSize;
232 800 : memcpy(dest, &sz, sizeof(sz));
233 : }
234 825 : return ret + offSize;
235 : }
236 0 : break;
237 : }
238 : #endif
239 0 : case EncodeFormat::NoCompression:
240 0 : break;
241 : }
242 0 : return 0;
243 : }
244 :
245 2525 : void writeCompressionMark(uint8_t *data, size_t sourceSize, EncodeFormat::Compression c, uint8_t padding) {
246 2525 : switch (c) {
247 1700 : case EncodeFormat::LZ4Compression:
248 : case EncodeFormat::LZ4HCCompression:
249 1700 : if (sourceSize <= 0xFFFF) {
250 100 : switch (padding) {
251 50 : case 0: memcpy(data, "LZ4S", 4); break;
252 0 : case 1: memcpy(data, "LZ4T", 4); break;
253 25 : case 2: memcpy(data, "LZ4U", 4); break;
254 25 : case 3: memcpy(data, "LZ4V", 4); break;
255 : }
256 : } else {
257 1600 : switch (padding) {
258 500 : case 0: memcpy(data, "LZ4W", 4); break;
259 400 : case 1: memcpy(data, "LZ4X", 4); break;
260 500 : case 2: memcpy(data, "LZ4Y", 4); break;
261 200 : case 3: memcpy(data, "LZ4Z", 4); break;
262 : }
263 : }
264 1700 : break;
265 : #ifdef MODULE_STAPPLER_BROTLI_LIB
266 825 : case EncodeFormat::Brotli:
267 825 : if (sourceSize <= 0xFFFF) {
268 25 : switch (padding) {
269 25 : case 0: memcpy(data, "SBrS", 4); break;
270 0 : case 1: memcpy(data, "SBrT", 4); break;
271 0 : case 2: memcpy(data, "SBrU", 4); break;
272 0 : case 3: memcpy(data, "SBrV", 4); break;
273 : }
274 : } else {
275 800 : switch (padding) {
276 200 : case 0: memcpy(data, "SBrW", 4); break;
277 200 : case 1: memcpy(data, "SBrX", 4); break;
278 200 : case 2: memcpy(data, "SBrY", 4); break;
279 200 : case 3: memcpy(data, "SBrZ", 4); break;
280 : }
281 : }
282 825 : break;
283 : #endif
284 0 : case EncodeFormat::NoCompression:
285 0 : break;
286 : }
287 2525 : }
288 :
289 : template <typename Interface>
290 4575 : static inline auto doCompress(const uint8_t *src, size_t size, EncodeFormat::Compression c, bool conditional) -> typename Interface::BytesType {
291 4575 : auto bufferSize = getCompressBounds(size, c);
292 4575 : if (bufferSize == 0) {
293 0 : return typename Interface::BytesType();
294 4575 : } else if (bufferSize <= sizeof(tl_compressBuffer)) {
295 3075 : auto encodeSize = compressData(src, size, tl_compressBuffer, sizeof(tl_compressBuffer), c);
296 3075 : if (encodeSize == 0 || (conditional && encodeSize + 4 > size)) { return typename Interface::BytesType(); }
297 1025 : auto targetSize = encodeSize + 4;
298 1025 : auto targetExtra = 4 - (targetSize) % sizeof(uint32_t);
299 1025 : targetSize += ((targetExtra == 4) ? 0 : targetExtra);
300 1025 : typename Interface::BytesType ret; ret.resize(targetSize);
301 1025 : writeCompressionMark(ret.data(), size, c, (targetExtra == 4) ? 0 : targetExtra);
302 1025 : memcpy(ret.data() + 4, tl_compressBuffer, encodeSize);
303 1025 : return ret;
304 1025 : } else {
305 1500 : typename Interface::BytesType ret; ret.resize(bufferSize + 4);
306 1500 : auto encodeSize = compressData(src, size, ret.data() + 4, bufferSize, c);
307 1500 : if (encodeSize == 0 || (conditional && encodeSize + 4 > size)) { return typename Interface::BytesType(); }
308 1500 : auto targetSize = encodeSize + 4;
309 1500 : auto targetExtra = 4 - (targetSize) % sizeof(uint32_t);
310 1500 : writeCompressionMark(ret.data(), size, c, (targetExtra == 4) ? 0 : targetExtra);
311 1500 : targetSize += ((targetExtra == 4) ? 0 : targetExtra);
312 1500 : ret.resize(targetSize);
313 1500 : ret.shrink_to_fit();
314 1500 : return ret;
315 1500 : }
316 : return typename Interface::BytesType();
317 : }
318 :
319 : template <>
320 3050 : auto compress<memory::PoolInterface>(const uint8_t *src, size_t size, EncodeFormat::Compression c, bool conditional) -> memory::PoolInterface::BytesType {
321 3050 : return doCompress<memory::PoolInterface>(src, size, c, conditional);
322 : }
323 :
324 : template <>
325 1525 : auto compress<memory::StandartInterface>(const uint8_t *src, size_t size, EncodeFormat::Compression c, bool conditional) -> memory::StandartInterface::BytesType {
326 1525 : return doCompress<memory::StandartInterface>(src, size, c, conditional);
327 : }
328 :
329 : using decompress_ptr = const uint8_t *;
330 :
331 1425 : static bool doDecompressLZ4Frame(const uint8_t *src, size_t srcSize, uint8_t *dest, size_t destSize) {
332 1425 : return LZ4_decompress_safe((const char *)src, (char *)dest, srcSize, destSize) > 0;
333 : }
334 :
335 : template <typename Interface>
336 1275 : static inline auto doDecompressLZ4(BytesView data, bool sh) -> ValueTemplate<Interface> {
337 1275 : size_t size = sh ? data.readUnsigned16() : data.readUnsigned32();
338 :
339 1275 : ValueTemplate<Interface> ret;
340 1275 : if (size <= sizeof(tl_compressBuffer)) {
341 525 : if (doDecompressLZ4Frame(data.data(), data.size(), tl_compressBuffer, size)) {
342 525 : ret = data::read<Interface>(BytesView(tl_compressBuffer, size));
343 : }
344 : } else {
345 750 : typename Interface::BytesType res; res.resize(size);
346 750 : if (doDecompressLZ4Frame(data.data(), data.size(), res.data(), size)) {
347 750 : ret = data::read<Interface>(res);
348 : }
349 750 : }
350 1275 : return ret;
351 0 : }
352 :
353 : template <>
354 850 : auto decompressLZ4(const uint8_t *srcPtr, size_t srcSize, bool sh) -> ValueTemplate<memory::PoolInterface> {
355 850 : return doDecompressLZ4<memory::PoolInterface>(BytesView(srcPtr, srcSize), sh);
356 : }
357 :
358 : template <>
359 425 : auto decompressLZ4(const uint8_t *srcPtr, size_t srcSize, bool sh) -> ValueTemplate<memory::StandartInterface> {
360 425 : return doDecompressLZ4<memory::StandartInterface>(BytesView(srcPtr, srcSize), sh);
361 : }
362 :
363 : #ifdef MODULE_STAPPLER_BROTLI_LIB
364 625 : static bool doDecompressBrotliFrame(const uint8_t *src, size_t srcSize, uint8_t *dest, size_t destSize) {
365 625 : size_t ret = destSize;
366 625 : return BrotliDecoderDecompress(srcSize, src, &ret, dest) == BROTLI_DECODER_RESULT_SUCCESS;
367 : }
368 : template <typename Interface>
369 625 : static inline auto doDecompressBrotli(BytesView data, bool sh) -> ValueTemplate<Interface> {
370 625 : size_t size = sh ? data.readUnsigned16() : data.readUnsigned32();
371 :
372 625 : ValueTemplate<Interface> ret;
373 625 : if (size <= sizeof(tl_compressBuffer)) {
374 250 : if (doDecompressBrotliFrame(data.data(), data.size(), tl_compressBuffer, size)) {
375 250 : ret = data::read<Interface>(BytesView(tl_compressBuffer, size));
376 : }
377 : } else {
378 375 : typename Interface::BytesType res; res.resize(size);
379 375 : if (doDecompressBrotliFrame(data.data(), data.size(), res.data(), size)) {
380 375 : ret = data::read<Interface>(res);
381 : }
382 375 : }
383 625 : return ret;
384 0 : }
385 :
386 : template <>
387 400 : auto decompressBrotli(const uint8_t *srcPtr, size_t srcSize, bool sh) -> ValueTemplate<memory::PoolInterface> {
388 400 : return doDecompressBrotli<memory::PoolInterface>(BytesView(srcPtr, srcSize), sh);
389 : }
390 :
391 : template <>
392 225 : auto decompressBrotli(const uint8_t *srcPtr, size_t srcSize, bool sh) -> ValueTemplate<memory::StandartInterface> {
393 225 : return doDecompressBrotli<memory::StandartInterface>(BytesView(srcPtr, srcSize), sh);
394 : }
395 :
396 : #endif
397 :
398 296 : size_t decompress(const uint8_t *d, size_t size, uint8_t *dstData, size_t dstSize) {
399 296 : size_t ret = 0;
400 296 : BytesView data(d, size);
401 297 : uint8_t padding = 0;
402 297 : auto ff = detectDataFormat(data.data(), data.size(), padding);
403 297 : switch (ff) {
404 0 : case DataFormat::LZ4_Short: {
405 0 : data += 4;
406 0 : ret = data.readUnsigned16();
407 0 : if (dstData) {
408 0 : if (dstSize >= ret) {
409 0 : if (!doDecompressLZ4Frame(data.data(), data.size() - padding, dstData, ret)) {
410 0 : ret = 0;
411 : }
412 : } else {
413 0 : ret = 0;
414 : }
415 : }
416 0 : break;
417 : }
418 297 : case DataFormat::LZ4_Word: {
419 297 : data += 4;
420 298 : ret = data.readUnsigned32();
421 297 : if (dstData) {
422 150 : if (dstSize >= ret) {
423 150 : if (!doDecompressLZ4Frame(data.data(), data.size() - padding, dstData, ret)) {
424 0 : ret = 0;
425 : }
426 : } else {
427 0 : ret = 0;
428 : }
429 : }
430 298 : break;
431 : }
432 : #ifdef MODULE_STAPPLER_BROTLI_LIB
433 0 : case DataFormat::Brotli_Short: {
434 0 : data += 4;
435 0 : ret = data.readUnsigned16();
436 0 : if (dstData) {
437 0 : if (dstSize >= ret) {
438 0 : if (!doDecompressBrotliFrame(data.data(), data.size() - padding, dstData, ret)) {
439 0 : ret = 0;
440 : }
441 : } else {
442 0 : ret = 0;
443 : }
444 : }
445 0 : break;
446 : }
447 0 : case DataFormat::Brotli_Word: {
448 0 : data += 4;
449 0 : ret = data.readUnsigned32();
450 0 : if (dstData) {
451 0 : if (dstSize >= ret) {
452 0 : if (!doDecompressBrotliFrame(data.data(), data.size() - padding, dstData, ret)) {
453 0 : ret = 0;
454 : }
455 : } else {
456 0 : ret = 0;
457 : }
458 : }
459 0 : break;
460 : }
461 : #endif
462 0 : default: break;
463 : }
464 298 : return ret;
465 : }
466 :
467 0 : size_t getDecompressedSize(const uint8_t *d, size_t size) {
468 0 : return decompress(d, size, nullptr, 0);
469 : }
470 :
471 : }
|