Line data Source code
1 : /**
2 : Copyright (c) 2024 Stappler LLC <admin@stappler.dev>
3 :
4 : Permission is hereby granted, free of charge, to any person obtaining a copy
5 : of this software and associated documentation files (the "Software"), to deal
6 : in the Software without restriction, including without limitation the rights
7 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 : copies of the Software, and to permit persons to whom the Software is
9 : furnished to do so, subject to the following conditions:
10 :
11 : The above copyright notice and this permission notice shall be included in
12 : all copies or substantial portions of the Software.
13 :
14 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 : THE SOFTWARE.
21 : **/
22 :
23 : #ifndef CORE_SEARCH_SPSEARCHQUERY_H_
24 : #define CORE_SEARCH_SPSEARCHQUERY_H_
25 :
26 : #include "SPSearchParser.h"
27 :
28 : namespace STAPPLER_VERSIONIZED stappler::search {
29 :
// Rank normalization options. Every non-zero enumerator occupies its own bit,
// so several options can be requested at once.
// NOTE(review): bit 2 (value 4) is not assigned here; the remaining values look
// like the PostgreSQL ts_rank normalization flags — confirm the gap is intentional.
enum class Normalization {
	// no normalization flag is set
	Default = 0,
	// divides the rank by 1 + the logarithm of the document length
	DocLengthLog = 1 << 0,
	// divides the rank by the document length
	DocLength = 1 << 1,
	// divides the rank by the number of unique words in document
	UniqueWordsCount = 1 << 3,
	// divides the rank by 1 + the logarithm of the number of unique words in document
	UniqueWordsCountLog = 1 << 4,
	// divides the rank by itself + 1
	Self = 1 << 5
};
38 :
// Project macro applied to Normalization. Presumably it declares the bitwise
// operators needed to combine the enumerators as a mask — confirm against the
// macro definition, which is not part of this header.
SP_DEFINE_ENUM_AS_MASK(Normalization)
40 :
41 : struct RankingValues {
42 : float A = 1.0f;
43 : float B = 0.4f;
44 : float C = 0.2f;
45 : float D = 0.1f;
46 :
47 : // Linearly interpolated from first to last word in document
48 : // (so, last word score will be RANK * positionFactor
49 : // word in a middle: (RANK * (1.0 + positionFactor) / 2.0)
50 : // 1.0 - do not apply position-based score
51 : //
52 : // Not implemented in PostgreSQL engine, has no effect
53 : float positionFactor = 1.0f;
54 :
55 1150 : float rank(SearchRank r) const {
56 1150 : switch (r) {
57 25 : case SearchRank::A: return A; break;
58 525 : case SearchRank::B: return B; break;
59 25 : case SearchRank::C: return C; break;
60 25 : case SearchRank::D: return D; break;
61 525 : case SearchRank::Unknown: return D; break;
62 : }
63 25 : return D;
64 : }
65 : };
66 :
67 : struct SearchVector {
68 : using MatchVector = Vector<Pair<size_t, SearchData::Rank>>;
69 :
70 : size_t documentLength = 0;
71 : Map<StringView, MatchVector> words;
72 :
73 3300 : bool empty() const { return words.empty(); }
74 : };
75 :
76 : struct SearchQuery {
77 : enum Block : uint8_t {
78 : None,
79 : Parentesis,
80 : Quoted,
81 : };
82 :
83 : enum Format {
84 : Stappler,
85 : Postgresql,
86 : };
87 :
88 : Block block = None;
89 : SearchOp op = SearchOp::None;
90 : bool neg = false;
91 : uint32_t offset = 0;
92 : String value;
93 : StringView source;
94 : Vector<SearchQuery> args;
95 :
96 3725 : SearchQuery() = default;
97 : SearchQuery(StringView value, uint32_t offset = 1, StringView source = StringView());
98 : SearchQuery(SearchOp, StringView);
99 :
100 550 : bool empty() const { return (op == SearchOp::None && value.empty()) || (op != SearchOp::None && args.empty()); }
101 :
102 : void clear();
103 : void encode(const Callback<void(StringView)> &, Format = Stappler) const;
104 :
105 : void describe(std::ostream &stream, size_t depth = 0) const;
106 : void foreach(const Callback<void(StringView value, StringView source)> &) const;
107 :
108 : bool isMatch(const SearchVector &) const;
109 :
110 : // used with opaque index format from `Configuration::encodeSearchVectorData`
111 : bool isMatch(const BytesView &) const;
112 :
113 : float rankQuery(const SearchVector &, Normalization = Normalization::Default, RankingValues = RankingValues()) const;
114 :
115 : // used with opaque index format from `Configuration::encodeSearchVectorData`
116 : float rankQuery(const BytesView &, Normalization = Normalization::Default, RankingValues = RankingValues()) const;
117 :
118 : void normalize();
119 :
120 : void decompose(const Callback<void(StringView)> &positive, const Callback<void(StringView)> &negative) const;
121 : };
122 :
123 : }
124 :
125 : #endif /* CORE_SEARCH_SPSEARCHQUERY_H_ */
|