Line data Source code
1 : /**
2 : Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
3 :
4 : Permission is hereby granted, free of charge, to any person obtaining a copy
5 : of this software and associated documentation files (the "Software"), to deal
6 : in the Software without restriction, including without limitation the rights
7 : to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 : copies of the Software, and to permit persons to whom the Software is
9 : furnished to do so, subject to the following conditions:
10 :
11 : The above copyright notice and this permission notice shall be included in
12 : all copies or substantial portions of the Software.
13 :
14 : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 : IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 : FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 : AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 : LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 : OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 : THE SOFTWARE.
21 : **/
22 :
23 : #ifndef CORE_TESS_SPTESSSIMD_HPP_
24 : #define CORE_TESS_SPTESSSIMD_HPP_
25 :
26 : #include "SPSIMD.h"
27 :
28 : namespace STAPPLER_VERSIONIZED stappler::geom {
29 :
30 : SP_ATTR_OPTIMIZE_INLINE_FN static inline simd::f32x4 getNormalizedVec(const float v0[2], const float v1[2], const float v2[2]) {
31 : simd::f32x4 targetVec = simd::sub(
32 3438354 : simd::load(v0[0], v0[1], v2[0], v2[1]),
33 3438354 : simd::load(v1[0], v1[1], v1[0], v1[1])); // x0, y0, x1, y1
34 :
35 : simd::f32x4 squaredVec = simd::mul(targetVec, targetVec); // x0 * x0, y0 * y0, x1 * x1, y1 * y1
36 :
37 : return simd::mul(targetVec,
38 : simd::rsqrt( simd::add(squaredVec,
39 3438354 : simde_mm_shuffle_ps(squaredVec, squaredVec, SIMDE_MM_SHUFFLE(2, 3, 0, 1))) ) // sse_rsqrt: n0, n0, n1, n1
40 : ); // nx0, ny0, nx1, ny1
41 : }
42 :
43 : SP_ATTR_OPTIMIZE_INLINE_FN static inline simd::f32x4 getBisectVec(const simd::f32x4 &normVec) {
44 : simd::f32x4 tmpNHalf = simd::load(-0.5f);
45 3438365 : simd::f32x4 normShuffleVec = simde_mm_shuffle_ps(normVec, normVec, SIMDE_MM_SHUFFLE(0, 1, 3, 2)); // nx1, ny1, ny0, nx0
46 :
47 : simd::f32x4 dotVec = simd::mul(normVec, normShuffleVec); // nx0 * nx1, ny0 * ny1, nx1 * ny0, ny1 * nx0
48 :
49 : simd::f32x4 crossVec = simd::sub(
50 : simde_mm_movehl_ps(dotVec, dotVec),
51 3438364 : simde_mm_shuffle_ps(dotVec, dotVec, SIMDE_MM_SHUFFLE(2, 3, 2, 3)));
52 :
53 3438361 : if (simde_mm_comieq_ss(crossVec, simde_mm_setzero_ps())) [[unlikely]] {
54 :
55 : // normal is -ny1, nx1, length is 1
56 :
57 : // return
58 : // 0.0f
59 : // 1.0f
60 : // -ny1
61 : // nx1
62 :
63 : return simd::mul(
64 : simd::load(0.0f, 1.0f, -1.0f, 1.0f),
65 483525 : simde_mm_shuffle_ps(simd::load(1.0f), normVec, SIMDE_MM_SHUFFLE(2, 3, 1, 0))
66 : );
67 : } else {
68 : // -0.5 + -0.5
69 : // nx0 * nx1 + ny0 * ny1
70 : // nx0 + nx1
71 : // ny0 + ny1
72 : simd::f32x4 normTarget = simd::add(
73 : simde_mm_movelh_ps(simde_mm_unpacklo_ps(tmpNHalf, dotVec), normVec), // -0.5, nx0 * nx1, nx0, ny0,
74 : simde_mm_movelh_ps(simde_mm_move_ss(dotVec, tmpNHalf), normShuffleVec) // -0.5, ny0 * ny1, nx1, ny1,
75 : ); // -1.0 , dot , tx (nx0 + nx1) , ty (ny0 + ny1)
76 :
77 : // -0.5 * -1.0
78 : // -0.5 * dotValue
79 : // tx * tx
80 : // ty * ty
81 :
82 : simd::f32x4 squaredVec = simd::mul(simde_mm_movehl_ps(normTarget, tmpNHalf), normTarget); // 0.5, -0.5 * dot, tx * tx, ty * ty
83 :
84 : // combined normalizing, length calculation, ccw test
85 : // ccw = nx1 * ny0 - ny1 * nx0
86 : // len = 1.0 * rsqrt( 0.5 + (-0.5 * dot)
87 : // ntx = tx * rsqrt( tx*tx + ty*ty )
88 : // nty = ty * rsqrt( tx*tx + ty*ty )
89 :
90 : return simde_mm_move_ss(
91 : simd::mul(simde_mm_movehl_ps(normTarget, simd::load(1.0f)),
92 : simd::rsqrt(
93 : simd::add(
94 : squaredVec,
95 2954839 : simde_mm_shuffle_ps(squaredVec, squaredVec, SIMDE_MM_SHUFFLE(2, 3, 0, 1))
96 : )
97 : )
98 : ),
99 : crossVec
100 : );
101 : }
102 : }
103 :
104 : // combined normalizing, length calculation, ccw test for 3 vertexes
105 : // ccw = nx1 * ny0 - ny1 * nx0
106 : // len = 1.0 * rsqrt( 0.5 + (-0.5 * dot)
107 : // ntx = tx * rsqrt( tx*tx + ty*ty )
108 : // nty = ty * rsqrt( tx*tx + ty*ty )
109 :
110 3438354 : SP_ATTR_OPTIMIZE_FN static void getVertexNormal(const float v0[2], const float v1[], const float v2[], float result[4]) {
111 : simd::f32x4 normVec = getNormalizedVec(v0, v1, v2);
112 : simd::f32x4 bisectVec = getBisectVec(normVec);
113 : simd::store(result, bisectVec);
114 3438378 : }
115 :
116 : }
117 :
118 : #endif /* CORE_TESS_SPTESSSIMD_HPP_ */
|