LCOV - code coverage report
Current view: top level - core/tess - SPTessSimd.hpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 10 10 100.0 %
Date: 2024-05-12 00:16:13 Functions: 1 1 100.0 %

          Line data    Source code
       1             : /**
       2             :  Copyright (c) 2023 Stappler LLC <admin@stappler.dev>
       3             : 
       4             :  Permission is hereby granted, free of charge, to any person obtaining a copy
       5             :  of this software and associated documentation files (the "Software"), to deal
       6             :  in the Software without restriction, including without limitation the rights
       7             :  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
       8             :  copies of the Software, and to permit persons to whom the Software is
       9             :  furnished to do so, subject to the following conditions:
      10             : 
      11             :  The above copyright notice and this permission notice shall be included in
      12             :  all copies or substantial portions of the Software.
      13             : 
      14             :  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
      15             :  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
      16             :  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
      17             :  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
      18             :  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
      19             :  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
      20             :  THE SOFTWARE.
      21             :  **/
      22             : 
      23             : #ifndef CORE_TESS_SPTESSSIMD_HPP_
      24             : #define CORE_TESS_SPTESSSIMD_HPP_
      25             : 
      26             : #include "SPSIMD.h"
      27             : 
      28             : namespace STAPPLER_VERSIONIZED stappler::geom {
      29             : 
      30             : SP_ATTR_OPTIMIZE_INLINE_FN static inline simd::f32x4 getNormalizedVec(const float v0[2], const float v1[2], const float v2[2]) {
      31             :         simd::f32x4 targetVec = simd::sub(
      32     3438354 :                 simd::load(v0[0], v0[1], v2[0], v2[1]),
      33     3438354 :                 simd::load(v1[0], v1[1], v1[0], v1[1])); // x0, y0, x1, y1
      34             : 
      35             :         simd::f32x4 squaredVec = simd::mul(targetVec, targetVec); // x0 * x0, y0 * y0, x1 * x1, y1 * y1
      36             : 
      37             :         return simd::mul(targetVec,
      38             :                 simd::rsqrt( simd::add(squaredVec,
      39     3438354 :                         simde_mm_shuffle_ps(squaredVec, squaredVec, SIMDE_MM_SHUFFLE(2, 3, 0, 1))) ) // sse_rsqrt: n0, n0, n1, n1
      40             :                 ); // nx0, ny0, nx1, ny1
      41             : }
      42             : 
      43             : SP_ATTR_OPTIMIZE_INLINE_FN static inline simd::f32x4 getBisectVec(const simd::f32x4 &normVec) {
      44             :         simd::f32x4 tmpNHalf = simd::load(-0.5f);
      45     3438365 :         simd::f32x4 normShuffleVec = simde_mm_shuffle_ps(normVec, normVec, SIMDE_MM_SHUFFLE(0, 1, 3, 2)); // nx1, ny1, ny0, nx0
      46             : 
      47             :         simd::f32x4 dotVec = simd::mul(normVec, normShuffleVec); // nx0 * nx1, ny0 * ny1, nx1 * ny0, ny1 * nx0
      48             : 
      49             :         simd::f32x4 crossVec = simd::sub(
      50             :                 simde_mm_movehl_ps(dotVec, dotVec),
      51     3438364 :                 simde_mm_shuffle_ps(dotVec, dotVec, SIMDE_MM_SHUFFLE(2, 3, 2, 3)));
      52             : 
      53     3438361 :         if (simde_mm_comieq_ss(crossVec, simde_mm_setzero_ps())) [[unlikely]] {
      54             : 
      55             :                 // normal is -ny1, nx1, length is 1
      56             : 
      57             :                 // return
      58             :                 // 0.0f
      59             :                 // 1.0f
      60             :                 // -ny1
      61             :                 // nx1
      62             : 
      63             :                 return simd::mul(
      64             :                         simd::load(0.0f, 1.0f, -1.0f, 1.0f),
      65      483525 :                         simde_mm_shuffle_ps(simd::load(1.0f), normVec, SIMDE_MM_SHUFFLE(2, 3, 1, 0))
      66             :                 );
      67             :         } else {
      68             :                 // -0.5      + -0.5
      69             :                 // nx0 * nx1 + ny0 * ny1
      70             :                 // nx0       + nx1
      71             :                 // ny0       + ny1
      72             :                 simd::f32x4 normTarget = simd::add(
      73             :                         simde_mm_movelh_ps(simde_mm_unpacklo_ps(tmpNHalf, dotVec), normVec),   // -0.5, nx0 * nx1, nx0, ny0,
      74             :                         simde_mm_movelh_ps(simde_mm_move_ss(dotVec, tmpNHalf), normShuffleVec) // -0.5, ny0 * ny1, nx1, ny1,
      75             :                 ); // -1.0 , dot , tx (nx0 + nx1) , ty (ny0 + ny1)
      76             : 
      77             :                 // -0.5  * -1.0
      78             :                 // -0.5 * dotValue
      79             :                 // tx   * tx
      80             :                 // ty   * ty
      81             : 
      82             :                 simd::f32x4 squaredVec = simd::mul(simde_mm_movehl_ps(normTarget, tmpNHalf), normTarget); // 0.5, -0.5 * dot, tx * tx, ty * ty
      83             : 
      84             :                 // combined normalizing, length calculation, ccw test
      85             :                 // ccw = nx1 * ny0 - ny1 * nx0
      86             :                 // len = 1.0 * rsqrt( 0.5   + (-0.5 * dot)
      87             :                 // ntx = tx *  rsqrt( tx*tx +  ty*ty )
      88             :                 // nty = ty *  rsqrt( tx*tx +  ty*ty )
      89             : 
      90             :                 return simde_mm_move_ss(
      91             :                         simd::mul(simde_mm_movehl_ps(normTarget, simd::load(1.0f)),
      92             :                                 simd::rsqrt(
      93             :                                         simd::add(
      94             :                                                 squaredVec,
      95     2954839 :                                                 simde_mm_shuffle_ps(squaredVec, squaredVec, SIMDE_MM_SHUFFLE(2, 3, 0, 1))
      96             :                                         )
      97             :                                 )
      98             :                         ),
      99             :                         crossVec
     100             :                 );
     101             :         }
     102             : }
     103             : 
     104             : // combined normalizing, length calculation, ccw test for 3 vertexes
     105             : // ccw = nx1 * ny0 - ny1 * nx0
     106             : // len = 1.0 * rsqrt( 0.5   + (-0.5 * dot)
     107             : // ntx = tx *  rsqrt( tx*tx +  ty*ty )
     108             : // nty = ty *  rsqrt( tx*tx +  ty*ty )
     109             : 
     110     3438354 : SP_ATTR_OPTIMIZE_FN static void getVertexNormal(const float v0[2], const float v1[], const float v2[], float result[4]) {
     111             :         simd::f32x4 normVec = getNormalizedVec(v0, v1, v2);
     112             :         simd::f32x4 bisectVec = getBisectVec(normVec);
     113             :         simd::store(result, bisectVec);
     114     3438378 : }
     115             : 
     116             : }
     117             : 
     118             : #endif /* CORE_TESS_SPTESSSIMD_HPP_ */

Generated by: LCOV version 1.14