OpenCV library for Renesas RZ/A

Dependents:   RZ_A2M_Mbed_samples

Committer:
RyoheiHagimoto
Date:
Fri Jan 29 04:53:38 2021 +0000
Revision:
0:0e0631af0305
copied from https://github.com/d-kato/opencv-lib.

Who changed what in which revision?

User | Revision | Line number | New contents of line
RyoheiHagimoto 0:0e0631af0305 1 /*M///////////////////////////////////////////////////////////////////////////////////////
RyoheiHagimoto 0:0e0631af0305 2 //
RyoheiHagimoto 0:0e0631af0305 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
RyoheiHagimoto 0:0e0631af0305 4 //
RyoheiHagimoto 0:0e0631af0305 5 // By downloading, copying, installing or using the software you agree to this license.
RyoheiHagimoto 0:0e0631af0305 6 // If you do not agree to this license, do not download, install,
RyoheiHagimoto 0:0e0631af0305 7 // copy or use the software.
RyoheiHagimoto 0:0e0631af0305 8 //
RyoheiHagimoto 0:0e0631af0305 9 //
RyoheiHagimoto 0:0e0631af0305 10 // License Agreement
RyoheiHagimoto 0:0e0631af0305 11 // For Open Source Computer Vision Library
RyoheiHagimoto 0:0e0631af0305 12 //
RyoheiHagimoto 0:0e0631af0305 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
RyoheiHagimoto 0:0e0631af0305 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
RyoheiHagimoto 0:0e0631af0305 15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
RyoheiHagimoto 0:0e0631af0305 16 // Copyright (C) 2015, Itseez Inc., all rights reserved.
RyoheiHagimoto 0:0e0631af0305 17 // Third party copyrights are property of their respective owners.
RyoheiHagimoto 0:0e0631af0305 18 //
RyoheiHagimoto 0:0e0631af0305 19 // Redistribution and use in source and binary forms, with or without modification,
RyoheiHagimoto 0:0e0631af0305 20 // are permitted provided that the following conditions are met:
RyoheiHagimoto 0:0e0631af0305 21 //
RyoheiHagimoto 0:0e0631af0305 22 // * Redistribution's of source code must retain the above copyright notice,
RyoheiHagimoto 0:0e0631af0305 23 // this list of conditions and the following disclaimer.
RyoheiHagimoto 0:0e0631af0305 24 //
RyoheiHagimoto 0:0e0631af0305 25 // * Redistribution's in binary form must reproduce the above copyright notice,
RyoheiHagimoto 0:0e0631af0305 26 // this list of conditions and the following disclaimer in the documentation
RyoheiHagimoto 0:0e0631af0305 27 // and/or other materials provided with the distribution.
RyoheiHagimoto 0:0e0631af0305 28 //
RyoheiHagimoto 0:0e0631af0305 29 // * The name of the copyright holders may not be used to endorse or promote products
RyoheiHagimoto 0:0e0631af0305 30 // derived from this software without specific prior written permission.
RyoheiHagimoto 0:0e0631af0305 31 //
RyoheiHagimoto 0:0e0631af0305 32 // This software is provided by the copyright holders and contributors "as is" and
RyoheiHagimoto 0:0e0631af0305 33 // any express or implied warranties, including, but not limited to, the implied
RyoheiHagimoto 0:0e0631af0305 34 // warranties of merchantability and fitness for a particular purpose are disclaimed.
RyoheiHagimoto 0:0e0631af0305 35 // In no event shall the Intel Corporation or contributors be liable for any direct,
RyoheiHagimoto 0:0e0631af0305 36 // indirect, incidental, special, exemplary, or consequential damages
RyoheiHagimoto 0:0e0631af0305 37 // (including, but not limited to, procurement of substitute goods or services;
RyoheiHagimoto 0:0e0631af0305 38 // loss of use, data, or profits; or business interruption) however caused
RyoheiHagimoto 0:0e0631af0305 39 // and on any theory of liability, whether in contract, strict liability,
RyoheiHagimoto 0:0e0631af0305 40 // or tort (including negligence or otherwise) arising in any way out of
RyoheiHagimoto 0:0e0631af0305 41 // the use of this software, even if advised of the possibility of such damage.
RyoheiHagimoto 0:0e0631af0305 42 //
RyoheiHagimoto 0:0e0631af0305 43 //M*/
RyoheiHagimoto 0:0e0631af0305 44
RyoheiHagimoto 0:0e0631af0305 45 #ifndef OPENCV_HAL_INTRIN_NEON_HPP
RyoheiHagimoto 0:0e0631af0305 46 #define OPENCV_HAL_INTRIN_NEON_HPP
RyoheiHagimoto 0:0e0631af0305 47
RyoheiHagimoto 0:0e0631af0305 48 #include <algorithm>
RyoheiHagimoto 0:0e0631af0305 49 #include "opencv2/core/utility.hpp"
RyoheiHagimoto 0:0e0631af0305 50
RyoheiHagimoto 0:0e0631af0305 51 namespace cv
RyoheiHagimoto 0:0e0631af0305 52 {
RyoheiHagimoto 0:0e0631af0305 53
RyoheiHagimoto 0:0e0631af0305 54 //! @cond IGNORED
RyoheiHagimoto 0:0e0631af0305 55
// SIMD capability flags: CV_SIMD128 is always 1 here; CV_SIMD128_64F is 1 only
// when __aarch64__ is defined (0 otherwise) and gates the float64x2 code below.
RyoheiHagimoto 0:0e0631af0305 56 #define CV_SIMD128 1
RyoheiHagimoto 0:0e0631af0305 57 #if defined(__aarch64__)
RyoheiHagimoto 0:0e0631af0305 58 #define CV_SIMD128_64F 1
RyoheiHagimoto 0:0e0631af0305 59 #else
RyoheiHagimoto 0:0e0631af0305 60 #define CV_SIMD128_64F 0
RyoheiHagimoto 0:0e0631af0305 61 #endif
RyoheiHagimoto 0:0e0631af0305 62
// Only when CV_SIMD128_64F is set: for each (vector type, suffix) pair the macro
// defines two template helpers, vreinterpretq_<suffix>_f64 and
// vreinterpretq_f64_<suffix>, each of which simply C-style-casts its argument to
// the target vector type. It is instantiated for the u8..s64 and f32 registers.
// NOTE(review): presumably these stand in for f64 reinterpret intrinsics that
// some toolchains lack -- confirm before removing.
RyoheiHagimoto 0:0e0631af0305 63 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 64 #define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \
RyoheiHagimoto 0:0e0631af0305 65 template <typename T> static inline \
RyoheiHagimoto 0:0e0631af0305 66 _Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
RyoheiHagimoto 0:0e0631af0305 67 template <typename T> static inline \
RyoheiHagimoto 0:0e0631af0305 68 float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
RyoheiHagimoto 0:0e0631af0305 69 OPENCV_HAL_IMPL_NEON_REINTERPRET(uint8x16_t, u8)
RyoheiHagimoto 0:0e0631af0305 70 OPENCV_HAL_IMPL_NEON_REINTERPRET(int8x16_t, s8)
RyoheiHagimoto 0:0e0631af0305 71 OPENCV_HAL_IMPL_NEON_REINTERPRET(uint16x8_t, u16)
RyoheiHagimoto 0:0e0631af0305 72 OPENCV_HAL_IMPL_NEON_REINTERPRET(int16x8_t, s16)
RyoheiHagimoto 0:0e0631af0305 73 OPENCV_HAL_IMPL_NEON_REINTERPRET(uint32x4_t, u32)
RyoheiHagimoto 0:0e0631af0305 74 OPENCV_HAL_IMPL_NEON_REINTERPRET(int32x4_t, s32)
RyoheiHagimoto 0:0e0631af0305 75 OPENCV_HAL_IMPL_NEON_REINTERPRET(uint64x2_t, u64)
RyoheiHagimoto 0:0e0631af0305 76 OPENCV_HAL_IMPL_NEON_REINTERPRET(int64x2_t, s64)
RyoheiHagimoto 0:0e0631af0305 77 OPENCV_HAL_IMPL_NEON_REINTERPRET(float32x4_t, f32)
RyoheiHagimoto 0:0e0631af0305 78 #endif
RyoheiHagimoto 0:0e0631af0305 79
// v_uint8x16: value wrapper holding one NEON uint8x16_t (16 lanes of uchar).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 80 struct v_uint8x16
RyoheiHagimoto 0:0e0631af0305 81 {
RyoheiHagimoto 0:0e0631af0305 82 typedef uchar lane_type;
RyoheiHagimoto 0:0e0631af0305 83 enum { nlanes = 16 };
RyoheiHagimoto 0:0e0631af0305 84
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 85 v_uint8x16() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 86 explicit v_uint8x16(uint8x16_t v) : val(v) {}
// Builds the register from 16 scalars: they are written to a temporary array
// in argument order and loaded with vld1q_u8.
RyoheiHagimoto 0:0e0631af0305 87 v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7,
RyoheiHagimoto 0:0e0631af0305 88 uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15)
RyoheiHagimoto 0:0e0631af0305 89 {
RyoheiHagimoto 0:0e0631af0305 90 uchar lanes[16] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
RyoheiHagimoto 0:0e0631af0305 91 val = vld1q_u8(lanes);
RyoheiHagimoto 0:0e0631af0305 92 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 93 uchar get0() const
RyoheiHagimoto 0:0e0631af0305 94 {
RyoheiHagimoto 0:0e0631af0305 95 return vgetq_lane_u8(val, 0);
RyoheiHagimoto 0:0e0631af0305 96 }
RyoheiHagimoto 0:0e0631af0305 97
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 98 uint8x16_t val;
RyoheiHagimoto 0:0e0631af0305 99 };
RyoheiHagimoto 0:0e0631af0305 100
// v_int8x16: value wrapper holding one NEON int8x16_t (16 lanes of schar).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 101 struct v_int8x16
RyoheiHagimoto 0:0e0631af0305 102 {
RyoheiHagimoto 0:0e0631af0305 103 typedef schar lane_type;
RyoheiHagimoto 0:0e0631af0305 104 enum { nlanes = 16 };
RyoheiHagimoto 0:0e0631af0305 105
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 106 v_int8x16() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 107 explicit v_int8x16(int8x16_t v) : val(v) {}
// Builds the register from 16 scalars: they are written to a temporary array
// in argument order and loaded with vld1q_s8.
RyoheiHagimoto 0:0e0631af0305 108 v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7,
RyoheiHagimoto 0:0e0631af0305 109 schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15)
RyoheiHagimoto 0:0e0631af0305 110 {
RyoheiHagimoto 0:0e0631af0305 111 schar lanes[16] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15};
RyoheiHagimoto 0:0e0631af0305 112 val = vld1q_s8(lanes);
RyoheiHagimoto 0:0e0631af0305 113 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 114 schar get0() const
RyoheiHagimoto 0:0e0631af0305 115 {
RyoheiHagimoto 0:0e0631af0305 116 return vgetq_lane_s8(val, 0);
RyoheiHagimoto 0:0e0631af0305 117 }
RyoheiHagimoto 0:0e0631af0305 118
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 119 int8x16_t val;
RyoheiHagimoto 0:0e0631af0305 120 };
RyoheiHagimoto 0:0e0631af0305 121
// v_uint16x8: value wrapper holding one NEON uint16x8_t (8 lanes of ushort).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 122 struct v_uint16x8
RyoheiHagimoto 0:0e0631af0305 123 {
RyoheiHagimoto 0:0e0631af0305 124 typedef ushort lane_type;
RyoheiHagimoto 0:0e0631af0305 125 enum { nlanes = 8 };
RyoheiHagimoto 0:0e0631af0305 126
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 127 v_uint16x8() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 128 explicit v_uint16x8(uint16x8_t v) : val(v) {}
// Builds the register from 8 scalars via a temporary array and vld1q_u16.
RyoheiHagimoto 0:0e0631af0305 129 v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7)
RyoheiHagimoto 0:0e0631af0305 130 {
RyoheiHagimoto 0:0e0631af0305 131 ushort lanes[8] = {v0, v1, v2, v3, v4, v5, v6, v7};
RyoheiHagimoto 0:0e0631af0305 132 val = vld1q_u16(lanes);
RyoheiHagimoto 0:0e0631af0305 133 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 134 ushort get0() const
RyoheiHagimoto 0:0e0631af0305 135 {
RyoheiHagimoto 0:0e0631af0305 136 return vgetq_lane_u16(val, 0);
RyoheiHagimoto 0:0e0631af0305 137 }
RyoheiHagimoto 0:0e0631af0305 138
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 139 uint16x8_t val;
RyoheiHagimoto 0:0e0631af0305 140 };
RyoheiHagimoto 0:0e0631af0305 141
// v_int16x8: value wrapper holding one NEON int16x8_t (8 lanes of short).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 142 struct v_int16x8
RyoheiHagimoto 0:0e0631af0305 143 {
RyoheiHagimoto 0:0e0631af0305 144 typedef short lane_type;
RyoheiHagimoto 0:0e0631af0305 145 enum { nlanes = 8 };
RyoheiHagimoto 0:0e0631af0305 146
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 147 v_int16x8() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 148 explicit v_int16x8(int16x8_t v) : val(v) {}
// Builds the register from 8 scalars via a temporary array and vld1q_s16.
RyoheiHagimoto 0:0e0631af0305 149 v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7)
RyoheiHagimoto 0:0e0631af0305 150 {
RyoheiHagimoto 0:0e0631af0305 151 short lanes[8] = {v0, v1, v2, v3, v4, v5, v6, v7};
RyoheiHagimoto 0:0e0631af0305 152 val = vld1q_s16(lanes);
RyoheiHagimoto 0:0e0631af0305 153 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 154 short get0() const
RyoheiHagimoto 0:0e0631af0305 155 {
RyoheiHagimoto 0:0e0631af0305 156 return vgetq_lane_s16(val, 0);
RyoheiHagimoto 0:0e0631af0305 157 }
RyoheiHagimoto 0:0e0631af0305 158
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 159 int16x8_t val;
RyoheiHagimoto 0:0e0631af0305 160 };
RyoheiHagimoto 0:0e0631af0305 161
// v_uint32x4: value wrapper holding one NEON uint32x4_t (4 lanes of unsigned).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 162 struct v_uint32x4
RyoheiHagimoto 0:0e0631af0305 163 {
RyoheiHagimoto 0:0e0631af0305 164 typedef unsigned lane_type;
RyoheiHagimoto 0:0e0631af0305 165 enum { nlanes = 4 };
RyoheiHagimoto 0:0e0631af0305 166
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 167 v_uint32x4() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 168 explicit v_uint32x4(uint32x4_t v) : val(v) {}
// Builds the register from 4 scalars via a temporary array and vld1q_u32.
RyoheiHagimoto 0:0e0631af0305 169 v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3)
RyoheiHagimoto 0:0e0631af0305 170 {
RyoheiHagimoto 0:0e0631af0305 171 unsigned lanes[4] = {v0, v1, v2, v3};
RyoheiHagimoto 0:0e0631af0305 172 val = vld1q_u32(lanes);
RyoheiHagimoto 0:0e0631af0305 173 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 174 unsigned get0() const
RyoheiHagimoto 0:0e0631af0305 175 {
RyoheiHagimoto 0:0e0631af0305 176 return vgetq_lane_u32(val, 0);
RyoheiHagimoto 0:0e0631af0305 177 }
RyoheiHagimoto 0:0e0631af0305 178
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 179 uint32x4_t val;
RyoheiHagimoto 0:0e0631af0305 180 };
RyoheiHagimoto 0:0e0631af0305 181
// v_int32x4: value wrapper holding one NEON int32x4_t (4 lanes of int).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 182 struct v_int32x4
RyoheiHagimoto 0:0e0631af0305 183 {
RyoheiHagimoto 0:0e0631af0305 184 typedef int lane_type;
RyoheiHagimoto 0:0e0631af0305 185 enum { nlanes = 4 };
RyoheiHagimoto 0:0e0631af0305 186
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 187 v_int32x4() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 188 explicit v_int32x4(int32x4_t v) : val(v) {}
// Builds the register from 4 scalars via a temporary array and vld1q_s32.
RyoheiHagimoto 0:0e0631af0305 189 v_int32x4(int v0, int v1, int v2, int v3)
RyoheiHagimoto 0:0e0631af0305 190 {
RyoheiHagimoto 0:0e0631af0305 191 int lanes[4] = {v0, v1, v2, v3};
RyoheiHagimoto 0:0e0631af0305 192 val = vld1q_s32(lanes);
RyoheiHagimoto 0:0e0631af0305 193 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 194 int get0() const
RyoheiHagimoto 0:0e0631af0305 195 {
RyoheiHagimoto 0:0e0631af0305 196 return vgetq_lane_s32(val, 0);
RyoheiHagimoto 0:0e0631af0305 197 }
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 198 int32x4_t val;
RyoheiHagimoto 0:0e0631af0305 199 };
RyoheiHagimoto 0:0e0631af0305 200
// v_float32x4: value wrapper holding one NEON float32x4_t (4 lanes of float).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 201 struct v_float32x4
RyoheiHagimoto 0:0e0631af0305 202 {
RyoheiHagimoto 0:0e0631af0305 203 typedef float lane_type;
RyoheiHagimoto 0:0e0631af0305 204 enum { nlanes = 4 };
RyoheiHagimoto 0:0e0631af0305 205
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 206 v_float32x4() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 207 explicit v_float32x4(float32x4_t v) : val(v) {}
// Builds the register from 4 scalars via a temporary array and vld1q_f32.
RyoheiHagimoto 0:0e0631af0305 208 v_float32x4(float v0, float v1, float v2, float v3)
RyoheiHagimoto 0:0e0631af0305 209 {
RyoheiHagimoto 0:0e0631af0305 210 float lanes[4] = {v0, v1, v2, v3};
RyoheiHagimoto 0:0e0631af0305 211 val = vld1q_f32(lanes);
RyoheiHagimoto 0:0e0631af0305 212 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 213 float get0() const
RyoheiHagimoto 0:0e0631af0305 214 {
RyoheiHagimoto 0:0e0631af0305 215 return vgetq_lane_f32(val, 0);
RyoheiHagimoto 0:0e0631af0305 216 }
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 217 float32x4_t val;
RyoheiHagimoto 0:0e0631af0305 218 };
RyoheiHagimoto 0:0e0631af0305 219
// v_uint64x2: value wrapper holding one NEON uint64x2_t (2 lanes of uint64).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 220 struct v_uint64x2
RyoheiHagimoto 0:0e0631af0305 221 {
RyoheiHagimoto 0:0e0631af0305 222 typedef uint64 lane_type;
RyoheiHagimoto 0:0e0631af0305 223 enum { nlanes = 2 };
RyoheiHagimoto 0:0e0631af0305 224
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 225 v_uint64x2() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 226 explicit v_uint64x2(uint64x2_t v) : val(v) {}
// Builds the register from 2 scalars via a temporary array and vld1q_u64.
// The parameters are uint64 (the lane type) rather than unsigned, so values
// wider than 32 bits are no longer truncated at the call boundary; callers
// that pass unsigned arguments still convert implicitly.
RyoheiHagimoto 0:0e0631af0305 227 v_uint64x2(uint64 v0, uint64 v1)
RyoheiHagimoto 0:0e0631af0305 228 {
RyoheiHagimoto 0:0e0631af0305 229 uint64 v[] = {v0, v1};
RyoheiHagimoto 0:0e0631af0305 230 val = vld1q_u64(v);
RyoheiHagimoto 0:0e0631af0305 231 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 232 uint64 get0() const
RyoheiHagimoto 0:0e0631af0305 233 {
RyoheiHagimoto 0:0e0631af0305 234 return vgetq_lane_u64(val, 0);
RyoheiHagimoto 0:0e0631af0305 235 }
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 236 uint64x2_t val;
RyoheiHagimoto 0:0e0631af0305 237 };
RyoheiHagimoto 0:0e0631af0305 238
// v_int64x2: value wrapper holding one NEON int64x2_t (2 lanes of int64).
// lane_type and nlanes publish the element type and the lane count.
RyoheiHagimoto 0:0e0631af0305 239 struct v_int64x2
RyoheiHagimoto 0:0e0631af0305 240 {
RyoheiHagimoto 0:0e0631af0305 241 typedef int64 lane_type;
RyoheiHagimoto 0:0e0631af0305 242 enum { nlanes = 2 };
RyoheiHagimoto 0:0e0631af0305 243
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 244 v_int64x2() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 245 explicit v_int64x2(int64x2_t v) : val(v) {}
// Builds the register from 2 scalars via a temporary array and vld1q_s64.
// The parameters are int64 (the lane type) rather than int, so values outside
// the 32-bit range are no longer truncated at the call boundary; callers that
// pass int arguments still convert implicitly with the same value.
RyoheiHagimoto 0:0e0631af0305 246 v_int64x2(int64 v0, int64 v1)
RyoheiHagimoto 0:0e0631af0305 247 {
RyoheiHagimoto 0:0e0631af0305 248 int64 v[] = {v0, v1};
RyoheiHagimoto 0:0e0631af0305 249 val = vld1q_s64(v);
RyoheiHagimoto 0:0e0631af0305 250 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 251 int64 get0() const
RyoheiHagimoto 0:0e0631af0305 252 {
RyoheiHagimoto 0:0e0631af0305 253 return vgetq_lane_s64(val, 0);
RyoheiHagimoto 0:0e0631af0305 254 }
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 255 int64x2_t val;
RyoheiHagimoto 0:0e0631af0305 256 };
RyoheiHagimoto 0:0e0631af0305 257
// v_float64x2: value wrapper holding one NEON float64x2_t (2 lanes of double).
// Compiled only when CV_SIMD128_64F is non-zero.
RyoheiHagimoto 0:0e0631af0305 258 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 259 struct v_float64x2
RyoheiHagimoto 0:0e0631af0305 260 {
RyoheiHagimoto 0:0e0631af0305 261 typedef double lane_type;
RyoheiHagimoto 0:0e0631af0305 262 enum { nlanes = 2 };
RyoheiHagimoto 0:0e0631af0305 263
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 264 v_float64x2() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 265 explicit v_float64x2(float64x2_t v) : val(v) {}
// Builds the register from 2 scalars via a temporary array and vld1q_f64.
RyoheiHagimoto 0:0e0631af0305 266 v_float64x2(double v0, double v1)
RyoheiHagimoto 0:0e0631af0305 267 {
RyoheiHagimoto 0:0e0631af0305 268 double lanes[2] = {v0, v1};
RyoheiHagimoto 0:0e0631af0305 269 val = vld1q_f64(lanes);
RyoheiHagimoto 0:0e0631af0305 270 }
// Returns the value of lane 0.
RyoheiHagimoto 0:0e0631af0305 271 double get0() const
RyoheiHagimoto 0:0e0631af0305 272 {
RyoheiHagimoto 0:0e0631af0305 273 return vgetq_lane_f64(val, 0);
RyoheiHagimoto 0:0e0631af0305 274 }
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 275 float64x2_t val;
RyoheiHagimoto 0:0e0631af0305 276 };
RyoheiHagimoto 0:0e0631af0305 277 #endif
RyoheiHagimoto 0:0e0631af0305 278
RyoheiHagimoto 0:0e0631af0305 279 #if defined (HAVE_FP16)
// Template stand-ins for four f16 helpers:
//  - vreinterpret_s16_f16 / vreinterpret_f16_s16 C-style-cast between the
//    int16x4_t and float16x4_t register types;
//  - vld1_f16 loads through vld1_s16 after casting the pointer to const short*
//    and reinterprets the result as float16x4_t;
//  - vst1_f16 reinterprets the register as int16x4_t and stores it with
//    vst1_s16 through a short* cast of the destination.
RyoheiHagimoto 0:0e0631af0305 280 // Workaround for old compilers
RyoheiHagimoto 0:0e0631af0305 281 template <typename T> static inline int16x4_t vreinterpret_s16_f16(T a)
RyoheiHagimoto 0:0e0631af0305 282 { return (int16x4_t)a; }
RyoheiHagimoto 0:0e0631af0305 283 template <typename T> static inline float16x4_t vreinterpret_f16_s16(T a)
RyoheiHagimoto 0:0e0631af0305 284 { return (float16x4_t)a; }
RyoheiHagimoto 0:0e0631af0305 285 template <typename T> static inline float16x4_t vld1_f16(const T* ptr)
RyoheiHagimoto 0:0e0631af0305 286 { return vreinterpret_f16_s16(vld1_s16((const short*)ptr)); }
RyoheiHagimoto 0:0e0631af0305 287 template <typename T> static inline void vst1_f16(T* ptr, float16x4_t a)
RyoheiHagimoto 0:0e0631af0305 288 { vst1_s16((short*)ptr, vreinterpret_s16_f16(a)); }
RyoheiHagimoto 0:0e0631af0305 289
// v_float16x4: value wrapper holding one NEON float16x4_t (4 lanes). The lanes
// are exposed to callers as raw short bit patterns (lane_type is short).
RyoheiHagimoto 0:0e0631af0305 290 struct v_float16x4
RyoheiHagimoto 0:0e0631af0305 291 {
RyoheiHagimoto 0:0e0631af0305 292 typedef short lane_type;
RyoheiHagimoto 0:0e0631af0305 293 enum { nlanes = 4 };
RyoheiHagimoto 0:0e0631af0305 294
// Default constructor: val is not initialised.
RyoheiHagimoto 0:0e0631af0305 295 v_float16x4() {}
// Wraps an existing register; explicit, so raw registers never convert silently.
RyoheiHagimoto 0:0e0631af0305 296 explicit v_float16x4(float16x4_t v) : val(v) {}
// Builds the register from 4 short values via a temporary array and vld1_f16.
RyoheiHagimoto 0:0e0631af0305 297 v_float16x4(short v0, short v1, short v2, short v3)
RyoheiHagimoto 0:0e0631af0305 298 {
RyoheiHagimoto 0:0e0631af0305 299 short bits[4] = {v0, v1, v2, v3};
RyoheiHagimoto 0:0e0631af0305 300 val = vld1_f16(bits);
RyoheiHagimoto 0:0e0631af0305 301 }
// Returns lane 0 as a short, after reinterpreting the register as int16x4_t.
RyoheiHagimoto 0:0e0631af0305 302 short get0() const
RyoheiHagimoto 0:0e0631af0305 303 {
RyoheiHagimoto 0:0e0631af0305 304 return vget_lane_s16(vreinterpret_s16_f16(val), 0);
RyoheiHagimoto 0:0e0631af0305 305 }
// The wrapped NEON register.
RyoheiHagimoto 0:0e0631af0305 306 float16x4_t val;
RyoheiHagimoto 0:0e0631af0305 307 };
RyoheiHagimoto 0:0e0631af0305 308 #endif
RyoheiHagimoto 0:0e0631af0305 309
// For one wrapper type v_<_Tpv> (scalar type _Tp, intrinsic suffix <suffix>)
// this macro defines:
//  - v_setzero_<suffix>(): every lane set to (_Tp)0 through vdupq_n_<suffix>;
//  - v_setall_<suffix>(v): every lane set to v through vdupq_n_<suffix>;
//  - an identity vreinterpretq_<suffix>_<suffix> overload, so the generated
//    reinterprets below also resolve when source and target suffix coincide;
//  - v_reinterpret_as_{u8,s8,u16,s16,u32,s32,u64,s64,f32} overloads taking a
//    v_<_Tpv> and returning the matching wrapper around the reinterpreted register.
// It is instantiated below for the nine 128-bit integer and float32 wrappers.
RyoheiHagimoto 0:0e0631af0305 310 #define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
RyoheiHagimoto 0:0e0631af0305 311 inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
RyoheiHagimoto 0:0e0631af0305 312 inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
RyoheiHagimoto 0:0e0631af0305 313 inline _Tpv##_t vreinterpretq_##suffix##_##suffix(_Tpv##_t v) { return v; } \
RyoheiHagimoto 0:0e0631af0305 314 inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(vreinterpretq_u8_##suffix(v.val)); } \
RyoheiHagimoto 0:0e0631af0305 315 inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(vreinterpretq_s8_##suffix(v.val)); } \
RyoheiHagimoto 0:0e0631af0305 316 inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(vreinterpretq_u16_##suffix(v.val)); } \
RyoheiHagimoto 0:0e0631af0305 317 inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(vreinterpretq_s16_##suffix(v.val)); } \
RyoheiHagimoto 0:0e0631af0305 318 inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(vreinterpretq_u32_##suffix(v.val)); } \
RyoheiHagimoto 0:0e0631af0305 319 inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(vreinterpretq_s32_##suffix(v.val)); } \
RyoheiHagimoto 0:0e0631af0305 320 inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(vreinterpretq_u64_##suffix(v.val)); } \
RyoheiHagimoto 0:0e0631af0305 321 inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(vreinterpretq_s64_##suffix(v.val)); } \
RyoheiHagimoto 0:0e0631af0305 322 inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(vreinterpretq_f32_##suffix(v.val)); }
RyoheiHagimoto 0:0e0631af0305 323
RyoheiHagimoto 0:0e0631af0305 324 OPENCV_HAL_IMPL_NEON_INIT(uint8x16, uchar, u8)
RyoheiHagimoto 0:0e0631af0305 325 OPENCV_HAL_IMPL_NEON_INIT(int8x16, schar, s8)
RyoheiHagimoto 0:0e0631af0305 326 OPENCV_HAL_IMPL_NEON_INIT(uint16x8, ushort, u16)
RyoheiHagimoto 0:0e0631af0305 327 OPENCV_HAL_IMPL_NEON_INIT(int16x8, short, s16)
RyoheiHagimoto 0:0e0631af0305 328 OPENCV_HAL_IMPL_NEON_INIT(uint32x4, unsigned, u32)
RyoheiHagimoto 0:0e0631af0305 329 OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32)
RyoheiHagimoto 0:0e0631af0305 330 OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, u64)
RyoheiHagimoto 0:0e0631af0305 331 OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, s64)
RyoheiHagimoto 0:0e0631af0305 332 OPENCV_HAL_IMPL_NEON_INIT(float32x4, float, f32)
// 64-bit float additions, compiled only when CV_SIMD128_64F is non-zero:
// OPENCV_HAL_IMPL_NEON_INIT_64 adds a v_reinterpret_as_f64 overload for the
// given wrapper type. float64x2 first receives the full OPENCV_HAL_IMPL_NEON_INIT
// treatment, then every wrapper (float64x2 included) gets its
// v_reinterpret_as_f64 overload.
RyoheiHagimoto 0:0e0631af0305 333 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 334 #define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \
RyoheiHagimoto 0:0e0631af0305 335 inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); }
RyoheiHagimoto 0:0e0631af0305 336 OPENCV_HAL_IMPL_NEON_INIT(float64x2, double, f64)
RyoheiHagimoto 0:0e0631af0305 337 OPENCV_HAL_IMPL_NEON_INIT_64(uint8x16, u8)
RyoheiHagimoto 0:0e0631af0305 338 OPENCV_HAL_IMPL_NEON_INIT_64(int8x16, s8)
RyoheiHagimoto 0:0e0631af0305 339 OPENCV_HAL_IMPL_NEON_INIT_64(uint16x8, u16)
RyoheiHagimoto 0:0e0631af0305 340 OPENCV_HAL_IMPL_NEON_INIT_64(int16x8, s16)
RyoheiHagimoto 0:0e0631af0305 341 OPENCV_HAL_IMPL_NEON_INIT_64(uint32x4, u32)
RyoheiHagimoto 0:0e0631af0305 342 OPENCV_HAL_IMPL_NEON_INIT_64(int32x4, s32)
RyoheiHagimoto 0:0e0631af0305 343 OPENCV_HAL_IMPL_NEON_INIT_64(uint64x2, u64)
RyoheiHagimoto 0:0e0631af0305 344 OPENCV_HAL_IMPL_NEON_INIT_64(int64x2, s64)
RyoheiHagimoto 0:0e0631af0305 345 OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32)
RyoheiHagimoto 0:0e0631af0305 346 OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64)
RyoheiHagimoto 0:0e0631af0305 347 #endif
RyoheiHagimoto 0:0e0631af0305 348
// Narrowing ("pack") operations. For a narrow wrapper _Tpvec (lane type _Tp,
// half-register type hreg) and a wide wrapper _Tpwvec, one instantiation defines:
//  - v_<pack>(a, b): narrows a and b with vqmov<op>_<wsuffix> and joins the two
//    halves with vcombine_<suffix> (a in the low half, b in the high half);
//  - v_<pack>_store(ptr, a): narrows a and writes the half register with vst1;
//  - v_rshr_<pack><n>(a, b) and v_rshr_<pack>_store<n>(ptr, a): the same, but
//    narrowing with vqrshr<op>_n_<wsuffix> using the compile-time shift n.
// op is "n" for same-signedness packing (v_pack) and "un" for the signed-to-
// unsigned variants (v_pack_u), as the instantiations below show.
RyoheiHagimoto 0:0e0631af0305 349 #define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, wsuffix, pack, op) \
RyoheiHagimoto 0:0e0631af0305 350 inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
RyoheiHagimoto 0:0e0631af0305 351 { \
RyoheiHagimoto 0:0e0631af0305 352 hreg a1 = vqmov##op##_##wsuffix(a.val), b1 = vqmov##op##_##wsuffix(b.val); \
RyoheiHagimoto 0:0e0631af0305 353 return _Tpvec(vcombine_##suffix(a1, b1)); \
RyoheiHagimoto 0:0e0631af0305 354 } \
RyoheiHagimoto 0:0e0631af0305 355 inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
RyoheiHagimoto 0:0e0631af0305 356 { \
RyoheiHagimoto 0:0e0631af0305 357 hreg a1 = vqmov##op##_##wsuffix(a.val); \
RyoheiHagimoto 0:0e0631af0305 358 vst1_##suffix(ptr, a1); \
RyoheiHagimoto 0:0e0631af0305 359 } \
RyoheiHagimoto 0:0e0631af0305 360 template<int n> inline \
RyoheiHagimoto 0:0e0631af0305 361 _Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
RyoheiHagimoto 0:0e0631af0305 362 { \
RyoheiHagimoto 0:0e0631af0305 363 hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \
RyoheiHagimoto 0:0e0631af0305 364 hreg b1 = vqrshr##op##_n_##wsuffix(b.val, n); \
RyoheiHagimoto 0:0e0631af0305 365 return _Tpvec(vcombine_##suffix(a1, b1)); \
RyoheiHagimoto 0:0e0631af0305 366 } \
RyoheiHagimoto 0:0e0631af0305 367 template<int n> inline \
RyoheiHagimoto 0:0e0631af0305 368 void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
RyoheiHagimoto 0:0e0631af0305 369 { \
RyoheiHagimoto 0:0e0631af0305 370 hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \
RyoheiHagimoto 0:0e0631af0305 371 vst1_##suffix(ptr, a1); \
RyoheiHagimoto 0:0e0631af0305 372 }
RyoheiHagimoto 0:0e0631af0305 373
RyoheiHagimoto 0:0e0631af0305 374 OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, u16, pack, n)
RyoheiHagimoto 0:0e0631af0305 375 OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, s16, pack, n)
RyoheiHagimoto 0:0e0631af0305 376 OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, u32, pack, n)
RyoheiHagimoto 0:0e0631af0305 377 OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, s32, pack, n)
RyoheiHagimoto 0:0e0631af0305 378 OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u64, pack, n)
RyoheiHagimoto 0:0e0631af0305 379 OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, s64, pack, n)
RyoheiHagimoto 0:0e0631af0305 380
RyoheiHagimoto 0:0e0631af0305 381 OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, s16, pack_u, un)
RyoheiHagimoto 0:0e0631af0305 382 OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un)
RyoheiHagimoto 0:0e0631af0305 383
// Linear combination of four vectors weighted by the lanes of v:
//   result = m0 * v[0] + m1 * v[1] + m2 * v[2] + m3 * v[3]
// where each m_i is scaled lane-wise by a single scalar lane of v.
RyoheiHagimoto 0:0e0631af0305 384 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
RyoheiHagimoto 0:0e0631af0305 385 const v_float32x4& m1, const v_float32x4& m2,
RyoheiHagimoto 0:0e0631af0305 386 const v_float32x4& m3)
RyoheiHagimoto 0:0e0631af0305 387 {
// Split v into halves because the *_lane_f32 intrinsics take a 2-lane selector.
RyoheiHagimoto 0:0e0631af0305 388 float32x2_t lo = vget_low_f32(v.val), hi = vget_high_f32(v.val);
RyoheiHagimoto 0:0e0631af0305 389 float32x4_t acc = vmulq_lane_f32(m0.val, lo, 0);
RyoheiHagimoto 0:0e0631af0305 390 acc = vmlaq_lane_f32(acc, m1.val, lo, 1);
RyoheiHagimoto 0:0e0631af0305 391 acc = vmlaq_lane_f32(acc, m2.val, hi, 0);
RyoheiHagimoto 0:0e0631af0305 392 acc = vmlaq_lane_f32(acc, m3.val, hi, 1);
RyoheiHagimoto 0:0e0631af0305 393 return v_float32x4(acc);
RyoheiHagimoto 0:0e0631af0305 394 }
RyoheiHagimoto 0:0e0631af0305 395
// Generates a binary operator and its compound-assignment form for one wrapper
// type; both forward a.val and b.val to the given NEON intrinsic.
// As instantiated below:
//  - 8-bit and 16-bit integer + and - map to the vqadd/vqsub intrinsics;
//  - 32-bit and 64-bit integer + and - map to plain vadd/vsub;
//  - * is provided for the 16-bit and 32-bit integer types and for float32
//    (no 8-bit or 64-bit integer multiply is defined here).
RyoheiHagimoto 0:0e0631af0305 396 #define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) \
RyoheiHagimoto 0:0e0631af0305 397 inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 398 { \
RyoheiHagimoto 0:0e0631af0305 399 return _Tpvec(intrin(a.val, b.val)); \
RyoheiHagimoto 0:0e0631af0305 400 } \
RyoheiHagimoto 0:0e0631af0305 401 inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 402 { \
RyoheiHagimoto 0:0e0631af0305 403 a.val = intrin(a.val, b.val); \
RyoheiHagimoto 0:0e0631af0305 404 return a; \
RyoheiHagimoto 0:0e0631af0305 405 }
RyoheiHagimoto 0:0e0631af0305 406
RyoheiHagimoto 0:0e0631af0305 407 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint8x16, vqaddq_u8)
RyoheiHagimoto 0:0e0631af0305 408 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint8x16, vqsubq_u8)
RyoheiHagimoto 0:0e0631af0305 409 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8)
RyoheiHagimoto 0:0e0631af0305 410 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8)
RyoheiHagimoto 0:0e0631af0305 411 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16)
RyoheiHagimoto 0:0e0631af0305 412 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16)
RyoheiHagimoto 0:0e0631af0305 413 OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16)
RyoheiHagimoto 0:0e0631af0305 414 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16)
RyoheiHagimoto 0:0e0631af0305 415 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16)
RyoheiHagimoto 0:0e0631af0305 416 OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16)
RyoheiHagimoto 0:0e0631af0305 417 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32)
RyoheiHagimoto 0:0e0631af0305 418 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32)
RyoheiHagimoto 0:0e0631af0305 419 OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32)
RyoheiHagimoto 0:0e0631af0305 420 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint32x4, vaddq_u32)
RyoheiHagimoto 0:0e0631af0305 421 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint32x4, vsubq_u32)
RyoheiHagimoto 0:0e0631af0305 422 OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint32x4, vmulq_u32)
RyoheiHagimoto 0:0e0631af0305 423 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float32x4, vaddq_f32)
RyoheiHagimoto 0:0e0631af0305 424 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float32x4, vsubq_f32)
RyoheiHagimoto 0:0e0631af0305 425 OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float32x4, vmulq_f32)
RyoheiHagimoto 0:0e0631af0305 426 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64)
RyoheiHagimoto 0:0e0631af0305 427 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int64x2, vsubq_s64)
RyoheiHagimoto 0:0e0631af0305 428 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint64x2, vaddq_u64)
RyoheiHagimoto 0:0e0631af0305 429 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint64x2, vsubq_u64)
// Division and the float64x2 arithmetic operators.
// With CV_SIMD128_64F set, float32 and float64 division use vdivq_f32 and
// vdivq_f64 and float64x2 also gets +, - and *. Otherwise only float32 division
// exists and is computed as a * (1 / b): vrecpeq_f32 gives a reciprocal
// estimate, which is refined by two vrecpsq_f32 correction steps.
// NOTE(review): the fallback result may differ from exact division in the low
// bits -- confirm the accuracy requirements of callers.
RyoheiHagimoto 0:0e0631af0305 430 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 431 OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float32x4, vdivq_f32)
RyoheiHagimoto 0:0e0631af0305 432 OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float64x2, vaddq_f64)
RyoheiHagimoto 0:0e0631af0305 433 OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float64x2, vsubq_f64)
RyoheiHagimoto 0:0e0631af0305 434 OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float64x2, vmulq_f64)
RyoheiHagimoto 0:0e0631af0305 435 OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float64x2, vdivq_f64)
RyoheiHagimoto 0:0e0631af0305 436 #else
RyoheiHagimoto 0:0e0631af0305 437 inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b)
RyoheiHagimoto 0:0e0631af0305 438 {
RyoheiHagimoto 0:0e0631af0305 439 float32x4_t inv_b = vrecpeq_f32(b.val);
RyoheiHagimoto 0:0e0631af0305 440 inv_b = vmulq_f32(vrecpsq_f32(b.val, inv_b), inv_b);
RyoheiHagimoto 0:0e0631af0305 441 inv_b = vmulq_f32(vrecpsq_f32(b.val, inv_b), inv_b);
RyoheiHagimoto 0:0e0631af0305 442 return v_float32x4(vmulq_f32(a.val, inv_b));
RyoheiHagimoto 0:0e0631af0305 443 }
RyoheiHagimoto 0:0e0631af0305 444 inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
RyoheiHagimoto 0:0e0631af0305 445 {
RyoheiHagimoto 0:0e0631af0305 446 float32x4_t inv_b = vrecpeq_f32(b.val);
RyoheiHagimoto 0:0e0631af0305 447 inv_b = vmulq_f32(vrecpsq_f32(b.val, inv_b), inv_b);
RyoheiHagimoto 0:0e0631af0305 448 inv_b = vmulq_f32(vrecpsq_f32(b.val, inv_b), inv_b);
RyoheiHagimoto 0:0e0631af0305 449 a.val = vmulq_f32(a.val, inv_b);
RyoheiHagimoto 0:0e0631af0305 450 return a;
RyoheiHagimoto 0:0e0631af0305 451 }
RyoheiHagimoto 0:0e0631af0305 452 #endif
RyoheiHagimoto 0:0e0631af0305 453
// Widening multiply of signed 16-bit lanes: c receives the 32-bit products of
// the low four lanes of a and b, d those of the high four lanes (vmull_s16).
RyoheiHagimoto 0:0e0631af0305 454 inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
RyoheiHagimoto 0:0e0631af0305 455 v_int32x4& c, v_int32x4& d)
RyoheiHagimoto 0:0e0631af0305 456 {
RyoheiHagimoto 0:0e0631af0305 457 c.val = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
RyoheiHagimoto 0:0e0631af0305 458 d.val = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
RyoheiHagimoto 0:0e0631af0305 459 }
RyoheiHagimoto 0:0e0631af0305 460
// Widening multiply of unsigned 16-bit lanes: c receives the 32-bit products of
// the low four lanes of a and b, d those of the high four lanes (vmull_u16).
RyoheiHagimoto 0:0e0631af0305 461 inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
RyoheiHagimoto 0:0e0631af0305 462 v_uint32x4& c, v_uint32x4& d)
RyoheiHagimoto 0:0e0631af0305 463 {
RyoheiHagimoto 0:0e0631af0305 464 c.val = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val));
RyoheiHagimoto 0:0e0631af0305 465 d.val = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
RyoheiHagimoto 0:0e0631af0305 466 }
RyoheiHagimoto 0:0e0631af0305 467
// Widening multiply of unsigned 32-bit lanes: c receives the 64-bit products of
// the low two lanes of a and b, d those of the high two lanes (vmull_u32).
RyoheiHagimoto 0:0e0631af0305 468 inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
RyoheiHagimoto 0:0e0631af0305 469 v_uint64x2& c, v_uint64x2& d)
RyoheiHagimoto 0:0e0631af0305 470 {
RyoheiHagimoto 0:0e0631af0305 471 c.val = vmull_u32(vget_low_u32(a.val), vget_low_u32(b.val));
RyoheiHagimoto 0:0e0631af0305 472 d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
RyoheiHagimoto 0:0e0631af0305 473 }
RyoheiHagimoto 0:0e0631af0305 474
// Pairwise dot product of 16-bit lanes: output lane i holds
//   a[2i] * b[2i] + a[2i+1] * b[2i+1]
// accumulated in 32 bits.
RyoheiHagimoto 0:0e0631af0305 475 inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
RyoheiHagimoto 0:0e0631af0305 476 {
// Widen-multiply the low and high halves separately.
RyoheiHagimoto 0:0e0631af0305 477 int32x4_t prod_lo = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
RyoheiHagimoto 0:0e0631af0305 478 int32x4_t prod_hi = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
// De-interleave into even-indexed and odd-indexed products, then add them.
RyoheiHagimoto 0:0e0631af0305 479 int32x4x2_t even_odd = vuzpq_s32(prod_lo, prod_hi);
RyoheiHagimoto 0:0e0631af0305 480 return v_int32x4(vaddq_s32(even_odd.val[0], even_odd.val[1]));
RyoheiHagimoto 0:0e0631af0305 481 }
RyoheiHagimoto 0:0e0631af0305 482
// Bitwise operators for one integer wrapper type: &, | and ^ (with their
// compound-assignment forms) come from OPENCV_HAL_IMPL_NEON_BIN_OP using
// vandq/vorrq/veorq. Unary ~ reinterprets the register as u8, applies vmvnq_u8
// and reinterprets the result back to the original lane type.
// Instantiated below for all eight integer wrappers; float32x4 is not covered here.
RyoheiHagimoto 0:0e0631af0305 483 #define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
RyoheiHagimoto 0:0e0631af0305 484 OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
RyoheiHagimoto 0:0e0631af0305 485 OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
RyoheiHagimoto 0:0e0631af0305 486 OPENCV_HAL_IMPL_NEON_BIN_OP(^, _Tpvec, veorq_##suffix) \
RyoheiHagimoto 0:0e0631af0305 487 inline _Tpvec operator ~ (const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 488 { \
RyoheiHagimoto 0:0e0631af0305 489 return _Tpvec(vreinterpretq_##suffix##_u8(vmvnq_u8(vreinterpretq_u8_##suffix(a.val)))); \
RyoheiHagimoto 0:0e0631af0305 490 }
RyoheiHagimoto 0:0e0631af0305 491
RyoheiHagimoto 0:0e0631af0305 492 OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint8x16, u8)
RyoheiHagimoto 0:0e0631af0305 493 OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int8x16, s8)
RyoheiHagimoto 0:0e0631af0305 494 OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint16x8, u16)
RyoheiHagimoto 0:0e0631af0305 495 OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int16x8, s16)
RyoheiHagimoto 0:0e0631af0305 496 OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint32x4, u32)
RyoheiHagimoto 0:0e0631af0305 497 OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int32x4, s32)
RyoheiHagimoto 0:0e0631af0305 498 OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint64x2, u64)
RyoheiHagimoto 0:0e0631af0305 499 OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int64x2, s64)
RyoheiHagimoto 0:0e0631af0305 500
RyoheiHagimoto 0:0e0631af0305 501 #define OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(bin_op, intrin) \
RyoheiHagimoto 0:0e0631af0305 502 inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \
RyoheiHagimoto 0:0e0631af0305 503 { \
RyoheiHagimoto 0:0e0631af0305 504 return v_float32x4(vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val)))); \
RyoheiHagimoto 0:0e0631af0305 505 } \
RyoheiHagimoto 0:0e0631af0305 506 inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \
RyoheiHagimoto 0:0e0631af0305 507 { \
RyoheiHagimoto 0:0e0631af0305 508 a.val = vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val))); \
RyoheiHagimoto 0:0e0631af0305 509 return a; \
RyoheiHagimoto 0:0e0631af0305 510 }
RyoheiHagimoto 0:0e0631af0305 511
RyoheiHagimoto 0:0e0631af0305 512 OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(&, vandq_s32)
RyoheiHagimoto 0:0e0631af0305 513 OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(|, vorrq_s32)
RyoheiHagimoto 0:0e0631af0305 514 OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(^, veorq_s32)
RyoheiHagimoto 0:0e0631af0305 515
RyoheiHagimoto 0:0e0631af0305 516 inline v_float32x4 operator ~ (const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 517 {
RyoheiHagimoto 0:0e0631af0305 518 return v_float32x4(vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(a.val))));
RyoheiHagimoto 0:0e0631af0305 519 }
RyoheiHagimoto 0:0e0631af0305 520
RyoheiHagimoto 0:0e0631af0305 521 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 522 inline v_float32x4 v_sqrt(const v_float32x4& x)
RyoheiHagimoto 0:0e0631af0305 523 {
RyoheiHagimoto 0:0e0631af0305 524 return v_float32x4(vsqrtq_f32(x.val));
RyoheiHagimoto 0:0e0631af0305 525 }
RyoheiHagimoto 0:0e0631af0305 526
RyoheiHagimoto 0:0e0631af0305 527 inline v_float32x4 v_invsqrt(const v_float32x4& x)
RyoheiHagimoto 0:0e0631af0305 528 {
RyoheiHagimoto 0:0e0631af0305 529 v_float32x4 one = v_setall_f32(1.0f);
RyoheiHagimoto 0:0e0631af0305 530 return one / v_sqrt(x);
RyoheiHagimoto 0:0e0631af0305 531 }
RyoheiHagimoto 0:0e0631af0305 532 #else
RyoheiHagimoto 0:0e0631af0305 533 inline v_float32x4 v_sqrt(const v_float32x4& x)
RyoheiHagimoto 0:0e0631af0305 534 {
RyoheiHagimoto 0:0e0631af0305 535 float32x4_t x1 = vmaxq_f32(x.val, vdupq_n_f32(FLT_MIN));
RyoheiHagimoto 0:0e0631af0305 536 float32x4_t e = vrsqrteq_f32(x1);
RyoheiHagimoto 0:0e0631af0305 537 e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
RyoheiHagimoto 0:0e0631af0305 538 e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
RyoheiHagimoto 0:0e0631af0305 539 return v_float32x4(vmulq_f32(x.val, e));
RyoheiHagimoto 0:0e0631af0305 540 }
RyoheiHagimoto 0:0e0631af0305 541
RyoheiHagimoto 0:0e0631af0305 542 inline v_float32x4 v_invsqrt(const v_float32x4& x)
RyoheiHagimoto 0:0e0631af0305 543 {
RyoheiHagimoto 0:0e0631af0305 544 float32x4_t e = vrsqrteq_f32(x.val);
RyoheiHagimoto 0:0e0631af0305 545 e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
RyoheiHagimoto 0:0e0631af0305 546 e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
RyoheiHagimoto 0:0e0631af0305 547 return v_float32x4(e);
RyoheiHagimoto 0:0e0631af0305 548 }
RyoheiHagimoto 0:0e0631af0305 549 #endif
RyoheiHagimoto 0:0e0631af0305 550
RyoheiHagimoto 0:0e0631af0305 551 #define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
RyoheiHagimoto 0:0e0631af0305 552 inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); }
RyoheiHagimoto 0:0e0631af0305 553
RyoheiHagimoto 0:0e0631af0305 554 OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8)
RyoheiHagimoto 0:0e0631af0305 555 OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16)
RyoheiHagimoto 0:0e0631af0305 556 OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32)
RyoheiHagimoto 0:0e0631af0305 557
RyoheiHagimoto 0:0e0631af0305 558 inline v_float32x4 v_abs(v_float32x4 x)
RyoheiHagimoto 0:0e0631af0305 559 { return v_float32x4(vabsq_f32(x.val)); }
RyoheiHagimoto 0:0e0631af0305 560
RyoheiHagimoto 0:0e0631af0305 561 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 562 #define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \
RyoheiHagimoto 0:0e0631af0305 563 inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
RyoheiHagimoto 0:0e0631af0305 564 { \
RyoheiHagimoto 0:0e0631af0305 565 return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \
RyoheiHagimoto 0:0e0631af0305 566 } \
RyoheiHagimoto 0:0e0631af0305 567 inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
RyoheiHagimoto 0:0e0631af0305 568 { \
RyoheiHagimoto 0:0e0631af0305 569 a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \
RyoheiHagimoto 0:0e0631af0305 570 return a; \
RyoheiHagimoto 0:0e0631af0305 571 }
RyoheiHagimoto 0:0e0631af0305 572
RyoheiHagimoto 0:0e0631af0305 573 OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(&, vandq_s64)
RyoheiHagimoto 0:0e0631af0305 574 OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(|, vorrq_s64)
RyoheiHagimoto 0:0e0631af0305 575 OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(^, veorq_s64)
RyoheiHagimoto 0:0e0631af0305 576
RyoheiHagimoto 0:0e0631af0305 577 inline v_float64x2 operator ~ (const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 578 {
RyoheiHagimoto 0:0e0631af0305 579 return v_float64x2(vreinterpretq_f64_s32(vmvnq_s32(vreinterpretq_s32_f64(a.val))));
RyoheiHagimoto 0:0e0631af0305 580 }
RyoheiHagimoto 0:0e0631af0305 581
RyoheiHagimoto 0:0e0631af0305 582 inline v_float64x2 v_sqrt(const v_float64x2& x)
RyoheiHagimoto 0:0e0631af0305 583 {
RyoheiHagimoto 0:0e0631af0305 584 return v_float64x2(vsqrtq_f64(x.val));
RyoheiHagimoto 0:0e0631af0305 585 }
RyoheiHagimoto 0:0e0631af0305 586
RyoheiHagimoto 0:0e0631af0305 587 inline v_float64x2 v_invsqrt(const v_float64x2& x)
RyoheiHagimoto 0:0e0631af0305 588 {
RyoheiHagimoto 0:0e0631af0305 589 v_float64x2 one = v_setall_f64(1.0f);
RyoheiHagimoto 0:0e0631af0305 590 return one / v_sqrt(x);
RyoheiHagimoto 0:0e0631af0305 591 }
RyoheiHagimoto 0:0e0631af0305 592
RyoheiHagimoto 0:0e0631af0305 593 inline v_float64x2 v_abs(v_float64x2 x)
RyoheiHagimoto 0:0e0631af0305 594 { return v_float64x2(vabsq_f64(x.val)); }
RyoheiHagimoto 0:0e0631af0305 595 #endif
RyoheiHagimoto 0:0e0631af0305 596
RyoheiHagimoto 0:0e0631af0305 597 // TODO: exp, log, sin, cos
RyoheiHagimoto 0:0e0631af0305 598
RyoheiHagimoto 0:0e0631af0305 599 #define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \
RyoheiHagimoto 0:0e0631af0305 600 inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 601 { \
RyoheiHagimoto 0:0e0631af0305 602 return _Tpvec(intrin(a.val, b.val)); \
RyoheiHagimoto 0:0e0631af0305 603 }
RyoheiHagimoto 0:0e0631af0305 604
RyoheiHagimoto 0:0e0631af0305 605 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_min, vminq_u8)
RyoheiHagimoto 0:0e0631af0305 606 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_max, vmaxq_u8)
RyoheiHagimoto 0:0e0631af0305 607 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_min, vminq_s8)
RyoheiHagimoto 0:0e0631af0305 608 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_max, vmaxq_s8)
RyoheiHagimoto 0:0e0631af0305 609 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_min, vminq_u16)
RyoheiHagimoto 0:0e0631af0305 610 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_max, vmaxq_u16)
RyoheiHagimoto 0:0e0631af0305 611 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_min, vminq_s16)
RyoheiHagimoto 0:0e0631af0305 612 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_max, vmaxq_s16)
RyoheiHagimoto 0:0e0631af0305 613 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_min, vminq_u32)
RyoheiHagimoto 0:0e0631af0305 614 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_max, vmaxq_u32)
RyoheiHagimoto 0:0e0631af0305 615 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32)
RyoheiHagimoto 0:0e0631af0305 616 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_max, vmaxq_s32)
RyoheiHagimoto 0:0e0631af0305 617 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_min, vminq_f32)
RyoheiHagimoto 0:0e0631af0305 618 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_max, vmaxq_f32)
RyoheiHagimoto 0:0e0631af0305 619 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 620 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64)
RyoheiHagimoto 0:0e0631af0305 621 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64)
RyoheiHagimoto 0:0e0631af0305 622 #endif
RyoheiHagimoto 0:0e0631af0305 623
RyoheiHagimoto 0:0e0631af0305 624 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 625 inline int64x2_t vmvnq_s64(int64x2_t a)
RyoheiHagimoto 0:0e0631af0305 626 {
RyoheiHagimoto 0:0e0631af0305 627 int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF));
RyoheiHagimoto 0:0e0631af0305 628 return veorq_s64(a, vx);
RyoheiHagimoto 0:0e0631af0305 629 }
RyoheiHagimoto 0:0e0631af0305 630 inline uint64x2_t vmvnq_u64(uint64x2_t a)
RyoheiHagimoto 0:0e0631af0305 631 {
RyoheiHagimoto 0:0e0631af0305 632 uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
RyoheiHagimoto 0:0e0631af0305 633 return veorq_u64(a, vx);
RyoheiHagimoto 0:0e0631af0305 634 }
RyoheiHagimoto 0:0e0631af0305 635 #endif
RyoheiHagimoto 0:0e0631af0305 636 #define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \
RyoheiHagimoto 0:0e0631af0305 637 inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 638 { return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \
RyoheiHagimoto 0:0e0631af0305 639 inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 640 { return _Tpvec(cast(vmvnq_##not_suffix(vceqq_##suffix(a.val, b.val)))); } \
RyoheiHagimoto 0:0e0631af0305 641 inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 642 { return _Tpvec(cast(vcltq_##suffix(a.val, b.val))); } \
RyoheiHagimoto 0:0e0631af0305 643 inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 644 { return _Tpvec(cast(vcgtq_##suffix(a.val, b.val))); } \
RyoheiHagimoto 0:0e0631af0305 645 inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 646 { return _Tpvec(cast(vcleq_##suffix(a.val, b.val))); } \
RyoheiHagimoto 0:0e0631af0305 647 inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 648 { return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); }
RyoheiHagimoto 0:0e0631af0305 649
RyoheiHagimoto 0:0e0631af0305 650 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint8x16, OPENCV_HAL_NOP, u8, u8)
RyoheiHagimoto 0:0e0631af0305 651 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, vreinterpretq_s8_u8, s8, u8)
RyoheiHagimoto 0:0e0631af0305 652 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint16x8, OPENCV_HAL_NOP, u16, u16)
RyoheiHagimoto 0:0e0631af0305 653 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
RyoheiHagimoto 0:0e0631af0305 654 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32)
RyoheiHagimoto 0:0e0631af0305 655 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32)
RyoheiHagimoto 0:0e0631af0305 656 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32)
RyoheiHagimoto 0:0e0631af0305 657 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 658 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
RyoheiHagimoto 0:0e0631af0305 659 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
RyoheiHagimoto 0:0e0631af0305 660 OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
RyoheiHagimoto 0:0e0631af0305 661 #endif
RyoheiHagimoto 0:0e0631af0305 662
RyoheiHagimoto 0:0e0631af0305 663 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
RyoheiHagimoto 0:0e0631af0305 664 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8)
RyoheiHagimoto 0:0e0631af0305 665 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16)
RyoheiHagimoto 0:0e0631af0305 666 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_add_wrap, vaddq_s16)
RyoheiHagimoto 0:0e0631af0305 667 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8)
RyoheiHagimoto 0:0e0631af0305 668 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8)
RyoheiHagimoto 0:0e0631af0305 669 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16)
RyoheiHagimoto 0:0e0631af0305 670 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16)
RyoheiHagimoto 0:0e0631af0305 671
RyoheiHagimoto 0:0e0631af0305 672 // TODO: absdiff for signed integers
RyoheiHagimoto 0:0e0631af0305 673 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
RyoheiHagimoto 0:0e0631af0305 674 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
RyoheiHagimoto 0:0e0631af0305 675 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
RyoheiHagimoto 0:0e0631af0305 676 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)
RyoheiHagimoto 0:0e0631af0305 677 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 678 OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64)
RyoheiHagimoto 0:0e0631af0305 679 #endif
RyoheiHagimoto 0:0e0631af0305 680
RyoheiHagimoto 0:0e0631af0305 681 #define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
RyoheiHagimoto 0:0e0631af0305 682 inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 683 { \
RyoheiHagimoto 0:0e0631af0305 684 return _Tpvec2(cast(intrin(a.val, b.val))); \
RyoheiHagimoto 0:0e0631af0305 685 }
RyoheiHagimoto 0:0e0631af0305 686
RyoheiHagimoto 0:0e0631af0305 687 OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int8x16, v_uint8x16, vreinterpretq_u8_s8, v_absdiff, vabdq_s8)
RyoheiHagimoto 0:0e0631af0305 688 OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int16x8, v_uint16x8, vreinterpretq_u16_s16, v_absdiff, vabdq_s16)
RyoheiHagimoto 0:0e0631af0305 689 OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int32x4, v_uint32x4, vreinterpretq_u32_s32, v_absdiff, vabdq_s32)
RyoheiHagimoto 0:0e0631af0305 690
RyoheiHagimoto 0:0e0631af0305 691 inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b)
RyoheiHagimoto 0:0e0631af0305 692 {
RyoheiHagimoto 0:0e0631af0305 693 v_float32x4 x(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
RyoheiHagimoto 0:0e0631af0305 694 return v_sqrt(x);
RyoheiHagimoto 0:0e0631af0305 695 }
RyoheiHagimoto 0:0e0631af0305 696
RyoheiHagimoto 0:0e0631af0305 697 inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
RyoheiHagimoto 0:0e0631af0305 698 {
RyoheiHagimoto 0:0e0631af0305 699 return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
RyoheiHagimoto 0:0e0631af0305 700 }
RyoheiHagimoto 0:0e0631af0305 701
RyoheiHagimoto 0:0e0631af0305 702 inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
RyoheiHagimoto 0:0e0631af0305 703 {
RyoheiHagimoto 0:0e0631af0305 704 return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
RyoheiHagimoto 0:0e0631af0305 705 }
RyoheiHagimoto 0:0e0631af0305 706
RyoheiHagimoto 0:0e0631af0305 707 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 708 inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
RyoheiHagimoto 0:0e0631af0305 709 {
RyoheiHagimoto 0:0e0631af0305 710 v_float64x2 x(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
RyoheiHagimoto 0:0e0631af0305 711 return v_sqrt(x);
RyoheiHagimoto 0:0e0631af0305 712 }
RyoheiHagimoto 0:0e0631af0305 713
RyoheiHagimoto 0:0e0631af0305 714 inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
RyoheiHagimoto 0:0e0631af0305 715 {
RyoheiHagimoto 0:0e0631af0305 716 return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
RyoheiHagimoto 0:0e0631af0305 717 }
RyoheiHagimoto 0:0e0631af0305 718
RyoheiHagimoto 0:0e0631af0305 719 inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
RyoheiHagimoto 0:0e0631af0305 720 {
RyoheiHagimoto 0:0e0631af0305 721 return v_float64x2(vaddq_f64(c.val, vmulq_f64(a.val, b.val)));
RyoheiHagimoto 0:0e0631af0305 722 }
RyoheiHagimoto 0:0e0631af0305 723 #endif
RyoheiHagimoto 0:0e0631af0305 724
RyoheiHagimoto 0:0e0631af0305 725 // trade efficiency for convenience
RyoheiHagimoto 0:0e0631af0305 726 #define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
RyoheiHagimoto 0:0e0631af0305 727 inline _Tpvec operator << (const _Tpvec& a, int n) \
RyoheiHagimoto 0:0e0631af0305 728 { return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \
RyoheiHagimoto 0:0e0631af0305 729 inline _Tpvec operator >> (const _Tpvec& a, int n) \
RyoheiHagimoto 0:0e0631af0305 730 { return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } \
RyoheiHagimoto 0:0e0631af0305 731 template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 732 { return _Tpvec(vshlq_n_##suffix(a.val, n)); } \
RyoheiHagimoto 0:0e0631af0305 733 template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 734 { return _Tpvec(vshrq_n_##suffix(a.val, n)); } \
RyoheiHagimoto 0:0e0631af0305 735 template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 736 { return _Tpvec(vrshrq_n_##suffix(a.val, n)); }
RyoheiHagimoto 0:0e0631af0305 737
RyoheiHagimoto 0:0e0631af0305 738 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8)
RyoheiHagimoto 0:0e0631af0305 739 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8)
RyoheiHagimoto 0:0e0631af0305 740 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, u16, short, s16)
RyoheiHagimoto 0:0e0631af0305 741 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, s16, short, s16)
RyoheiHagimoto 0:0e0631af0305 742 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, u32, int, s32)
RyoheiHagimoto 0:0e0631af0305 743 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32)
RyoheiHagimoto 0:0e0631af0305 744 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64)
RyoheiHagimoto 0:0e0631af0305 745 OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64)
RyoheiHagimoto 0:0e0631af0305 746
RyoheiHagimoto 0:0e0631af0305 747 #define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
RyoheiHagimoto 0:0e0631af0305 748 inline _Tpvec v_load(const _Tp* ptr) \
RyoheiHagimoto 0:0e0631af0305 749 { return _Tpvec(vld1q_##suffix(ptr)); } \
RyoheiHagimoto 0:0e0631af0305 750 inline _Tpvec v_load_aligned(const _Tp* ptr) \
RyoheiHagimoto 0:0e0631af0305 751 { return _Tpvec(vld1q_##suffix(ptr)); } \
RyoheiHagimoto 0:0e0631af0305 752 inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
RyoheiHagimoto 0:0e0631af0305 753 { return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
RyoheiHagimoto 0:0e0631af0305 754 inline void v_store(_Tp* ptr, const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 755 { vst1q_##suffix(ptr, a.val); } \
RyoheiHagimoto 0:0e0631af0305 756 inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 757 { vst1q_##suffix(ptr, a.val); } \
RyoheiHagimoto 0:0e0631af0305 758 inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 759 { vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
RyoheiHagimoto 0:0e0631af0305 760 inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 761 { vst1_##suffix(ptr, vget_high_##suffix(a.val)); }
RyoheiHagimoto 0:0e0631af0305 762
RyoheiHagimoto 0:0e0631af0305 763 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint8x16, uchar, u8)
RyoheiHagimoto 0:0e0631af0305 764 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int8x16, schar, s8)
RyoheiHagimoto 0:0e0631af0305 765 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint16x8, ushort, u16)
RyoheiHagimoto 0:0e0631af0305 766 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16)
RyoheiHagimoto 0:0e0631af0305 767 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32)
RyoheiHagimoto 0:0e0631af0305 768 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
RyoheiHagimoto 0:0e0631af0305 769 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64)
RyoheiHagimoto 0:0e0631af0305 770 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64)
RyoheiHagimoto 0:0e0631af0305 771 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
RyoheiHagimoto 0:0e0631af0305 772 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 773 OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
RyoheiHagimoto 0:0e0631af0305 774 #endif
RyoheiHagimoto 0:0e0631af0305 775
#if defined (HAVE_FP16)
// Workaround for old compilers: half-precision data is addressed through
// `short` pointers in this interface.

// Loads four half-precision values starting at ptr.
inline v_float16x4 v_load_f16(const short* ptr)
{ return v_float16x4(vld1_f16(ptr)); }

// Stores the four half-precision lanes of a to ptr. The vector is only read,
// so it is taken by const reference; this also accepts const objects and
// temporaries, while existing callers passing non-const lvalues still work.
inline void v_store_f16(short* ptr, const v_float16x4& a)
{ vst1_f16(ptr, a.val); }
#endif
RyoheiHagimoto 0:0e0631af0305 783
RyoheiHagimoto 0:0e0631af0305 784 #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
RyoheiHagimoto 0:0e0631af0305 785 inline scalartype v_reduce_##func(const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 786 { \
RyoheiHagimoto 0:0e0631af0305 787 _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
RyoheiHagimoto 0:0e0631af0305 788 a0 = vp##vectorfunc##_##suffix(a0, a0); \
RyoheiHagimoto 0:0e0631af0305 789 return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
RyoheiHagimoto 0:0e0631af0305 790 }
RyoheiHagimoto 0:0e0631af0305 791
RyoheiHagimoto 0:0e0631af0305 792 OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, sum, add, u16)
RyoheiHagimoto 0:0e0631af0305 793 OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, max, max, u16)
RyoheiHagimoto 0:0e0631af0305 794 OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, min, min, u16)
RyoheiHagimoto 0:0e0631af0305 795 OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, sum, add, s16)
RyoheiHagimoto 0:0e0631af0305 796 OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
RyoheiHagimoto 0:0e0631af0305 797 OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
RyoheiHagimoto 0:0e0631af0305 798
RyoheiHagimoto 0:0e0631af0305 799 #define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
RyoheiHagimoto 0:0e0631af0305 800 inline scalartype v_reduce_##func(const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 801 { \
RyoheiHagimoto 0:0e0631af0305 802 _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
RyoheiHagimoto 0:0e0631af0305 803 return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, vget_high_##suffix(a.val)),0); \
RyoheiHagimoto 0:0e0631af0305 804 }
RyoheiHagimoto 0:0e0631af0305 805
RyoheiHagimoto 0:0e0631af0305 806 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32)
RyoheiHagimoto 0:0e0631af0305 807 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32)
RyoheiHagimoto 0:0e0631af0305 808 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, min, min, u32)
RyoheiHagimoto 0:0e0631af0305 809 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, sum, add, s32)
RyoheiHagimoto 0:0e0631af0305 810 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, max, max, s32)
RyoheiHagimoto 0:0e0631af0305 811 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, min, min, s32)
RyoheiHagimoto 0:0e0631af0305 812 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32)
RyoheiHagimoto 0:0e0631af0305 813 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32)
RyoheiHagimoto 0:0e0631af0305 814 OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32)
RyoheiHagimoto 0:0e0631af0305 815
RyoheiHagimoto 0:0e0631af0305 816 inline int v_signmask(const v_uint8x16& a)
RyoheiHagimoto 0:0e0631af0305 817 {
RyoheiHagimoto 0:0e0631af0305 818 int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100));
RyoheiHagimoto 0:0e0631af0305 819 uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0));
RyoheiHagimoto 0:0e0631af0305 820 uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0)));
RyoheiHagimoto 0:0e0631af0305 821 return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8);
RyoheiHagimoto 0:0e0631af0305 822 }
RyoheiHagimoto 0:0e0631af0305 823 inline int v_signmask(const v_int8x16& a)
RyoheiHagimoto 0:0e0631af0305 824 { return v_signmask(v_reinterpret_as_u8(a)); }
RyoheiHagimoto 0:0e0631af0305 825
RyoheiHagimoto 0:0e0631af0305 826 inline int v_signmask(const v_uint16x8& a)
RyoheiHagimoto 0:0e0631af0305 827 {
RyoheiHagimoto 0:0e0631af0305 828 int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000));
RyoheiHagimoto 0:0e0631af0305 829 uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0));
RyoheiHagimoto 0:0e0631af0305 830 uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0));
RyoheiHagimoto 0:0e0631af0305 831 return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4);
RyoheiHagimoto 0:0e0631af0305 832 }
RyoheiHagimoto 0:0e0631af0305 833 inline int v_signmask(const v_int16x8& a)
RyoheiHagimoto 0:0e0631af0305 834 { return v_signmask(v_reinterpret_as_u16(a)); }
RyoheiHagimoto 0:0e0631af0305 835
RyoheiHagimoto 0:0e0631af0305 836 inline int v_signmask(const v_uint32x4& a)
RyoheiHagimoto 0:0e0631af0305 837 {
RyoheiHagimoto 0:0e0631af0305 838 int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000));
RyoheiHagimoto 0:0e0631af0305 839 uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0));
RyoheiHagimoto 0:0e0631af0305 840 uint64x2_t v1 = vpaddlq_u32(v0);
RyoheiHagimoto 0:0e0631af0305 841 return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2);
RyoheiHagimoto 0:0e0631af0305 842 }
RyoheiHagimoto 0:0e0631af0305 843 inline int v_signmask(const v_int32x4& a)
RyoheiHagimoto 0:0e0631af0305 844 { return v_signmask(v_reinterpret_as_u32(a)); }
RyoheiHagimoto 0:0e0631af0305 845 inline int v_signmask(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 846 { return v_signmask(v_reinterpret_as_u32(a)); }
RyoheiHagimoto 0:0e0631af0305 847 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 848 inline int v_signmask(const v_uint64x2& a)
RyoheiHagimoto 0:0e0631af0305 849 {
RyoheiHagimoto 0:0e0631af0305 850 int64x1_t m0 = vdup_n_s64(0);
RyoheiHagimoto 0:0e0631af0305 851 uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0));
RyoheiHagimoto 0:0e0631af0305 852 return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1);
RyoheiHagimoto 0:0e0631af0305 853 }
RyoheiHagimoto 0:0e0631af0305 854 inline int v_signmask(const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 855 { return v_signmask(v_reinterpret_as_u64(a)); }
RyoheiHagimoto 0:0e0631af0305 856 #endif
RyoheiHagimoto 0:0e0631af0305 857
RyoheiHagimoto 0:0e0631af0305 858 #define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
RyoheiHagimoto 0:0e0631af0305 859 inline bool v_check_all(const v_##_Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 860 { \
RyoheiHagimoto 0:0e0631af0305 861 _Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
RyoheiHagimoto 0:0e0631af0305 862 uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
RyoheiHagimoto 0:0e0631af0305 863 return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
RyoheiHagimoto 0:0e0631af0305 864 } \
RyoheiHagimoto 0:0e0631af0305 865 inline bool v_check_any(const v_##_Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 866 { \
RyoheiHagimoto 0:0e0631af0305 867 _Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
RyoheiHagimoto 0:0e0631af0305 868 uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
RyoheiHagimoto 0:0e0631af0305 869 return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
RyoheiHagimoto 0:0e0631af0305 870 }
RyoheiHagimoto 0:0e0631af0305 871
RyoheiHagimoto 0:0e0631af0305 872 OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
RyoheiHagimoto 0:0e0631af0305 873 OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
RyoheiHagimoto 0:0e0631af0305 874 OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
RyoheiHagimoto 0:0e0631af0305 875 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 876 OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint64x2, u64, 63)
RyoheiHagimoto 0:0e0631af0305 877 #endif
RyoheiHagimoto 0:0e0631af0305 878
RyoheiHagimoto 0:0e0631af0305 879 inline bool v_check_all(const v_int8x16& a)
RyoheiHagimoto 0:0e0631af0305 880 { return v_check_all(v_reinterpret_as_u8(a)); }
RyoheiHagimoto 0:0e0631af0305 881 inline bool v_check_all(const v_int16x8& a)
RyoheiHagimoto 0:0e0631af0305 882 { return v_check_all(v_reinterpret_as_u16(a)); }
RyoheiHagimoto 0:0e0631af0305 883 inline bool v_check_all(const v_int32x4& a)
RyoheiHagimoto 0:0e0631af0305 884 { return v_check_all(v_reinterpret_as_u32(a)); }
RyoheiHagimoto 0:0e0631af0305 885 inline bool v_check_all(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 886 { return v_check_all(v_reinterpret_as_u32(a)); }
RyoheiHagimoto 0:0e0631af0305 887
RyoheiHagimoto 0:0e0631af0305 888 inline bool v_check_any(const v_int8x16& a)
RyoheiHagimoto 0:0e0631af0305 889 { return v_check_any(v_reinterpret_as_u8(a)); }
RyoheiHagimoto 0:0e0631af0305 890 inline bool v_check_any(const v_int16x8& a)
RyoheiHagimoto 0:0e0631af0305 891 { return v_check_any(v_reinterpret_as_u16(a)); }
RyoheiHagimoto 0:0e0631af0305 892 inline bool v_check_any(const v_int32x4& a)
RyoheiHagimoto 0:0e0631af0305 893 { return v_check_any(v_reinterpret_as_u32(a)); }
RyoheiHagimoto 0:0e0631af0305 894 inline bool v_check_any(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 895 { return v_check_any(v_reinterpret_as_u32(a)); }
RyoheiHagimoto 0:0e0631af0305 896
RyoheiHagimoto 0:0e0631af0305 897 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 898 inline bool v_check_all(const v_int64x2& a)
RyoheiHagimoto 0:0e0631af0305 899 { return v_check_all(v_reinterpret_as_u64(a)); }
RyoheiHagimoto 0:0e0631af0305 900 inline bool v_check_all(const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 901 { return v_check_all(v_reinterpret_as_u64(a)); }
RyoheiHagimoto 0:0e0631af0305 902 inline bool v_check_any(const v_int64x2& a)
RyoheiHagimoto 0:0e0631af0305 903 { return v_check_any(v_reinterpret_as_u64(a)); }
RyoheiHagimoto 0:0e0631af0305 904 inline bool v_check_any(const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 905 { return v_check_any(v_reinterpret_as_u64(a)); }
RyoheiHagimoto 0:0e0631af0305 906 #endif
RyoheiHagimoto 0:0e0631af0305 907
RyoheiHagimoto 0:0e0631af0305 908 #define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \
RyoheiHagimoto 0:0e0631af0305 909 inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 910 { \
RyoheiHagimoto 0:0e0631af0305 911 return _Tpvec(vbslq_##suffix(vreinterpretq_##usuffix##_##suffix(mask.val), a.val, b.val)); \
RyoheiHagimoto 0:0e0631af0305 912 }
RyoheiHagimoto 0:0e0631af0305 913
RyoheiHagimoto 0:0e0631af0305 914 OPENCV_HAL_IMPL_NEON_SELECT(v_uint8x16, u8, u8)
RyoheiHagimoto 0:0e0631af0305 915 OPENCV_HAL_IMPL_NEON_SELECT(v_int8x16, s8, u8)
RyoheiHagimoto 0:0e0631af0305 916 OPENCV_HAL_IMPL_NEON_SELECT(v_uint16x8, u16, u16)
RyoheiHagimoto 0:0e0631af0305 917 OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16)
RyoheiHagimoto 0:0e0631af0305 918 OPENCV_HAL_IMPL_NEON_SELECT(v_uint32x4, u32, u32)
RyoheiHagimoto 0:0e0631af0305 919 OPENCV_HAL_IMPL_NEON_SELECT(v_int32x4, s32, u32)
RyoheiHagimoto 0:0e0631af0305 920 OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32)
RyoheiHagimoto 0:0e0631af0305 921 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 922 OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64)
RyoheiHagimoto 0:0e0631af0305 923 #endif
RyoheiHagimoto 0:0e0631af0305 924
RyoheiHagimoto 0:0e0631af0305 925 #define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
RyoheiHagimoto 0:0e0631af0305 926 inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
RyoheiHagimoto 0:0e0631af0305 927 { \
RyoheiHagimoto 0:0e0631af0305 928 b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
RyoheiHagimoto 0:0e0631af0305 929 b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \
RyoheiHagimoto 0:0e0631af0305 930 } \
RyoheiHagimoto 0:0e0631af0305 931 inline _Tpwvec v_load_expand(const _Tp* ptr) \
RyoheiHagimoto 0:0e0631af0305 932 { \
RyoheiHagimoto 0:0e0631af0305 933 return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
RyoheiHagimoto 0:0e0631af0305 934 }
RyoheiHagimoto 0:0e0631af0305 935
RyoheiHagimoto 0:0e0631af0305 936 OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8)
RyoheiHagimoto 0:0e0631af0305 937 OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8)
RyoheiHagimoto 0:0e0631af0305 938 OPENCV_HAL_IMPL_NEON_EXPAND(v_uint16x8, v_uint32x4, ushort, u16)
RyoheiHagimoto 0:0e0631af0305 939 OPENCV_HAL_IMPL_NEON_EXPAND(v_int16x8, v_int32x4, short, s16)
RyoheiHagimoto 0:0e0631af0305 940 OPENCV_HAL_IMPL_NEON_EXPAND(v_uint32x4, v_uint64x2, uint, u32)
RyoheiHagimoto 0:0e0631af0305 941 OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32)
RyoheiHagimoto 0:0e0631af0305 942
RyoheiHagimoto 0:0e0631af0305 943 inline v_uint32x4 v_load_expand_q(const uchar* ptr)
RyoheiHagimoto 0:0e0631af0305 944 {
RyoheiHagimoto 0:0e0631af0305 945 uint8x8_t v0 = vcreate_u8(*(unsigned*)ptr);
RyoheiHagimoto 0:0e0631af0305 946 uint16x4_t v1 = vget_low_u16(vmovl_u8(v0));
RyoheiHagimoto 0:0e0631af0305 947 return v_uint32x4(vmovl_u16(v1));
RyoheiHagimoto 0:0e0631af0305 948 }
RyoheiHagimoto 0:0e0631af0305 949
RyoheiHagimoto 0:0e0631af0305 950 inline v_int32x4 v_load_expand_q(const schar* ptr)
RyoheiHagimoto 0:0e0631af0305 951 {
RyoheiHagimoto 0:0e0631af0305 952 int8x8_t v0 = vcreate_s8(*(unsigned*)ptr);
RyoheiHagimoto 0:0e0631af0305 953 int16x4_t v1 = vget_low_s16(vmovl_s8(v0));
RyoheiHagimoto 0:0e0631af0305 954 return v_int32x4(vmovl_s16(v1));
RyoheiHagimoto 0:0e0631af0305 955 }
RyoheiHagimoto 0:0e0631af0305 956
RyoheiHagimoto 0:0e0631af0305 957 #if defined(__aarch64__)
RyoheiHagimoto 0:0e0631af0305 958 #define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
RyoheiHagimoto 0:0e0631af0305 959 inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
RyoheiHagimoto 0:0e0631af0305 960 { \
RyoheiHagimoto 0:0e0631af0305 961 b0.val = vzip1q_##suffix(a0.val, a1.val); \
RyoheiHagimoto 0:0e0631af0305 962 b1.val = vzip2q_##suffix(a0.val, a1.val); \
RyoheiHagimoto 0:0e0631af0305 963 } \
RyoheiHagimoto 0:0e0631af0305 964 inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 965 { \
RyoheiHagimoto 0:0e0631af0305 966 return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
RyoheiHagimoto 0:0e0631af0305 967 } \
RyoheiHagimoto 0:0e0631af0305 968 inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 969 { \
RyoheiHagimoto 0:0e0631af0305 970 return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
RyoheiHagimoto 0:0e0631af0305 971 } \
RyoheiHagimoto 0:0e0631af0305 972 inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
RyoheiHagimoto 0:0e0631af0305 973 { \
RyoheiHagimoto 0:0e0631af0305 974 c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
RyoheiHagimoto 0:0e0631af0305 975 d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
RyoheiHagimoto 0:0e0631af0305 976 }
RyoheiHagimoto 0:0e0631af0305 977 #else
RyoheiHagimoto 0:0e0631af0305 978 #define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
RyoheiHagimoto 0:0e0631af0305 979 inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
RyoheiHagimoto 0:0e0631af0305 980 { \
RyoheiHagimoto 0:0e0631af0305 981 _Tpvec##x2_t p = vzipq_##suffix(a0.val, a1.val); \
RyoheiHagimoto 0:0e0631af0305 982 b0.val = p.val[0]; \
RyoheiHagimoto 0:0e0631af0305 983 b1.val = p.val[1]; \
RyoheiHagimoto 0:0e0631af0305 984 } \
RyoheiHagimoto 0:0e0631af0305 985 inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 986 { \
RyoheiHagimoto 0:0e0631af0305 987 return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
RyoheiHagimoto 0:0e0631af0305 988 } \
RyoheiHagimoto 0:0e0631af0305 989 inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 990 { \
RyoheiHagimoto 0:0e0631af0305 991 return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
RyoheiHagimoto 0:0e0631af0305 992 } \
RyoheiHagimoto 0:0e0631af0305 993 inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
RyoheiHagimoto 0:0e0631af0305 994 { \
RyoheiHagimoto 0:0e0631af0305 995 c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
RyoheiHagimoto 0:0e0631af0305 996 d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
RyoheiHagimoto 0:0e0631af0305 997 }
RyoheiHagimoto 0:0e0631af0305 998 #endif
RyoheiHagimoto 0:0e0631af0305 999
RyoheiHagimoto 0:0e0631af0305 1000 OPENCV_HAL_IMPL_NEON_UNPACKS(uint8x16, u8)
RyoheiHagimoto 0:0e0631af0305 1001 OPENCV_HAL_IMPL_NEON_UNPACKS(int8x16, s8)
RyoheiHagimoto 0:0e0631af0305 1002 OPENCV_HAL_IMPL_NEON_UNPACKS(uint16x8, u16)
RyoheiHagimoto 0:0e0631af0305 1003 OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16)
RyoheiHagimoto 0:0e0631af0305 1004 OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32)
RyoheiHagimoto 0:0e0631af0305 1005 OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32)
RyoheiHagimoto 0:0e0631af0305 1006 OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
RyoheiHagimoto 0:0e0631af0305 1007 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 1008 OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64)
RyoheiHagimoto 0:0e0631af0305 1009 #endif
RyoheiHagimoto 0:0e0631af0305 1010
RyoheiHagimoto 0:0e0631af0305 1011 #define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
RyoheiHagimoto 0:0e0631af0305 1012 template <int s> \
RyoheiHagimoto 0:0e0631af0305 1013 inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 1014 { \
RyoheiHagimoto 0:0e0631af0305 1015 return v_##_Tpvec(vextq_##suffix(a.val, b.val, s)); \
RyoheiHagimoto 0:0e0631af0305 1016 }
RyoheiHagimoto 0:0e0631af0305 1017
RyoheiHagimoto 0:0e0631af0305 1018 OPENCV_HAL_IMPL_NEON_EXTRACT(uint8x16, u8)
RyoheiHagimoto 0:0e0631af0305 1019 OPENCV_HAL_IMPL_NEON_EXTRACT(int8x16, s8)
RyoheiHagimoto 0:0e0631af0305 1020 OPENCV_HAL_IMPL_NEON_EXTRACT(uint16x8, u16)
RyoheiHagimoto 0:0e0631af0305 1021 OPENCV_HAL_IMPL_NEON_EXTRACT(int16x8, s16)
RyoheiHagimoto 0:0e0631af0305 1022 OPENCV_HAL_IMPL_NEON_EXTRACT(uint32x4, u32)
RyoheiHagimoto 0:0e0631af0305 1023 OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32)
RyoheiHagimoto 0:0e0631af0305 1024 OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64)
RyoheiHagimoto 0:0e0631af0305 1025 OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64)
RyoheiHagimoto 0:0e0631af0305 1026 OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
RyoheiHagimoto 0:0e0631af0305 1027 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 1028 OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
RyoheiHagimoto 0:0e0631af0305 1029 #endif
RyoheiHagimoto 0:0e0631af0305 1030
RyoheiHagimoto 0:0e0631af0305 1031 inline v_int32x4 v_round(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 1032 {
RyoheiHagimoto 0:0e0631af0305 1033 static const int32x4_t v_sign = vdupq_n_s32(1 << 31),
RyoheiHagimoto 0:0e0631af0305 1034 v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f));
RyoheiHagimoto 0:0e0631af0305 1035
RyoheiHagimoto 0:0e0631af0305 1036 int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val)));
RyoheiHagimoto 0:0e0631af0305 1037 return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition))));
RyoheiHagimoto 0:0e0631af0305 1038 }
RyoheiHagimoto 0:0e0631af0305 1039
RyoheiHagimoto 0:0e0631af0305 1040 inline v_int32x4 v_floor(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 1041 {
RyoheiHagimoto 0:0e0631af0305 1042 int32x4_t a1 = vcvtq_s32_f32(a.val);
RyoheiHagimoto 0:0e0631af0305 1043 uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val);
RyoheiHagimoto 0:0e0631af0305 1044 return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask)));
RyoheiHagimoto 0:0e0631af0305 1045 }
RyoheiHagimoto 0:0e0631af0305 1046
RyoheiHagimoto 0:0e0631af0305 1047 inline v_int32x4 v_ceil(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 1048 {
RyoheiHagimoto 0:0e0631af0305 1049 int32x4_t a1 = vcvtq_s32_f32(a.val);
RyoheiHagimoto 0:0e0631af0305 1050 uint32x4_t mask = vcgtq_f32(a.val, vcvtq_f32_s32(a1));
RyoheiHagimoto 0:0e0631af0305 1051 return v_int32x4(vsubq_s32(a1, vreinterpretq_s32_u32(mask)));
RyoheiHagimoto 0:0e0631af0305 1052 }
RyoheiHagimoto 0:0e0631af0305 1053
RyoheiHagimoto 0:0e0631af0305 1054 inline v_int32x4 v_trunc(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 1055 { return v_int32x4(vcvtq_s32_f32(a.val)); }
RyoheiHagimoto 0:0e0631af0305 1056
RyoheiHagimoto 0:0e0631af0305 1057 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 1058 inline v_int32x4 v_round(const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 1059 {
RyoheiHagimoto 0:0e0631af0305 1060 static const int32x2_t zero = vdup_n_s32(0);
RyoheiHagimoto 0:0e0631af0305 1061 return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
RyoheiHagimoto 0:0e0631af0305 1062 }
RyoheiHagimoto 0:0e0631af0305 1063
RyoheiHagimoto 0:0e0631af0305 1064 inline v_int32x4 v_floor(const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 1065 {
RyoheiHagimoto 0:0e0631af0305 1066 static const int32x2_t zero = vdup_n_s32(0);
RyoheiHagimoto 0:0e0631af0305 1067 int64x2_t a1 = vcvtq_s64_f64(a.val);
RyoheiHagimoto 0:0e0631af0305 1068 uint64x2_t mask = vcgtq_f64(vcvtq_f64_s64(a1), a.val);
RyoheiHagimoto 0:0e0631af0305 1069 a1 = vaddq_s64(a1, vreinterpretq_s64_u64(mask));
RyoheiHagimoto 0:0e0631af0305 1070 return v_int32x4(vcombine_s32(vmovn_s64(a1), zero));
RyoheiHagimoto 0:0e0631af0305 1071 }
RyoheiHagimoto 0:0e0631af0305 1072
RyoheiHagimoto 0:0e0631af0305 1073 inline v_int32x4 v_ceil(const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 1074 {
RyoheiHagimoto 0:0e0631af0305 1075 static const int32x2_t zero = vdup_n_s32(0);
RyoheiHagimoto 0:0e0631af0305 1076 int64x2_t a1 = vcvtq_s64_f64(a.val);
RyoheiHagimoto 0:0e0631af0305 1077 uint64x2_t mask = vcgtq_f64(a.val, vcvtq_f64_s64(a1));
RyoheiHagimoto 0:0e0631af0305 1078 a1 = vsubq_s64(a1, vreinterpretq_s64_u64(mask));
RyoheiHagimoto 0:0e0631af0305 1079 return v_int32x4(vcombine_s32(vmovn_s64(a1), zero));
RyoheiHagimoto 0:0e0631af0305 1080 }
RyoheiHagimoto 0:0e0631af0305 1081
RyoheiHagimoto 0:0e0631af0305 1082 inline v_int32x4 v_trunc(const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 1083 {
RyoheiHagimoto 0:0e0631af0305 1084 static const int32x2_t zero = vdup_n_s32(0);
RyoheiHagimoto 0:0e0631af0305 1085 return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
RyoheiHagimoto 0:0e0631af0305 1086 }
RyoheiHagimoto 0:0e0631af0305 1087 #endif
RyoheiHagimoto 0:0e0631af0305 1088
RyoheiHagimoto 0:0e0631af0305 1089 #define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
RyoheiHagimoto 0:0e0631af0305 1090 inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
RyoheiHagimoto 0:0e0631af0305 1091 const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
RyoheiHagimoto 0:0e0631af0305 1092 v_##_Tpvec& b0, v_##_Tpvec& b1, \
RyoheiHagimoto 0:0e0631af0305 1093 v_##_Tpvec& b2, v_##_Tpvec& b3) \
RyoheiHagimoto 0:0e0631af0305 1094 { \
RyoheiHagimoto 0:0e0631af0305 1095 /* m00 m01 m02 m03 */ \
RyoheiHagimoto 0:0e0631af0305 1096 /* m10 m11 m12 m13 */ \
RyoheiHagimoto 0:0e0631af0305 1097 /* m20 m21 m22 m23 */ \
RyoheiHagimoto 0:0e0631af0305 1098 /* m30 m31 m32 m33 */ \
RyoheiHagimoto 0:0e0631af0305 1099 _Tpvec##x2_t t0 = vtrnq_##suffix(a0.val, a1.val); \
RyoheiHagimoto 0:0e0631af0305 1100 _Tpvec##x2_t t1 = vtrnq_##suffix(a2.val, a3.val); \
RyoheiHagimoto 0:0e0631af0305 1101 /* m00 m10 m02 m12 */ \
RyoheiHagimoto 0:0e0631af0305 1102 /* m01 m11 m03 m13 */ \
RyoheiHagimoto 0:0e0631af0305 1103 /* m20 m30 m22 m32 */ \
RyoheiHagimoto 0:0e0631af0305 1104 /* m21 m31 m23 m33 */ \
RyoheiHagimoto 0:0e0631af0305 1105 b0.val = vcombine_##suffix(vget_low_##suffix(t0.val[0]), vget_low_##suffix(t1.val[0])); \
RyoheiHagimoto 0:0e0631af0305 1106 b1.val = vcombine_##suffix(vget_low_##suffix(t0.val[1]), vget_low_##suffix(t1.val[1])); \
RyoheiHagimoto 0:0e0631af0305 1107 b2.val = vcombine_##suffix(vget_high_##suffix(t0.val[0]), vget_high_##suffix(t1.val[0])); \
RyoheiHagimoto 0:0e0631af0305 1108 b3.val = vcombine_##suffix(vget_high_##suffix(t0.val[1]), vget_high_##suffix(t1.val[1])); \
RyoheiHagimoto 0:0e0631af0305 1109 }
RyoheiHagimoto 0:0e0631af0305 1110
RyoheiHagimoto 0:0e0631af0305 1111 OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u32)
RyoheiHagimoto 0:0e0631af0305 1112 OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32)
RyoheiHagimoto 0:0e0631af0305 1113 OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32)
RyoheiHagimoto 0:0e0631af0305 1114
RyoheiHagimoto 0:0e0631af0305 1115 #define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \
RyoheiHagimoto 0:0e0631af0305 1116 inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 1117 { \
RyoheiHagimoto 0:0e0631af0305 1118 _Tpvec##x2_t v = vld2q_##suffix(ptr); \
RyoheiHagimoto 0:0e0631af0305 1119 a.val = v.val[0]; \
RyoheiHagimoto 0:0e0631af0305 1120 b.val = v.val[1]; \
RyoheiHagimoto 0:0e0631af0305 1121 } \
RyoheiHagimoto 0:0e0631af0305 1122 inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
RyoheiHagimoto 0:0e0631af0305 1123 { \
RyoheiHagimoto 0:0e0631af0305 1124 _Tpvec##x3_t v = vld3q_##suffix(ptr); \
RyoheiHagimoto 0:0e0631af0305 1125 a.val = v.val[0]; \
RyoheiHagimoto 0:0e0631af0305 1126 b.val = v.val[1]; \
RyoheiHagimoto 0:0e0631af0305 1127 c.val = v.val[2]; \
RyoheiHagimoto 0:0e0631af0305 1128 } \
RyoheiHagimoto 0:0e0631af0305 1129 inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
RyoheiHagimoto 0:0e0631af0305 1130 v_##_Tpvec& c, v_##_Tpvec& d) \
RyoheiHagimoto 0:0e0631af0305 1131 { \
RyoheiHagimoto 0:0e0631af0305 1132 _Tpvec##x4_t v = vld4q_##suffix(ptr); \
RyoheiHagimoto 0:0e0631af0305 1133 a.val = v.val[0]; \
RyoheiHagimoto 0:0e0631af0305 1134 b.val = v.val[1]; \
RyoheiHagimoto 0:0e0631af0305 1135 c.val = v.val[2]; \
RyoheiHagimoto 0:0e0631af0305 1136 d.val = v.val[3]; \
RyoheiHagimoto 0:0e0631af0305 1137 } \
RyoheiHagimoto 0:0e0631af0305 1138 inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 1139 { \
RyoheiHagimoto 0:0e0631af0305 1140 _Tpvec##x2_t v; \
RyoheiHagimoto 0:0e0631af0305 1141 v.val[0] = a.val; \
RyoheiHagimoto 0:0e0631af0305 1142 v.val[1] = b.val; \
RyoheiHagimoto 0:0e0631af0305 1143 vst2q_##suffix(ptr, v); \
RyoheiHagimoto 0:0e0631af0305 1144 } \
RyoheiHagimoto 0:0e0631af0305 1145 inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
RyoheiHagimoto 0:0e0631af0305 1146 { \
RyoheiHagimoto 0:0e0631af0305 1147 _Tpvec##x3_t v; \
RyoheiHagimoto 0:0e0631af0305 1148 v.val[0] = a.val; \
RyoheiHagimoto 0:0e0631af0305 1149 v.val[1] = b.val; \
RyoheiHagimoto 0:0e0631af0305 1150 v.val[2] = c.val; \
RyoheiHagimoto 0:0e0631af0305 1151 vst3q_##suffix(ptr, v); \
RyoheiHagimoto 0:0e0631af0305 1152 } \
RyoheiHagimoto 0:0e0631af0305 1153 inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
RyoheiHagimoto 0:0e0631af0305 1154 const v_##_Tpvec& c, const v_##_Tpvec& d) \
RyoheiHagimoto 0:0e0631af0305 1155 { \
RyoheiHagimoto 0:0e0631af0305 1156 _Tpvec##x4_t v; \
RyoheiHagimoto 0:0e0631af0305 1157 v.val[0] = a.val; \
RyoheiHagimoto 0:0e0631af0305 1158 v.val[1] = b.val; \
RyoheiHagimoto 0:0e0631af0305 1159 v.val[2] = c.val; \
RyoheiHagimoto 0:0e0631af0305 1160 v.val[3] = d.val; \
RyoheiHagimoto 0:0e0631af0305 1161 vst4q_##suffix(ptr, v); \
RyoheiHagimoto 0:0e0631af0305 1162 }
RyoheiHagimoto 0:0e0631af0305 1163
RyoheiHagimoto 0:0e0631af0305 1164 OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8)
RyoheiHagimoto 0:0e0631af0305 1165 OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8)
RyoheiHagimoto 0:0e0631af0305 1166 OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16)
RyoheiHagimoto 0:0e0631af0305 1167 OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16)
RyoheiHagimoto 0:0e0631af0305 1168 OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint32x4, unsigned, u32)
RyoheiHagimoto 0:0e0631af0305 1169 OPENCV_HAL_IMPL_NEON_INTERLEAVED(int32x4, int, s32)
RyoheiHagimoto 0:0e0631af0305 1170 OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32)
RyoheiHagimoto 0:0e0631af0305 1171 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 1172 OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64)
RyoheiHagimoto 0:0e0631af0305 1173 #endif
RyoheiHagimoto 0:0e0631af0305 1174
RyoheiHagimoto 0:0e0631af0305 1175 inline v_float32x4 v_cvt_f32(const v_int32x4& a)
RyoheiHagimoto 0:0e0631af0305 1176 {
RyoheiHagimoto 0:0e0631af0305 1177 return v_float32x4(vcvtq_f32_s32(a.val));
RyoheiHagimoto 0:0e0631af0305 1178 }
RyoheiHagimoto 0:0e0631af0305 1179
RyoheiHagimoto 0:0e0631af0305 1180 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 1181 inline v_float32x4 v_cvt_f32(const v_float64x2& a)
RyoheiHagimoto 0:0e0631af0305 1182 {
RyoheiHagimoto 0:0e0631af0305 1183 float32x2_t zero = vdup_n_f32(0.0f);
RyoheiHagimoto 0:0e0631af0305 1184 return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero));
RyoheiHagimoto 0:0e0631af0305 1185 }
RyoheiHagimoto 0:0e0631af0305 1186
RyoheiHagimoto 0:0e0631af0305 1187 inline v_float64x2 v_cvt_f64(const v_int32x4& a)
RyoheiHagimoto 0:0e0631af0305 1188 {
RyoheiHagimoto 0:0e0631af0305 1189 return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val))));
RyoheiHagimoto 0:0e0631af0305 1190 }
RyoheiHagimoto 0:0e0631af0305 1191
RyoheiHagimoto 0:0e0631af0305 1192 inline v_float64x2 v_cvt_f64_high(const v_int32x4& a)
RyoheiHagimoto 0:0e0631af0305 1193 {
RyoheiHagimoto 0:0e0631af0305 1194 return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_high_s32(a.val))));
RyoheiHagimoto 0:0e0631af0305 1195 }
RyoheiHagimoto 0:0e0631af0305 1196
RyoheiHagimoto 0:0e0631af0305 1197 inline v_float64x2 v_cvt_f64(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 1198 {
RyoheiHagimoto 0:0e0631af0305 1199 return v_float64x2(vcvt_f64_f32(vget_low_f32(a.val)));
RyoheiHagimoto 0:0e0631af0305 1200 }
RyoheiHagimoto 0:0e0631af0305 1201
RyoheiHagimoto 0:0e0631af0305 1202 inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 1203 {
RyoheiHagimoto 0:0e0631af0305 1204 return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val)));
RyoheiHagimoto 0:0e0631af0305 1205 }
RyoheiHagimoto 0:0e0631af0305 1206 #endif
RyoheiHagimoto 0:0e0631af0305 1207
RyoheiHagimoto 0:0e0631af0305 1208 #if defined (HAVE_FP16)
RyoheiHagimoto 0:0e0631af0305 1209 inline v_float32x4 v_cvt_f32(const v_float16x4& a)
RyoheiHagimoto 0:0e0631af0305 1210 {
RyoheiHagimoto 0:0e0631af0305 1211 return v_float32x4(vcvt_f32_f16(a.val));
RyoheiHagimoto 0:0e0631af0305 1212 }
RyoheiHagimoto 0:0e0631af0305 1213
RyoheiHagimoto 0:0e0631af0305 1214 inline v_float16x4 v_cvt_f16(const v_float32x4& a)
RyoheiHagimoto 0:0e0631af0305 1215 {
RyoheiHagimoto 0:0e0631af0305 1216 return v_float16x4(vcvt_f16_f32(a.val));
RyoheiHagimoto 0:0e0631af0305 1217 }
RyoheiHagimoto 0:0e0631af0305 1218 #endif
RyoheiHagimoto 0:0e0631af0305 1219
RyoheiHagimoto 0:0e0631af0305 1220 //! @name Check SIMD support
RyoheiHagimoto 0:0e0631af0305 1221 //! @{
RyoheiHagimoto 0:0e0631af0305 1222 //! @brief Check CPU capability of SIMD operation
RyoheiHagimoto 0:0e0631af0305 1223 static inline bool hasSIMD128()
RyoheiHagimoto 0:0e0631af0305 1224 {
RyoheiHagimoto 0:0e0631af0305 1225 return checkHardwareSupport(CV_CPU_NEON);
RyoheiHagimoto 0:0e0631af0305 1226 }
RyoheiHagimoto 0:0e0631af0305 1227
RyoheiHagimoto 0:0e0631af0305 1228 //! @}
RyoheiHagimoto 0:0e0631af0305 1229
RyoheiHagimoto 0:0e0631af0305 1230 //! @endcond
RyoheiHagimoto 0:0e0631af0305 1231
RyoheiHagimoto 0:0e0631af0305 1232 }
RyoheiHagimoto 0:0e0631af0305 1233
RyoheiHagimoto 0:0e0631af0305 1234 #endif