openCV library for Renesas RZ/A
Dependents: RZ_A2M_Mbed_samples
include/opencv2/core/hal/intrin_neon.hpp@0:0e0631af0305, 2021-01-29 (annotated)
- Committer:
- RyoheiHagimoto
- Date:
- Fri Jan 29 04:53:38 2021 +0000
- Revision:
- 0:0e0631af0305
copied from https://github.com/d-kato/opencv-lib.
Who changed what in which revision?
| User | Revision | Line number | New contents of line |
|---|---|---|---|
| RyoheiHagimoto | 0:0e0631af0305 | 1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
| RyoheiHagimoto | 0:0e0631af0305 | 2 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
| RyoheiHagimoto | 0:0e0631af0305 | 4 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 5 | // By downloading, copying, installing or using the software you agree to this license. |
| RyoheiHagimoto | 0:0e0631af0305 | 6 | // If you do not agree to this license, do not download, install, |
| RyoheiHagimoto | 0:0e0631af0305 | 7 | // copy or use the software. |
| RyoheiHagimoto | 0:0e0631af0305 | 8 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 9 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 10 | // License Agreement |
| RyoheiHagimoto | 0:0e0631af0305 | 11 | // For Open Source Computer Vision Library |
| RyoheiHagimoto | 0:0e0631af0305 | 12 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 13 | // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
| RyoheiHagimoto | 0:0e0631af0305 | 14 | // Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
| RyoheiHagimoto | 0:0e0631af0305 | 15 | // Copyright (C) 2013, OpenCV Foundation, all rights reserved. |
| RyoheiHagimoto | 0:0e0631af0305 | 16 | // Copyright (C) 2015, Itseez Inc., all rights reserved. |
| RyoheiHagimoto | 0:0e0631af0305 | 17 | // Third party copyrights are property of their respective owners. |
| RyoheiHagimoto | 0:0e0631af0305 | 18 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 19 | // Redistribution and use in source and binary forms, with or without modification, |
| RyoheiHagimoto | 0:0e0631af0305 | 20 | // are permitted provided that the following conditions are met: |
| RyoheiHagimoto | 0:0e0631af0305 | 21 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 22 | // * Redistribution's of source code must retain the above copyright notice, |
| RyoheiHagimoto | 0:0e0631af0305 | 23 | // this list of conditions and the following disclaimer. |
| RyoheiHagimoto | 0:0e0631af0305 | 24 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 25 | // * Redistribution's in binary form must reproduce the above copyright notice, |
| RyoheiHagimoto | 0:0e0631af0305 | 26 | // this list of conditions and the following disclaimer in the documentation |
| RyoheiHagimoto | 0:0e0631af0305 | 27 | // and/or other materials provided with the distribution. |
| RyoheiHagimoto | 0:0e0631af0305 | 28 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 29 | // * The name of the copyright holders may not be used to endorse or promote products |
| RyoheiHagimoto | 0:0e0631af0305 | 30 | // derived from this software without specific prior written permission. |
| RyoheiHagimoto | 0:0e0631af0305 | 31 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 32 | // This software is provided by the copyright holders and contributors "as is" and |
| RyoheiHagimoto | 0:0e0631af0305 | 33 | // any express or implied warranties, including, but not limited to, the implied |
| RyoheiHagimoto | 0:0e0631af0305 | 34 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
| RyoheiHagimoto | 0:0e0631af0305 | 35 | // In no event shall the Intel Corporation or contributors be liable for any direct, |
| RyoheiHagimoto | 0:0e0631af0305 | 36 | // indirect, incidental, special, exemplary, or consequential damages |
| RyoheiHagimoto | 0:0e0631af0305 | 37 | // (including, but not limited to, procurement of substitute goods or services; |
| RyoheiHagimoto | 0:0e0631af0305 | 38 | // loss of use, data, or profits; or business interruption) however caused |
| RyoheiHagimoto | 0:0e0631af0305 | 39 | // and on any theory of liability, whether in contract, strict liability, |
| RyoheiHagimoto | 0:0e0631af0305 | 40 | // or tort (including negligence or otherwise) arising in any way out of |
| RyoheiHagimoto | 0:0e0631af0305 | 41 | // the use of this software, even if advised of the possibility of such damage. |
| RyoheiHagimoto | 0:0e0631af0305 | 42 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 43 | //M*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 44 | |
| RyoheiHagimoto | 0:0e0631af0305 | 45 | #ifndef OPENCV_HAL_INTRIN_NEON_HPP |
| RyoheiHagimoto | 0:0e0631af0305 | 46 | #define OPENCV_HAL_INTRIN_NEON_HPP |
| RyoheiHagimoto | 0:0e0631af0305 | 47 | |
| RyoheiHagimoto | 0:0e0631af0305 | 48 | #include <algorithm> |
| RyoheiHagimoto | 0:0e0631af0305 | 49 | #include "opencv2/core/utility.hpp" |
| RyoheiHagimoto | 0:0e0631af0305 | 50 | |
| RyoheiHagimoto | 0:0e0631af0305 | 51 | namespace cv |
| RyoheiHagimoto | 0:0e0631af0305 | 52 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 53 | |
| RyoheiHagimoto | 0:0e0631af0305 | 54 | //! @cond IGNORED |
| RyoheiHagimoto | 0:0e0631af0305 | 55 | |
| RyoheiHagimoto | 0:0e0631af0305 | 56 | #define CV_SIMD128 1 |
| RyoheiHagimoto | 0:0e0631af0305 | 57 | #if defined(__aarch64__) |
| RyoheiHagimoto | 0:0e0631af0305 | 58 | #define CV_SIMD128_64F 1 |
| RyoheiHagimoto | 0:0e0631af0305 | 59 | #else |
| RyoheiHagimoto | 0:0e0631af0305 | 60 | #define CV_SIMD128_64F 0 |
| RyoheiHagimoto | 0:0e0631af0305 | 61 | #endif |
| RyoheiHagimoto | 0:0e0631af0305 | 62 | |
#if CV_SIMD128_64F
// Some older compilers/headers do not provide the f64 reinterpret intrinsics
// (vreinterpretq_<T>_f64 / vreinterpretq_f64_<T>), so supply them here via
// C-style vector casts, which are bit-pattern preserving and zero cost.
#define OPENCV_HAL_IMPL_NEON_REINTERPRET(_Tpv, suffix) \
template <typename T> static inline \
_Tpv vreinterpretq_##suffix##_f64(T a) { return (_Tpv) a; } \
template <typename T> static inline \
float64x2_t vreinterpretq_f64_##suffix(T a) { return (float64x2_t) a; }
// Instantiate for every non-f64 128-bit vector type.
OPENCV_HAL_IMPL_NEON_REINTERPRET(uint8x16_t, u8)
OPENCV_HAL_IMPL_NEON_REINTERPRET(int8x16_t, s8)
OPENCV_HAL_IMPL_NEON_REINTERPRET(uint16x8_t, u16)
OPENCV_HAL_IMPL_NEON_REINTERPRET(int16x8_t, s16)
OPENCV_HAL_IMPL_NEON_REINTERPRET(uint32x4_t, u32)
OPENCV_HAL_IMPL_NEON_REINTERPRET(int32x4_t, s32)
OPENCV_HAL_IMPL_NEON_REINTERPRET(uint64x2_t, u64)
OPENCV_HAL_IMPL_NEON_REINTERPRET(int64x2_t, s64)
OPENCV_HAL_IMPL_NEON_REINTERPRET(float32x4_t, f32)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 79 | |
| RyoheiHagimoto | 0:0e0631af0305 | 80 | struct v_uint8x16 |
| RyoheiHagimoto | 0:0e0631af0305 | 81 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 82 | typedef uchar lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 83 | enum { nlanes = 16 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 84 | |
| RyoheiHagimoto | 0:0e0631af0305 | 85 | v_uint8x16() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 86 | explicit v_uint8x16(uint8x16_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 87 | v_uint8x16(uchar v0, uchar v1, uchar v2, uchar v3, uchar v4, uchar v5, uchar v6, uchar v7, |
| RyoheiHagimoto | 0:0e0631af0305 | 88 | uchar v8, uchar v9, uchar v10, uchar v11, uchar v12, uchar v13, uchar v14, uchar v15) |
| RyoheiHagimoto | 0:0e0631af0305 | 89 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 90 | uchar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; |
| RyoheiHagimoto | 0:0e0631af0305 | 91 | val = vld1q_u8(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 92 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 93 | uchar get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 94 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 95 | return vgetq_lane_u8(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 96 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 97 | |
| RyoheiHagimoto | 0:0e0631af0305 | 98 | uint8x16_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 99 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 100 | |
| RyoheiHagimoto | 0:0e0631af0305 | 101 | struct v_int8x16 |
| RyoheiHagimoto | 0:0e0631af0305 | 102 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 103 | typedef schar lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 104 | enum { nlanes = 16 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 105 | |
| RyoheiHagimoto | 0:0e0631af0305 | 106 | v_int8x16() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 107 | explicit v_int8x16(int8x16_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 108 | v_int8x16(schar v0, schar v1, schar v2, schar v3, schar v4, schar v5, schar v6, schar v7, |
| RyoheiHagimoto | 0:0e0631af0305 | 109 | schar v8, schar v9, schar v10, schar v11, schar v12, schar v13, schar v14, schar v15) |
| RyoheiHagimoto | 0:0e0631af0305 | 110 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 111 | schar v[] = {v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15}; |
| RyoheiHagimoto | 0:0e0631af0305 | 112 | val = vld1q_s8(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 113 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 114 | schar get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 115 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 116 | return vgetq_lane_s8(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 117 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 118 | |
| RyoheiHagimoto | 0:0e0631af0305 | 119 | int8x16_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 120 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 121 | |
| RyoheiHagimoto | 0:0e0631af0305 | 122 | struct v_uint16x8 |
| RyoheiHagimoto | 0:0e0631af0305 | 123 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 124 | typedef ushort lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 125 | enum { nlanes = 8 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 126 | |
| RyoheiHagimoto | 0:0e0631af0305 | 127 | v_uint16x8() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 128 | explicit v_uint16x8(uint16x8_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 129 | v_uint16x8(ushort v0, ushort v1, ushort v2, ushort v3, ushort v4, ushort v5, ushort v6, ushort v7) |
| RyoheiHagimoto | 0:0e0631af0305 | 130 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 131 | ushort v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; |
| RyoheiHagimoto | 0:0e0631af0305 | 132 | val = vld1q_u16(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 133 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 134 | ushort get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 135 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 136 | return vgetq_lane_u16(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 137 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 138 | |
| RyoheiHagimoto | 0:0e0631af0305 | 139 | uint16x8_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 140 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 141 | |
| RyoheiHagimoto | 0:0e0631af0305 | 142 | struct v_int16x8 |
| RyoheiHagimoto | 0:0e0631af0305 | 143 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 144 | typedef short lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 145 | enum { nlanes = 8 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 146 | |
| RyoheiHagimoto | 0:0e0631af0305 | 147 | v_int16x8() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 148 | explicit v_int16x8(int16x8_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 149 | v_int16x8(short v0, short v1, short v2, short v3, short v4, short v5, short v6, short v7) |
| RyoheiHagimoto | 0:0e0631af0305 | 150 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 151 | short v[] = {v0, v1, v2, v3, v4, v5, v6, v7}; |
| RyoheiHagimoto | 0:0e0631af0305 | 152 | val = vld1q_s16(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 153 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 154 | short get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 155 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 156 | return vgetq_lane_s16(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 157 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 158 | |
| RyoheiHagimoto | 0:0e0631af0305 | 159 | int16x8_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 160 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 161 | |
| RyoheiHagimoto | 0:0e0631af0305 | 162 | struct v_uint32x4 |
| RyoheiHagimoto | 0:0e0631af0305 | 163 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 164 | typedef unsigned lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 165 | enum { nlanes = 4 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 166 | |
| RyoheiHagimoto | 0:0e0631af0305 | 167 | v_uint32x4() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 168 | explicit v_uint32x4(uint32x4_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 169 | v_uint32x4(unsigned v0, unsigned v1, unsigned v2, unsigned v3) |
| RyoheiHagimoto | 0:0e0631af0305 | 170 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 171 | unsigned v[] = {v0, v1, v2, v3}; |
| RyoheiHagimoto | 0:0e0631af0305 | 172 | val = vld1q_u32(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 173 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 174 | unsigned get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 175 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 176 | return vgetq_lane_u32(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 177 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 178 | |
| RyoheiHagimoto | 0:0e0631af0305 | 179 | uint32x4_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 180 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 181 | |
| RyoheiHagimoto | 0:0e0631af0305 | 182 | struct v_int32x4 |
| RyoheiHagimoto | 0:0e0631af0305 | 183 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 184 | typedef int lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 185 | enum { nlanes = 4 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 186 | |
| RyoheiHagimoto | 0:0e0631af0305 | 187 | v_int32x4() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 188 | explicit v_int32x4(int32x4_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 189 | v_int32x4(int v0, int v1, int v2, int v3) |
| RyoheiHagimoto | 0:0e0631af0305 | 190 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 191 | int v[] = {v0, v1, v2, v3}; |
| RyoheiHagimoto | 0:0e0631af0305 | 192 | val = vld1q_s32(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 193 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 194 | int get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 195 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 196 | return vgetq_lane_s32(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 197 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 198 | int32x4_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 199 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 200 | |
| RyoheiHagimoto | 0:0e0631af0305 | 201 | struct v_float32x4 |
| RyoheiHagimoto | 0:0e0631af0305 | 202 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 203 | typedef float lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 204 | enum { nlanes = 4 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 205 | |
| RyoheiHagimoto | 0:0e0631af0305 | 206 | v_float32x4() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 207 | explicit v_float32x4(float32x4_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 208 | v_float32x4(float v0, float v1, float v2, float v3) |
| RyoheiHagimoto | 0:0e0631af0305 | 209 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 210 | float v[] = {v0, v1, v2, v3}; |
| RyoheiHagimoto | 0:0e0631af0305 | 211 | val = vld1q_f32(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 212 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 213 | float get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 214 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 215 | return vgetq_lane_f32(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 216 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 217 | float32x4_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 218 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 219 | |
| RyoheiHagimoto | 0:0e0631af0305 | 220 | struct v_uint64x2 |
| RyoheiHagimoto | 0:0e0631af0305 | 221 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 222 | typedef uint64 lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 223 | enum { nlanes = 2 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 224 | |
| RyoheiHagimoto | 0:0e0631af0305 | 225 | v_uint64x2() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 226 | explicit v_uint64x2(uint64x2_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 227 | v_uint64x2(unsigned v0, unsigned v1) |
| RyoheiHagimoto | 0:0e0631af0305 | 228 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 229 | uint64 v[] = {v0, v1}; |
| RyoheiHagimoto | 0:0e0631af0305 | 230 | val = vld1q_u64(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 231 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 232 | uint64 get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 233 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 234 | return vgetq_lane_u64(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 235 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 236 | uint64x2_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 237 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 238 | |
| RyoheiHagimoto | 0:0e0631af0305 | 239 | struct v_int64x2 |
| RyoheiHagimoto | 0:0e0631af0305 | 240 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 241 | typedef int64 lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 242 | enum { nlanes = 2 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 243 | |
| RyoheiHagimoto | 0:0e0631af0305 | 244 | v_int64x2() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 245 | explicit v_int64x2(int64x2_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 246 | v_int64x2(int v0, int v1) |
| RyoheiHagimoto | 0:0e0631af0305 | 247 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 248 | int64 v[] = {v0, v1}; |
| RyoheiHagimoto | 0:0e0631af0305 | 249 | val = vld1q_s64(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 250 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 251 | int64 get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 252 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 253 | return vgetq_lane_s64(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 254 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 255 | int64x2_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 256 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 257 | |
| RyoheiHagimoto | 0:0e0631af0305 | 258 | #if CV_SIMD128_64F |
| RyoheiHagimoto | 0:0e0631af0305 | 259 | struct v_float64x2 |
| RyoheiHagimoto | 0:0e0631af0305 | 260 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 261 | typedef double lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 262 | enum { nlanes = 2 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 263 | |
| RyoheiHagimoto | 0:0e0631af0305 | 264 | v_float64x2() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 265 | explicit v_float64x2(float64x2_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 266 | v_float64x2(double v0, double v1) |
| RyoheiHagimoto | 0:0e0631af0305 | 267 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 268 | double v[] = {v0, v1}; |
| RyoheiHagimoto | 0:0e0631af0305 | 269 | val = vld1q_f64(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 270 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 271 | double get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 272 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 273 | return vgetq_lane_f64(val, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 274 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 275 | float64x2_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 276 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 277 | #endif |
| RyoheiHagimoto | 0:0e0631af0305 | 278 | |
#if defined (HAVE_FP16)
// Workaround for old compilers that lack the f16 reinterpret/load/store
// intrinsics: implement them in terms of their s16 equivalents using
// C-style vector casts (bit-pattern preserving).
template <typename T> static inline int16x4_t vreinterpret_s16_f16(T a)
{ return (int16x4_t)a; }
template <typename T> static inline float16x4_t vreinterpret_f16_s16(T a)
{ return (float16x4_t)a; }
// Load 4 half-float lanes from memory via an s16 load plus reinterpret.
template <typename T> static inline float16x4_t vld1_f16(const T* ptr)
{ return vreinterpret_f16_s16(vld1_s16((const short*)ptr)); }
// Store 4 half-float lanes to memory via reinterpret plus an s16 store.
template <typename T> static inline void vst1_f16(T* ptr, float16x4_t a)
{ vst1_s16((short*)ptr, vreinterpret_s16_f16(a)); }
| RyoheiHagimoto | 0:0e0631af0305 | 289 | |
| RyoheiHagimoto | 0:0e0631af0305 | 290 | struct v_float16x4 |
| RyoheiHagimoto | 0:0e0631af0305 | 291 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 292 | typedef short lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 293 | enum { nlanes = 4 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 294 | |
| RyoheiHagimoto | 0:0e0631af0305 | 295 | v_float16x4() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 296 | explicit v_float16x4(float16x4_t v) : val(v) {} |
| RyoheiHagimoto | 0:0e0631af0305 | 297 | v_float16x4(short v0, short v1, short v2, short v3) |
| RyoheiHagimoto | 0:0e0631af0305 | 298 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 299 | short v[] = {v0, v1, v2, v3}; |
| RyoheiHagimoto | 0:0e0631af0305 | 300 | val = vld1_f16(v); |
| RyoheiHagimoto | 0:0e0631af0305 | 301 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 302 | short get0() const |
| RyoheiHagimoto | 0:0e0631af0305 | 303 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 304 | return vget_lane_s16(vreinterpret_s16_f16(val), 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 305 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 306 | float16x4_t val; |
| RyoheiHagimoto | 0:0e0631af0305 | 307 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 308 | #endif |
| RyoheiHagimoto | 0:0e0631af0305 | 309 | |
// OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) generates, for vector type v_##_Tpv:
//  - v_setzero_##suffix() / v_setall_##suffix(v): broadcast 0 / v to all lanes;
//  - a trivial same-type vreinterpretq_##suffix##_##suffix identity so the
//    reinterpret casts below can be generated uniformly;
//  - v_reinterpret_as_{u8,s8,u16,s16,u32,s32,u64,s64,f32}: zero-cost
//    bit-pattern casts to each of the other 128-bit vector types.
#define OPENCV_HAL_IMPL_NEON_INIT(_Tpv, _Tp, suffix) \
inline v_##_Tpv v_setzero_##suffix() { return v_##_Tpv(vdupq_n_##suffix((_Tp)0)); } \
inline v_##_Tpv v_setall_##suffix(_Tp v) { return v_##_Tpv(vdupq_n_##suffix(v)); } \
inline _Tpv##_t vreinterpretq_##suffix##_##suffix(_Tpv##_t v) { return v; } \
inline v_uint8x16 v_reinterpret_as_u8(const v_##_Tpv& v) { return v_uint8x16(vreinterpretq_u8_##suffix(v.val)); } \
inline v_int8x16 v_reinterpret_as_s8(const v_##_Tpv& v) { return v_int8x16(vreinterpretq_s8_##suffix(v.val)); } \
inline v_uint16x8 v_reinterpret_as_u16(const v_##_Tpv& v) { return v_uint16x8(vreinterpretq_u16_##suffix(v.val)); } \
inline v_int16x8 v_reinterpret_as_s16(const v_##_Tpv& v) { return v_int16x8(vreinterpretq_s16_##suffix(v.val)); } \
inline v_uint32x4 v_reinterpret_as_u32(const v_##_Tpv& v) { return v_uint32x4(vreinterpretq_u32_##suffix(v.val)); } \
inline v_int32x4 v_reinterpret_as_s32(const v_##_Tpv& v) { return v_int32x4(vreinterpretq_s32_##suffix(v.val)); } \
inline v_uint64x2 v_reinterpret_as_u64(const v_##_Tpv& v) { return v_uint64x2(vreinterpretq_u64_##suffix(v.val)); } \
inline v_int64x2 v_reinterpret_as_s64(const v_##_Tpv& v) { return v_int64x2(vreinterpretq_s64_##suffix(v.val)); } \
inline v_float32x4 v_reinterpret_as_f32(const v_##_Tpv& v) { return v_float32x4(vreinterpretq_f32_##suffix(v.val)); }

// Instantiate the init/reinterpret helpers for every 128-bit lane type.
OPENCV_HAL_IMPL_NEON_INIT(uint8x16, uchar, u8)
OPENCV_HAL_IMPL_NEON_INIT(int8x16, schar, s8)
OPENCV_HAL_IMPL_NEON_INIT(uint16x8, ushort, u16)
OPENCV_HAL_IMPL_NEON_INIT(int16x8, short, s16)
OPENCV_HAL_IMPL_NEON_INIT(uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_NEON_INIT(int32x4, int, s32)
OPENCV_HAL_IMPL_NEON_INIT(uint64x2, uint64, u64)
OPENCV_HAL_IMPL_NEON_INIT(int64x2, int64, s64)
OPENCV_HAL_IMPL_NEON_INIT(float32x4, float, f32)
#if CV_SIMD128_64F
// On AArch64, also generate the float64x2 init helpers and the
// v_reinterpret_as_f64 conversion from every other vector type.
#define OPENCV_HAL_IMPL_NEON_INIT_64(_Tpv, suffix) \
inline v_float64x2 v_reinterpret_as_f64(const v_##_Tpv& v) { return v_float64x2(vreinterpretq_f64_##suffix(v.val)); }
OPENCV_HAL_IMPL_NEON_INIT(float64x2, double, f64)
OPENCV_HAL_IMPL_NEON_INIT_64(uint8x16, u8)
OPENCV_HAL_IMPL_NEON_INIT_64(int8x16, s8)
OPENCV_HAL_IMPL_NEON_INIT_64(uint16x8, u16)
OPENCV_HAL_IMPL_NEON_INIT_64(int16x8, s16)
OPENCV_HAL_IMPL_NEON_INIT_64(uint32x4, u32)
OPENCV_HAL_IMPL_NEON_INIT_64(int32x4, s32)
OPENCV_HAL_IMPL_NEON_INIT_64(uint64x2, u64)
OPENCV_HAL_IMPL_NEON_INIT_64(int64x2, s64)
OPENCV_HAL_IMPL_NEON_INIT_64(float32x4, f32)
OPENCV_HAL_IMPL_NEON_INIT_64(float64x2, f64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 348 | |
// OPENCV_HAL_IMPL_NEON_PACK generates saturating narrowing ("pack") operations
// from wide vector _Tpwvec down to narrow vector _Tpvec:
//  - v_##pack(a, b): narrow both inputs with saturation and combine the two
//    half-registers into one full vector (a in low half, b in high half);
//  - v_##pack##_store(ptr, a): narrow one vector and store the half-register;
//  - v_rshr_##pack / v_rshr_##pack##_store: same, but first apply a rounding
//    right shift by the compile-time constant n (vqrshr*_n).
// 'op' selects the NEON narrowing flavor: 'n' keeps the signedness,
// 'un' narrows signed input to unsigned output.
#define OPENCV_HAL_IMPL_NEON_PACK(_Tpvec, _Tp, hreg, suffix, _Tpwvec, wsuffix, pack, op) \
inline _Tpvec v_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \
    hreg a1 = vqmov##op##_##wsuffix(a.val), b1 = vqmov##op##_##wsuffix(b.val); \
    return _Tpvec(vcombine_##suffix(a1, b1)); \
} \
inline void v_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \
    hreg a1 = vqmov##op##_##wsuffix(a.val); \
    vst1_##suffix(ptr, a1); \
} \
template<int n> inline \
_Tpvec v_rshr_##pack(const _Tpwvec& a, const _Tpwvec& b) \
{ \
    hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \
    hreg b1 = vqrshr##op##_n_##wsuffix(b.val, n); \
    return _Tpvec(vcombine_##suffix(a1, b1)); \
} \
template<int n> inline \
void v_rshr_##pack##_store(_Tp* ptr, const _Tpwvec& a) \
{ \
    hreg a1 = vqrshr##op##_n_##wsuffix(a.val, n); \
    vst1_##suffix(ptr, a1); \
}
| RyoheiHagimoto | 0:0e0631af0305 | 373 | |
// Same-signedness saturating packs: u16->u8, s16->s8, u32->u16, s32->s16, u64->u32, s64->s32.
OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_uint16x8, u16, pack, n)
OPENCV_HAL_IMPL_NEON_PACK(v_int8x16, schar, int8x8_t, s8, v_int16x8, s16, pack, n)
OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_uint32x4, u32, pack, n)
OPENCV_HAL_IMPL_NEON_PACK(v_int16x8, short, int16x4_t, s16, v_int32x4, s32, pack, n)
OPENCV_HAL_IMPL_NEON_PACK(v_uint32x4, unsigned, uint32x2_t, u32, v_uint64x2, u64, pack, n)
OPENCV_HAL_IMPL_NEON_PACK(v_int32x4, int, int32x2_t, s32, v_int64x2, s64, pack, n)

// Signed-to-unsigned saturating packs (negative values clamp to 0): s16->u8, s32->u16.
OPENCV_HAL_IMPL_NEON_PACK(v_uint8x16, uchar, uint8x8_t, u8, v_int16x8, s16, pack_u, un)
OPENCV_HAL_IMPL_NEON_PACK(v_uint16x8, ushort, uint16x4_t, u16, v_int32x4, s32, pack_u, un)
| RyoheiHagimoto | 0:0e0631af0305 | 383 | |
| RyoheiHagimoto | 0:0e0631af0305 | 384 | inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, |
| RyoheiHagimoto | 0:0e0631af0305 | 385 | const v_float32x4& m1, const v_float32x4& m2, |
| RyoheiHagimoto | 0:0e0631af0305 | 386 | const v_float32x4& m3) |
| RyoheiHagimoto | 0:0e0631af0305 | 387 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 388 | float32x2_t vl = vget_low_f32(v.val), vh = vget_high_f32(v.val); |
| RyoheiHagimoto | 0:0e0631af0305 | 389 | float32x4_t res = vmulq_lane_f32(m0.val, vl, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 390 | res = vmlaq_lane_f32(res, m1.val, vl, 1); |
| RyoheiHagimoto | 0:0e0631af0305 | 391 | res = vmlaq_lane_f32(res, m2.val, vh, 0); |
| RyoheiHagimoto | 0:0e0631af0305 | 392 | res = vmlaq_lane_f32(res, m3.val, vh, 1); |
| RyoheiHagimoto | 0:0e0631af0305 | 393 | return v_float32x4(res); |
| RyoheiHagimoto | 0:0e0631af0305 | 394 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 395 | |
// OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) maps a C++ binary
// operator and its compound-assignment form (bin_op=) for vector type _Tpvec
// onto a single NEON intrinsic.
#define OPENCV_HAL_IMPL_NEON_BIN_OP(bin_op, _Tpvec, intrin) \
inline _Tpvec operator bin_op (const _Tpvec& a, const _Tpvec& b) \
{ \
    return _Tpvec(intrin(a.val, b.val)); \
} \
inline _Tpvec& operator bin_op##= (_Tpvec& a, const _Tpvec& b) \
{ \
    a.val = intrin(a.val, b.val); \
    return a; \
}
| RyoheiHagimoto | 0:0e0631af0305 | 406 | |
// Arithmetic operators (binary form + compound assignment, via the macro
// above) for every vector type. Note the semantics split:
//  - 8/16-bit integer add/sub use the *saturating* intrinsics (vqaddq/vqsubq),
//  - 32/64-bit integer ops wrap modulo 2^N (vaddq/vsubq/vmulq),
//  - float ops are ordinary IEEE arithmetic.
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint8x16, vqaddq_u8)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint8x16, vqsubq_u8)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int8x16, vqaddq_s8)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int8x16, vqsubq_s8)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint16x8, vqaddq_u16)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint16x8, vqsubq_u16)
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint16x8, vmulq_u16)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int16x8, vqaddq_s16)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int16x8, vqsubq_s16)
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int16x8, vmulq_s16)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int32x4, vaddq_s32)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int32x4, vsubq_s32)
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_int32x4, vmulq_s32)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint32x4, vaddq_u32)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint32x4, vsubq_u32)
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_uint32x4, vmulq_u32)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float32x4, vaddq_f32)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float32x4, vsubq_f32)
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float32x4, vmulq_f32)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_int64x2, vaddq_s64)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_int64x2, vsubq_s64)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_uint64x2, vaddq_u64)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_uint64x2, vsubq_u64)
#if CV_SIMD128_64F
// AArch64 has true vector-divide instructions, so division is exact.
OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float32x4, vdivq_f32)
OPENCV_HAL_IMPL_NEON_BIN_OP(+, v_float64x2, vaddq_f64)
OPENCV_HAL_IMPL_NEON_BIN_OP(-, v_float64x2, vsubq_f64)
OPENCV_HAL_IMPL_NEON_BIN_OP(*, v_float64x2, vmulq_f64)
OPENCV_HAL_IMPL_NEON_BIN_OP(/, v_float64x2, vdivq_f64)
#else
// ARMv7 NEON has no vector divide: approximate a/b as a * (1/b) using the
// reciprocal estimate (vrecpeq) plus two Newton-Raphson refinement steps —
// each vrecpsq/vmulq pair roughly doubles the number of accurate bits.
inline v_float32x4 operator / (const v_float32x4& a, const v_float32x4& b)
{
    float32x4_t reciprocal = vrecpeq_f32(b.val);
    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
    return v_float32x4(vmulq_f32(a.val, reciprocal));
}
// Compound-assignment form of the approximate division above.
inline v_float32x4& operator /= (v_float32x4& a, const v_float32x4& b)
{
    float32x4_t reciprocal = vrecpeq_f32(b.val);
    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
    reciprocal = vmulq_f32(vrecpsq_f32(b.val, reciprocal), reciprocal);
    a.val = vmulq_f32(a.val, reciprocal);
    return a;
}
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 453 | |
// Widening multiply: lane products are kept at double width instead of being
// truncated. c receives the products of the low half-vectors, d the products
// of the high half-vectors (vmull widens 16->32 or 32->64 bits).
inline void v_mul_expand(const v_int16x8& a, const v_int16x8& b,
                         v_int32x4& c, v_int32x4& d)
{
    c.val = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
    d.val = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
}

inline void v_mul_expand(const v_uint16x8& a, const v_uint16x8& b,
                         v_uint32x4& c, v_uint32x4& d)
{
    c.val = vmull_u16(vget_low_u16(a.val), vget_low_u16(b.val));
    d.val = vmull_u16(vget_high_u16(a.val), vget_high_u16(b.val));
}

inline void v_mul_expand(const v_uint32x4& a, const v_uint32x4& b,
                         v_uint64x2& c, v_uint64x2& d)
{
    c.val = vmull_u32(vget_low_u32(a.val), vget_low_u32(b.val));
    d.val = vmull_u32(vget_high_u32(a.val), vget_high_u32(b.val));
}
| RyoheiHagimoto | 0:0e0631af0305 | 474 | |
// Dot product of adjacent 16-bit lane pairs: result lane i = a[2i]*b[2i] +
// a[2i+1]*b[2i+1], computed at 32-bit width. vmull produces the widened
// products; vuzpq de-interleaves them so cd.val[0] holds the even-index
// products and cd.val[1] the odd-index ones, and a single add pairs them up.
inline v_int32x4 v_dotprod(const v_int16x8& a, const v_int16x8& b)
{
    int32x4_t c = vmull_s16(vget_low_s16(a.val), vget_low_s16(b.val));
    int32x4_t d = vmull_s16(vget_high_s16(a.val), vget_high_s16(b.val));
    int32x4x2_t cd = vuzpq_s32(c, d);
    return v_int32x4(vaddq_s32(cd.val[0], cd.val[1]));
}
| RyoheiHagimoto | 0:0e0631af0305 | 482 | |
// Bitwise AND/OR/XOR (reusing the BIN_OP macro) plus bitwise NOT for integer
// vector types. NOT is routed through the u8 view because vmvnq exists only
// for 8/16/32-bit lane widths.
#define OPENCV_HAL_IMPL_NEON_LOGIC_OP(_Tpvec, suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(&, _Tpvec, vandq_##suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(|, _Tpvec, vorrq_##suffix) \
OPENCV_HAL_IMPL_NEON_BIN_OP(^, _Tpvec, veorq_##suffix) \
inline _Tpvec operator ~ (const _Tpvec& a) \
{ \
    return _Tpvec(vreinterpretq_##suffix##_u8(vmvnq_u8(vreinterpretq_u8_##suffix(a.val)))); \
}

OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint8x16, u8)
OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int8x16, s8)
OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint16x8, u16)
OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int16x8, s16)
OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint32x4, u32)
OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int32x4, s32)
OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_uint64x2, u64)
OPENCV_HAL_IMPL_NEON_LOGIC_OP(v_int64x2, s64)
| RyoheiHagimoto | 0:0e0631af0305 | 500 | |
// Bitwise ops for v_float32x4: NEON has no float bitwise intrinsics, so the
// lanes are reinterpreted as s32, operated on, and reinterpreted back.
#define OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(bin_op, intrin) \
inline v_float32x4 operator bin_op (const v_float32x4& a, const v_float32x4& b) \
{ \
    return v_float32x4(vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val)))); \
} \
inline v_float32x4& operator bin_op##= (v_float32x4& a, const v_float32x4& b) \
{ \
    a.val = vreinterpretq_f32_s32(intrin(vreinterpretq_s32_f32(a.val), vreinterpretq_s32_f32(b.val))); \
    return a; \
}

OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(&, vandq_s32)
OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(|, vorrq_s32)
OPENCV_HAL_IMPL_NEON_FLT_BIT_OP(^, veorq_s32)

// Bitwise NOT of the float bit pattern, via the s32 view.
inline v_float32x4 operator ~ (const v_float32x4& a)
{
    return v_float32x4(vreinterpretq_f32_s32(vmvnq_s32(vreinterpretq_s32_f32(a.val))));
}
| RyoheiHagimoto | 0:0e0631af0305 | 520 | |
#if CV_SIMD128_64F
// AArch64: exact IEEE square root instruction.
inline v_float32x4 v_sqrt(const v_float32x4& x)
{
    return v_float32x4(vsqrtq_f32(x.val));
}

// 1/sqrt(x) computed exactly as a division by the exact square root.
inline v_float32x4 v_invsqrt(const v_float32x4& x)
{
    v_float32x4 one = v_setall_f32(1.0f);
    return one / v_sqrt(x);
}
#else
// ARMv7: approximate sqrt(x) as x * rsqrt(x). The reciprocal-sqrt estimate
// (vrsqrteq) is refined with two Newton-Raphson steps (vrsqrtsq/vmulq pairs).
// The input is clamped to FLT_MIN so rsqrt(0) = inf cannot turn the final
// x * e product into NaN (0 * inf); the multiply by the original x.val still
// returns 0 for x == 0.
inline v_float32x4 v_sqrt(const v_float32x4& x)
{
    float32x4_t x1 = vmaxq_f32(x.val, vdupq_n_f32(FLT_MIN));
    float32x4_t e = vrsqrteq_f32(x1);
    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x1, e), e), e);
    return v_float32x4(vmulq_f32(x.val, e));
}

// Approximate 1/sqrt(x) with two refinement steps (no zero clamp: 1/sqrt(0)
// legitimately yields inf).
inline v_float32x4 v_invsqrt(const v_float32x4& x)
{
    float32x4_t e = vrsqrteq_f32(x.val);
    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
    e = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x.val, e), e), e);
    return v_float32x4(e);
}
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 550 | |
// |a| for signed integer vectors, returned as the matching unsigned type.
// vabsq wraps on INT_MIN (|-2^(N-1)| stays -2^(N-1) in two's complement), but
// the reinterpret to unsigned turns that bit pattern into 2^(N-1), which is
// the mathematically correct magnitude.
#define OPENCV_HAL_IMPL_NEON_ABS(_Tpuvec, _Tpsvec, usuffix, ssuffix) \
inline _Tpuvec v_abs(const _Tpsvec& a) { return v_reinterpret_as_##usuffix(_Tpsvec(vabsq_##ssuffix(a.val))); }

OPENCV_HAL_IMPL_NEON_ABS(v_uint8x16, v_int8x16, u8, s8)
OPENCV_HAL_IMPL_NEON_ABS(v_uint16x8, v_int16x8, u16, s16)
OPENCV_HAL_IMPL_NEON_ABS(v_uint32x4, v_int32x4, u32, s32)

// |x| for float lanes (clears the sign bit).
inline v_float32x4 v_abs(v_float32x4 x)
{ return v_float32x4(vabsq_f32(x.val)); }
| RyoheiHagimoto | 0:0e0631af0305 | 560 | |
#if CV_SIMD128_64F
// Bitwise ops for v_float64x2 (AArch64 only): no f64 bitwise intrinsics
// exist, so the lanes are reinterpreted as s64, operated on, and cast back.
#define OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(bin_op, intrin) \
inline v_float64x2 operator bin_op (const v_float64x2& a, const v_float64x2& b) \
{ \
    return v_float64x2(vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val)))); \
} \
inline v_float64x2& operator bin_op##= (v_float64x2& a, const v_float64x2& b) \
{ \
    a.val = vreinterpretq_f64_s64(intrin(vreinterpretq_s64_f64(a.val), vreinterpretq_s64_f64(b.val))); \
    return a; \
}

OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(&, vandq_s64)
OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(|, vorrq_s64)
OPENCV_HAL_IMPL_NEON_DBL_BIT_OP(^, veorq_s64)

// Bitwise NOT: vmvnq has no 64-bit form, so invert through the s32 view.
inline v_float64x2 operator ~ (const v_float64x2& a)
{
    return v_float64x2(vreinterpretq_f64_s32(vmvnq_s32(vreinterpretq_s32_f64(a.val))));
}

// Exact element-wise square root (AArch64 vsqrtq_f64 instruction).
inline v_float64x2 v_sqrt(const v_float64x2& x)
{
    return v_float64x2(vsqrtq_f64(x.val));
}
| RyoheiHagimoto | 0:0e0631af0305 | 586 | |
| RyoheiHagimoto | 0:0e0631af0305 | 587 | inline v_float64x2 v_invsqrt(const v_float64x2& x) |
| RyoheiHagimoto | 0:0e0631af0305 | 588 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 589 | v_float64x2 one = v_setall_f64(1.0f); |
| RyoheiHagimoto | 0:0e0631af0305 | 590 | return one / v_sqrt(x); |
| RyoheiHagimoto | 0:0e0631af0305 | 591 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 592 | |
// |x| for double lanes (clears the sign bit; AArch64 only).
inline v_float64x2 v_abs(v_float64x2 x)
{ return v_float64x2(vabsq_f64(x.val)); }
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 596 | |
| RyoheiHagimoto | 0:0e0631af0305 | 597 | // TODO: exp, log, sin, cos |
| RyoheiHagimoto | 0:0e0631af0305 | 598 | |
// Element-wise binary function wrapper: exposes a two-argument NEON intrinsic
// under an OpenCV universal-intrinsic name.
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC(_Tpvec, func, intrin) \
inline _Tpvec func(const _Tpvec& a, const _Tpvec& b) \
{ \
    return _Tpvec(intrin(a.val, b.val)); \
}

// Element-wise min/max for all 8/16/32-bit integer types and f32
// (f64 only where AArch64 instructions are available).
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_min, vminq_u8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_max, vmaxq_u8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_min, vminq_s8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_max, vmaxq_s8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_min, vminq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_max, vmaxq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_min, vminq_s16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_max, vmaxq_s16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_min, vminq_u32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_max, vmaxq_u32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_min, vminq_s32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int32x4, v_max, vmaxq_s32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_min, vminq_f32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_max, vmaxq_f32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_min, vminq_f64)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_max, vmaxq_f64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 623 | |
#if CV_SIMD128_64F
// NEON provides no 64-bit vmvnq; emulate bitwise NOT by XOR-ing with an
// all-ones vector built by duplicating 0xFFFFFFFF across the u32 view.
// These helpers feed the != operator for the 64-bit comparison macros below.
inline int64x2_t vmvnq_s64(int64x2_t a)
{
    int64x2_t vx = vreinterpretq_s64_u32(vdupq_n_u32(0xFFFFFFFF));
    return veorq_s64(a, vx);
}
inline uint64x2_t vmvnq_u64(uint64x2_t a)
{
    uint64x2_t vx = vreinterpretq_u64_u32(vdupq_n_u32(0xFFFFFFFF));
    return veorq_u64(a, vx);
}
#endif
// Comparison operators; each lane of the result is all-ones (true) or
// all-zeros (false). NEON comparisons return unsigned masks, so `cast`
// reinterprets the mask back into the operand's lane type. operator != is
// synthesized as NOT(==) with vmvnq on the `not_suffix` (unsigned) view.
#define OPENCV_HAL_IMPL_NEON_INT_CMP_OP(_Tpvec, cast, suffix, not_suffix) \
inline _Tpvec operator == (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vceqq_##suffix(a.val, b.val))); } \
inline _Tpvec operator != (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vmvnq_##not_suffix(vceqq_##suffix(a.val, b.val)))); } \
inline _Tpvec operator < (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcltq_##suffix(a.val, b.val))); } \
inline _Tpvec operator > (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcgtq_##suffix(a.val, b.val))); } \
inline _Tpvec operator <= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcleq_##suffix(a.val, b.val))); } \
inline _Tpvec operator >= (const _Tpvec& a, const _Tpvec& b) \
{ return _Tpvec(cast(vcgeq_##suffix(a.val, b.val))); }

OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint8x16, OPENCV_HAL_NOP, u8, u8)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int8x16, vreinterpretq_s8_u8, s8, u8)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint16x8, OPENCV_HAL_NOP, u16, u16)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int16x8, vreinterpretq_s16_u16, s16, u16)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint32x4, OPENCV_HAL_NOP, u32, u32)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int32x4, vreinterpretq_s32_u32, s32, u32)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float32x4, vreinterpretq_f32_u32, f32, u32)
#if CV_SIMD128_64F
// 64-bit lane comparisons (vceqq_u64 etc.) require AArch64; the 64-bit vmvnq
// emulations defined in the original file supply the != path.
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_uint64x2, OPENCV_HAL_NOP, u64, u64)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_int64x2, vreinterpretq_s64_u64, s64, u64)
OPENCV_HAL_IMPL_NEON_INT_CMP_OP(v_float64x2, vreinterpretq_f64_u64, f64, u64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 662 | |
// Wrap-around (modulo 2^N) add/sub for 8/16-bit lanes — unlike operator +/-,
// which use the saturating intrinsics for these narrow types.
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_add_wrap, vaddq_u8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_add_wrap, vaddq_s8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_add_wrap, vaddq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_add_wrap, vaddq_s16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_sub_wrap, vsubq_u8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int8x16, v_sub_wrap, vsubq_s8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_sub_wrap, vsubq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_int16x8, v_sub_wrap, vsubq_s16)

// TODO: absdiff for signed integers
// Absolute difference |a - b|; unsigned and float forms map directly to vabdq.
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint8x16, v_absdiff, vabdq_u8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint16x8, v_absdiff, vabdq_u16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_uint32x4, v_absdiff, vabdq_u32)
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float32x4, v_absdiff, vabdq_f32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_BIN_FUNC(v_float64x2, v_absdiff, vabdq_f64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 680 | |
// Variant of BIN_FUNC whose result type differs from the argument type;
// `cast` reinterprets the raw NEON result into the destination wrapper's
// lane type.
#define OPENCV_HAL_IMPL_NEON_BIN_FUNC2(_Tpvec, _Tpvec2, cast, func, intrin) \
inline _Tpvec2 func(const _Tpvec& a, const _Tpvec& b) \
{ \
    return _Tpvec2(cast(intrin(a.val, b.val))); \
}

// Signed absdiff returns an *unsigned* vector: |a - b| can exceed the signed
// range (e.g. |(-128) - 127| = 255), but the vabdq bit pattern reinterpreted
// as unsigned is the correct magnitude.
OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int8x16, v_uint8x16, vreinterpretq_u8_s8, v_absdiff, vabdq_s8)
OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int16x8, v_uint16x8, vreinterpretq_u16_s16, v_absdiff, vabdq_s16)
OPENCV_HAL_IMPL_NEON_BIN_FUNC2(v_int32x4, v_uint32x4, vreinterpretq_u32_s32, v_absdiff, vabdq_s32)
| RyoheiHagimoto | 0:0e0631af0305 | 690 | |
// sqrt(a*a + b*b) per lane; the sum of squares is formed with one
// multiply-accumulate (vmlaq) and then passed through v_sqrt.
inline v_float32x4 v_magnitude(const v_float32x4& a, const v_float32x4& b)
{
    v_float32x4 x(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
    return v_sqrt(x);
}

// a*a + b*b per lane (no square root).
inline v_float32x4 v_sqr_magnitude(const v_float32x4& a, const v_float32x4& b)
{
    return v_float32x4(vmlaq_f32(vmulq_f32(a.val, a.val), b.val, b.val));
}

// a*b + c per lane via vmlaq multiply-accumulate.
inline v_float32x4 v_muladd(const v_float32x4& a, const v_float32x4& b, const v_float32x4& c)
{
    return v_float32x4(vmlaq_f32(c.val, a.val, b.val));
}
| RyoheiHagimoto | 0:0e0631af0305 | 706 | |
#if CV_SIMD128_64F
// Double-precision counterparts (AArch64 only); written with separate
// vmulq_f64/vaddq_f64 rather than a fused multiply-accumulate intrinsic.
inline v_float64x2 v_magnitude(const v_float64x2& a, const v_float64x2& b)
{
    v_float64x2 x(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
    return v_sqrt(x);
}

// a*a + b*b per lane (no square root).
inline v_float64x2 v_sqr_magnitude(const v_float64x2& a, const v_float64x2& b)
{
    return v_float64x2(vaddq_f64(vmulq_f64(a.val, a.val), vmulq_f64(b.val, b.val)));
}

// a*b + c per lane.
inline v_float64x2 v_muladd(const v_float64x2& a, const v_float64x2& b, const v_float64x2& c)
{
    return v_float64x2(vaddq_f64(c.val, vmulq_f64(a.val, b.val)));
}
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 724 | |
// Shift operators — trades efficiency for convenience on the runtime-count
// forms. NEON's vshlq shifts left for positive counts and right for negative
// ones, so operator >> broadcasts (_Tps)-n as the per-lane count. The
// template forms take a compile-time-constant count and use the immediate
// instructions: vshlq_n (left), vshrq_n (right), vrshrq_n (right with
// rounding).
#define OPENCV_HAL_IMPL_NEON_SHIFT_OP(_Tpvec, suffix, _Tps, ssuffix) \
inline _Tpvec operator << (const _Tpvec& a, int n) \
{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)n))); } \
inline _Tpvec operator >> (const _Tpvec& a, int n) \
{ return _Tpvec(vshlq_##suffix(a.val, vdupq_n_##ssuffix((_Tps)-n))); } \
template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
{ return _Tpvec(vshlq_n_##suffix(a.val, n)); } \
template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
{ return _Tpvec(vshrq_n_##suffix(a.val, n)); } \
template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
{ return _Tpvec(vrshrq_n_##suffix(a.val, n)); }

OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint8x16, u8, schar, s8)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int8x16, s8, schar, s8)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint16x8, u16, short, s16)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int16x8, s16, short, s16)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint32x4, u32, int, s32)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int32x4, s32, int, s32)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_uint64x2, u64, int64, s64)
OPENCV_HAL_IMPL_NEON_SHIFT_OP(v_int64x2, s64, int64, s64)
| RyoheiHagimoto | 0:0e0631af0305 | 746 | |
// Load/store of one full 128-bit vector, plus half-vector variants.
// Note: v_load_aligned/v_store_aligned are identical to the unaligned forms —
// vld1q/vst1q are used for both and no alignment hint is applied here.
// v_load_halves concatenates two independent 64-bit loads (ptr0 -> low half,
// ptr1 -> high half).
#define OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(_Tpvec, _Tp, suffix) \
inline _Tpvec v_load(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_aligned(const _Tp* ptr) \
{ return _Tpvec(vld1q_##suffix(ptr)); } \
inline _Tpvec v_load_halves(const _Tp* ptr0, const _Tp* ptr1) \
{ return _Tpvec(vcombine_##suffix(vld1_##suffix(ptr0), vld1_##suffix(ptr1))); } \
inline void v_store(_Tp* ptr, const _Tpvec& a) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store_aligned(_Tp* ptr, const _Tpvec& a) \
{ vst1q_##suffix(ptr, a.val); } \
inline void v_store_low(_Tp* ptr, const _Tpvec& a) \
{ vst1_##suffix(ptr, vget_low_##suffix(a.val)); } \
inline void v_store_high(_Tp* ptr, const _Tpvec& a) \
{ vst1_##suffix(ptr, vget_high_##suffix(a.val)); }

OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint8x16, uchar, u8)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int8x16, schar, s8)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint16x8, ushort, u16)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int16x8, short, s16)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int32x4, int, s32)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_uint64x2, uint64, u64)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_int64x2, int64, s64)
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float32x4, float, f32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_LOADSTORE_OP(v_float64x2, double, f64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 775 | |
#if defined (HAVE_FP16)
// Workaround for old compilers: expose FP16 vectors through a short*
// interface (raw 16-bit storage) instead of a half-precision pointer type.
// Fix: per ACLE, vld1_f16/vst1_f16 take an __fp16 pointer — passing short*
// directly fails to compile on conforming toolchains, so cast explicitly
// (the storage layout is identical: 16 bits per element).
inline v_float16x4 v_load_f16(const short* ptr)
{ return v_float16x4(vld1_f16((const __fp16*)ptr)); }
inline void v_store_f16(short* ptr, v_float16x4& a)
{ vst1_f16((__fp16*)ptr, a.val); }
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 783 | |
// Horizontal reduction (sum/min/max) of an 8-lane vector: three pairwise
// steps (vpadd/vpmin/vpmax on the 64-bit halves) fold 8 values -> 4 -> 2 -> 1;
// lane 0 of the final pairwise result holds the reduction.
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
    a0 = vp##vectorfunc##_##suffix(a0, a0); \
    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0),0); \
}

OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, sum, add, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, max, max, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_uint16x8, uint16x4, unsigned short, min, min, u16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, sum, add, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, max, max, s16)
OPENCV_HAL_IMPL_NEON_REDUCE_OP_8(v_int16x8, int16x4, short, min, min, s16)
| RyoheiHagimoto | 0:0e0631af0305 | 798 | |
// Horizontal reduction (sum/min/max) of a 4-lane vector: the first pairwise
// op folds lanes {0,1} and {2,3} into a0, the second folds a0's two partial
// results; lane 0 of that step is the answer.
// Fix: the second pairwise op previously paired a0 with vget_high(a.val).
// Lane 0 of a NEON pairwise op depends only on its first operand, so the
// result happened to be correct, but the code was misleading and computed a
// meaningless lane 1 — fold a0 with itself instead.
#define OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(_Tpvec, _Tpnvec, scalartype, func, vectorfunc, suffix) \
inline scalartype v_reduce_##func(const _Tpvec& a) \
{ \
    _Tpnvec##_t a0 = vp##vectorfunc##_##suffix(vget_low_##suffix(a.val), vget_high_##suffix(a.val)); \
    return (scalartype)vget_lane_##suffix(vp##vectorfunc##_##suffix(a0, a0), 0); \
}
| RyoheiHagimoto | 0:0e0631af0305 | 805 | |
| RyoheiHagimoto | 0:0e0631af0305 | 806 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, sum, add, u32) |
| RyoheiHagimoto | 0:0e0631af0305 | 807 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, max, max, u32) |
| RyoheiHagimoto | 0:0e0631af0305 | 808 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_uint32x4, uint32x2, unsigned, min, min, u32) |
| RyoheiHagimoto | 0:0e0631af0305 | 809 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, sum, add, s32) |
| RyoheiHagimoto | 0:0e0631af0305 | 810 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, max, max, s32) |
| RyoheiHagimoto | 0:0e0631af0305 | 811 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_int32x4, int32x2, int, min, min, s32) |
| RyoheiHagimoto | 0:0e0631af0305 | 812 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, sum, add, f32) |
| RyoheiHagimoto | 0:0e0631af0305 | 813 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, max, max, f32) |
| RyoheiHagimoto | 0:0e0631af0305 | 814 | OPENCV_HAL_IMPL_NEON_REDUCE_OP_4(v_float32x4, float32x2, float, min, min, f32) |
| RyoheiHagimoto | 0:0e0631af0305 | 815 | |
// Pack the sign (top) bit of each of the 16 byte lanes into bits 0..15 of
// the result: bit i == MSB of lane i (same contract as SSE movemask).
inline int v_signmask(const v_uint8x16& a)
{
    // Per-lane left-shift amounts 0..7, repeated for each 64-bit half.
    int8x8_t m0 = vcreate_s8(CV_BIG_UINT(0x0706050403020100));
    // Isolate each lane's MSB as 0/1, then move lane i's bit to position (i & 7).
    uint8x16_t v0 = vshlq_u8(vshrq_n_u8(a.val, 7), vcombine_s8(m0, m0));
    // Widening pairwise adds OR-accumulate (bits are disjoint) each 64-bit
    // half into a single byte-sized mask in its low lane.
    uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(vpaddlq_u8(v0)));
    // Low half supplies bits 0..7, high half bits 8..15.
    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 8);
}
inline int v_signmask(const v_int8x16& a)
{ return v_signmask(v_reinterpret_as_u8(a)); }
| RyoheiHagimoto | 0:0e0631af0305 | 825 | |
// Pack the sign bit of each of the 8 halfword lanes into bits 0..7 of the
// result (bit i == MSB of lane i).
inline int v_signmask(const v_uint16x8& a)
{
    // Per-lane shift amounts 0..3, repeated for both 64-bit halves.
    int16x4_t m0 = vcreate_s16(CV_BIG_UINT(0x0003000200010000));
    // Isolate MSBs, then place lane i's bit at position (i & 3).
    uint16x8_t v0 = vshlq_u16(vshrq_n_u16(a.val, 15), vcombine_s16(m0, m0));
    uint64x2_t v1 = vpaddlq_u32(vpaddlq_u16(v0));
    // Low half gives bits 0..3, high half bits 4..7.
    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 4);
}
inline int v_signmask(const v_int16x8& a)
{ return v_signmask(v_reinterpret_as_u16(a)); }
| RyoheiHagimoto | 0:0e0631af0305 | 835 | |
// Pack the sign bit of each of the 4 word lanes into bits 0..3 of the
// result (bit i == MSB of lane i).
inline int v_signmask(const v_uint32x4& a)
{
    // Per-lane shift amounts {0, 1}, repeated for both halves.
    int32x2_t m0 = vcreate_s32(CV_BIG_UINT(0x0000000100000000));
    uint32x4_t v0 = vshlq_u32(vshrq_n_u32(a.val, 31), vcombine_s32(m0, m0));
    uint64x2_t v1 = vpaddlq_u32(v0);
    // Low half gives bits 0..1, high half bits 2..3.
    return (int)vgetq_lane_u64(v1, 0) + ((int)vgetq_lane_u64(v1, 1) << 2);
}
inline int v_signmask(const v_int32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
inline int v_signmask(const v_float32x4& a)
{ return v_signmask(v_reinterpret_as_u32(a)); }
#if CV_SIMD128_64F
// Pack the sign bit of the 2 doubleword lanes into bits 0..1 of the result.
inline int v_signmask(const v_uint64x2& a)
{
    // Shift amount 0: the vshlq is a no-op, kept for symmetry with the
    // narrower-lane implementations above.
    int64x1_t m0 = vdup_n_s64(0);
    uint64x2_t v0 = vshlq_u64(vshrq_n_u64(a.val, 63), vcombine_s64(m0, m0));
    return (int)vgetq_lane_u64(v0, 0) + ((int)vgetq_lane_u64(v0, 1) << 1);
}
inline int v_signmask(const v_float64x2& a)
{ return v_signmask(v_reinterpret_as_u64(a)); }
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 857 | |
// v_check_all: true iff the MSB (sign bit) of EVERY lane is set.
//   Invert, then shift the (inverted) sign bit down: the whole vector is
//   zero exactly when every original MSB was 1.
// v_check_any: true iff the MSB of AT LEAST ONE lane is set.
//   Shift the sign bit down: the vector is non-zero iff some MSB was 1.
// Both collapse the 128-bit result via the two 64-bit lanes ORed together.
#define OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(_Tpvec, suffix, shift) \
inline bool v_check_all(const v_##_Tpvec& a) \
{ \
    _Tpvec##_t v0 = vshrq_n_##suffix(vmvnq_##suffix(a.val), shift); \
    uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
    return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) == 0; \
} \
inline bool v_check_any(const v_##_Tpvec& a) \
{ \
    _Tpvec##_t v0 = vshrq_n_##suffix(a.val, shift); \
    uint64x2_t v1 = vreinterpretq_u64_##suffix(v0); \
    return (vgetq_lane_u64(v1, 0) | vgetq_lane_u64(v1, 1)) != 0; \
}

OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint8x16, u8, 7)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint16x8, u16, 15)
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint32x4, u32, 31)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_CHECK_ALLANY(uint64x2, u64, 63)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 878 | |
// Signed / float variants simply reinterpret to the unsigned lane type and
// defer to the macro-generated implementations above; only the per-lane
// sign bit is inspected, so the reinterpret is lossless for this purpose.
inline bool v_check_all(const v_int8x16& a)
{ return v_check_all(v_reinterpret_as_u8(a)); }
inline bool v_check_all(const v_int16x8& a)
{ return v_check_all(v_reinterpret_as_u16(a)); }
inline bool v_check_all(const v_int32x4& a)
{ return v_check_all(v_reinterpret_as_u32(a)); }
inline bool v_check_all(const v_float32x4& a)
{ return v_check_all(v_reinterpret_as_u32(a)); }

inline bool v_check_any(const v_int8x16& a)
{ return v_check_any(v_reinterpret_as_u8(a)); }
inline bool v_check_any(const v_int16x8& a)
{ return v_check_any(v_reinterpret_as_u16(a)); }
inline bool v_check_any(const v_int32x4& a)
{ return v_check_any(v_reinterpret_as_u32(a)); }
inline bool v_check_any(const v_float32x4& a)
{ return v_check_any(v_reinterpret_as_u32(a)); }

#if CV_SIMD128_64F
inline bool v_check_all(const v_int64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_all(const v_float64x2& a)
{ return v_check_all(v_reinterpret_as_u64(a)); }
inline bool v_check_any(const v_int64x2& a)
{ return v_check_any(v_reinterpret_as_u64(a)); }
inline bool v_check_any(const v_float64x2& a)
{ return v_check_any(v_reinterpret_as_u64(a)); }
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 907 | |
// Per-lane select: result lane = mask lane ? a lane : b lane.
// vbslq is a BITWISE select, so each mask lane is expected to be all-ones
// or all-zeros (as produced by the comparison operators).
#define OPENCV_HAL_IMPL_NEON_SELECT(_Tpvec, suffix, usuffix) \
inline _Tpvec v_select(const _Tpvec& mask, const _Tpvec& a, const _Tpvec& b) \
{ \
    return _Tpvec(vbslq_##suffix(vreinterpretq_##usuffix##_##suffix(mask.val), a.val, b.val)); \
}

OPENCV_HAL_IMPL_NEON_SELECT(v_uint8x16, u8, u8)
OPENCV_HAL_IMPL_NEON_SELECT(v_int8x16, s8, u8)
OPENCV_HAL_IMPL_NEON_SELECT(v_uint16x8, u16, u16)
OPENCV_HAL_IMPL_NEON_SELECT(v_int16x8, s16, u16)
OPENCV_HAL_IMPL_NEON_SELECT(v_uint32x4, u32, u32)
OPENCV_HAL_IMPL_NEON_SELECT(v_int32x4, s32, u32)
OPENCV_HAL_IMPL_NEON_SELECT(v_float32x4, f32, u32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_SELECT(v_float64x2, f64, u64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 924 | |
// Widening conversions:
//  v_expand      - split a full vector into two vectors of double-width
//                  lanes (b0 = widened low half, b1 = widened high half).
//  v_load_expand - load a 64-bit half-vector from memory and widen it.
// vmovl zero-extends for unsigned lane types and sign-extends for signed.
#define OPENCV_HAL_IMPL_NEON_EXPAND(_Tpvec, _Tpwvec, _Tp, suffix) \
inline void v_expand(const _Tpvec& a, _Tpwvec& b0, _Tpwvec& b1) \
{ \
    b0.val = vmovl_##suffix(vget_low_##suffix(a.val)); \
    b1.val = vmovl_##suffix(vget_high_##suffix(a.val)); \
} \
inline _Tpwvec v_load_expand(const _Tp* ptr) \
{ \
    return _Tpwvec(vmovl_##suffix(vld1_##suffix(ptr))); \
}

OPENCV_HAL_IMPL_NEON_EXPAND(v_uint8x16, v_uint16x8, uchar, u8)
OPENCV_HAL_IMPL_NEON_EXPAND(v_int8x16, v_int16x8, schar, s8)
OPENCV_HAL_IMPL_NEON_EXPAND(v_uint16x8, v_uint32x4, ushort, u16)
OPENCV_HAL_IMPL_NEON_EXPAND(v_int16x8, v_int32x4, short, s16)
OPENCV_HAL_IMPL_NEON_EXPAND(v_uint32x4, v_uint64x2, uint, u32)
OPENCV_HAL_IMPL_NEON_EXPAND(v_int32x4, v_int64x2, int, s32)
| RyoheiHagimoto | 0:0e0631af0305 | 942 | |
| RyoheiHagimoto | 0:0e0631af0305 | 943 | inline v_uint32x4 v_load_expand_q(const uchar* ptr) |
| RyoheiHagimoto | 0:0e0631af0305 | 944 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 945 | uint8x8_t v0 = vcreate_u8(*(unsigned*)ptr); |
| RyoheiHagimoto | 0:0e0631af0305 | 946 | uint16x4_t v1 = vget_low_u16(vmovl_u8(v0)); |
| RyoheiHagimoto | 0:0e0631af0305 | 947 | return v_uint32x4(vmovl_u16(v1)); |
| RyoheiHagimoto | 0:0e0631af0305 | 948 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 949 | |
| RyoheiHagimoto | 0:0e0631af0305 | 950 | inline v_int32x4 v_load_expand_q(const schar* ptr) |
| RyoheiHagimoto | 0:0e0631af0305 | 951 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 952 | int8x8_t v0 = vcreate_s8(*(unsigned*)ptr); |
| RyoheiHagimoto | 0:0e0631af0305 | 953 | int16x4_t v1 = vget_low_s16(vmovl_s8(v0)); |
| RyoheiHagimoto | 0:0e0631af0305 | 954 | return v_int32x4(vmovl_s16(v1)); |
| RyoheiHagimoto | 0:0e0631af0305 | 955 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 956 | |
// Lane interleave / combine helpers:
//  v_zip          - interleave lanes of a0 and a1 (b0 = low halves zipped,
//                   b1 = high halves zipped).
//  v_combine_low  - concatenate the low 64-bit halves of a and b.
//  v_combine_high - concatenate the high 64-bit halves of a and b.
//  v_recombine    - both combines at once.
// AArch64 has single-instruction vzip1q/vzip2q; ARMv7 only has the paired
// vzipq returning an x2 structure, hence the two macro variants.
#if defined(__aarch64__)
#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
{ \
    b0.val = vzip1q_##suffix(a0.val, a1.val); \
    b1.val = vzip2q_##suffix(a0.val, a1.val); \
} \
inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
} \
inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
} \
inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
{ \
    c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
    d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
}
#else
#define OPENCV_HAL_IMPL_NEON_UNPACKS(_Tpvec, suffix) \
inline void v_zip(const v_##_Tpvec& a0, const v_##_Tpvec& a1, v_##_Tpvec& b0, v_##_Tpvec& b1) \
{ \
    _Tpvec##x2_t p = vzipq_##suffix(a0.val, a1.val); \
    b0.val = p.val[0]; \
    b1.val = p.val[1]; \
} \
inline v_##_Tpvec v_combine_low(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    return v_##_Tpvec(vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val))); \
} \
inline v_##_Tpvec v_combine_high(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    return v_##_Tpvec(vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val))); \
} \
inline void v_recombine(const v_##_Tpvec& a, const v_##_Tpvec& b, v_##_Tpvec& c, v_##_Tpvec& d) \
{ \
    c.val = vcombine_##suffix(vget_low_##suffix(a.val), vget_low_##suffix(b.val)); \
    d.val = vcombine_##suffix(vget_high_##suffix(a.val), vget_high_##suffix(b.val)); \
}
#endif

OPENCV_HAL_IMPL_NEON_UNPACKS(uint8x16, u8)
OPENCV_HAL_IMPL_NEON_UNPACKS(int8x16, s8)
OPENCV_HAL_IMPL_NEON_UNPACKS(uint16x8, u16)
OPENCV_HAL_IMPL_NEON_UNPACKS(int16x8, s16)
OPENCV_HAL_IMPL_NEON_UNPACKS(uint32x4, u32)
OPENCV_HAL_IMPL_NEON_UNPACKS(int32x4, s32)
OPENCV_HAL_IMPL_NEON_UNPACKS(float32x4, f32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_UNPACKS(float64x2, f64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 1010 | |
// v_extract<s>: concatenate a and b (a low, b high) and return the vector
// starting at lane s of a — i.e. lanes {a[s]..a[n-1], b[0]..b[s-1]}.
// Maps directly to the EXT instruction; s must be a compile-time constant
// in [0, lane count).
#define OPENCV_HAL_IMPL_NEON_EXTRACT(_Tpvec, suffix) \
template <int s> \
inline v_##_Tpvec v_extract(const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    return v_##_Tpvec(vextq_##suffix(a.val, b.val, s)); \
}

OPENCV_HAL_IMPL_NEON_EXTRACT(uint8x16, u8)
OPENCV_HAL_IMPL_NEON_EXTRACT(int8x16, s8)
OPENCV_HAL_IMPL_NEON_EXTRACT(uint16x8, u16)
OPENCV_HAL_IMPL_NEON_EXTRACT(int16x8, s16)
OPENCV_HAL_IMPL_NEON_EXTRACT(uint32x4, u32)
OPENCV_HAL_IMPL_NEON_EXTRACT(int32x4, s32)
OPENCV_HAL_IMPL_NEON_EXTRACT(uint64x2, u64)
OPENCV_HAL_IMPL_NEON_EXTRACT(int64x2, s64)
OPENCV_HAL_IMPL_NEON_EXTRACT(float32x4, f32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_EXTRACT(float64x2, f64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 1030 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1031 | inline v_int32x4 v_round(const v_float32x4& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1032 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1033 | static const int32x4_t v_sign = vdupq_n_s32(1 << 31), |
| RyoheiHagimoto | 0:0e0631af0305 | 1034 | v_05 = vreinterpretq_s32_f32(vdupq_n_f32(0.5f)); |
| RyoheiHagimoto | 0:0e0631af0305 | 1035 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1036 | int32x4_t v_addition = vorrq_s32(v_05, vandq_s32(v_sign, vreinterpretq_s32_f32(a.val))); |
| RyoheiHagimoto | 0:0e0631af0305 | 1037 | return v_int32x4(vcvtq_s32_f32(vaddq_f32(a.val, vreinterpretq_f32_s32(v_addition)))); |
| RyoheiHagimoto | 0:0e0631af0305 | 1038 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1039 | |
// floor(): truncate toward zero, then subtract 1 where truncation rounded
// up (negative non-integer inputs). The comparison mask is all-ones
// (-1 when reinterpreted as s32) exactly in those lanes, so adding it
// performs the per-lane decrement.
inline v_int32x4 v_floor(const v_float32x4& a)
{
    int32x4_t a1 = vcvtq_s32_f32(a.val);
    uint32x4_t mask = vcgtq_f32(vcvtq_f32_s32(a1), a.val);
    return v_int32x4(vaddq_s32(a1, vreinterpretq_s32_u32(mask)));
}
| RyoheiHagimoto | 0:0e0631af0305 | 1046 | |
// ceil(): truncate toward zero, then add 1 where truncation rounded down
// (positive non-integer inputs). The mask is -1 in those lanes, so
// subtracting it performs the per-lane increment.
inline v_int32x4 v_ceil(const v_float32x4& a)
{
    int32x4_t a1 = vcvtq_s32_f32(a.val);
    uint32x4_t mask = vcgtq_f32(a.val, vcvtq_f32_s32(a1));
    return v_int32x4(vsubq_s32(a1, vreinterpretq_s32_u32(mask)));
}
| RyoheiHagimoto | 0:0e0631af0305 | 1053 | |
// trunc(): vcvtq_s32_f32 performs the C-style conversion, rounding toward zero.
inline v_int32x4 v_trunc(const v_float32x4& a)
{ return v_int32x4(vcvtq_s32_f32(a.val)); }
| RyoheiHagimoto | 0:0e0631af0305 | 1056 | |
#if CV_SIMD128_64F
// round() for f64: vcvtaq_s64_f64 rounds to nearest with ties away from
// zero. The two converted values occupy lanes 0..1; lanes 2..3 are zero.
inline v_int32x4 v_round(const v_float64x2& a)
{
    static const int32x2_t zero = vdup_n_s32(0);
    return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero));
}
| RyoheiHagimoto | 0:0e0631af0305 | 1063 | |
// floor() for f64: truncate, then subtract 1 where truncation rounded up
// (negative non-integers); the all-ones mask acts as -1 in the add.
// Result lanes 2..3 are zero.
inline v_int32x4 v_floor(const v_float64x2& a)
{
    static const int32x2_t zero = vdup_n_s32(0);
    int64x2_t a1 = vcvtq_s64_f64(a.val);
    uint64x2_t mask = vcgtq_f64(vcvtq_f64_s64(a1), a.val);
    a1 = vaddq_s64(a1, vreinterpretq_s64_u64(mask));
    return v_int32x4(vcombine_s32(vmovn_s64(a1), zero));
}
| RyoheiHagimoto | 0:0e0631af0305 | 1072 | |
// ceil() for f64: truncate, then add 1 where truncation rounded down
// (positive non-integers); subtracting the -1 mask performs the increment.
// Result lanes 2..3 are zero.
inline v_int32x4 v_ceil(const v_float64x2& a)
{
    static const int32x2_t zero = vdup_n_s32(0);
    int64x2_t a1 = vcvtq_s64_f64(a.val);
    uint64x2_t mask = vcgtq_f64(a.val, vcvtq_f64_s64(a1));
    a1 = vsubq_s64(a1, vreinterpretq_s64_u64(mask));
    return v_int32x4(vcombine_s32(vmovn_s64(a1), zero));
}
| RyoheiHagimoto | 0:0e0631af0305 | 1081 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1082 | inline v_int32x4 v_trunc(const v_float64x2& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1083 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1084 | static const int32x2_t zero = vdup_n_s32(0); |
| RyoheiHagimoto | 0:0e0631af0305 | 1085 | return v_int32x4(vcombine_s32(vmovn_s64(vcvtaq_s64_f64(a.val)), zero)); |
| RyoheiHagimoto | 0:0e0631af0305 | 1086 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1087 | #endif |
| RyoheiHagimoto | 0:0e0631af0305 | 1088 | |
// 4x4 matrix transpose of 32-bit lanes: rows a0..a3 in, columns b0..b3 out.
// Two vtrnq passes swap 2x2 sub-blocks; vcombine then gathers the
// low/high halves into the transposed rows (see the lane diagrams below).
#define OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(_Tpvec, suffix) \
inline void v_transpose4x4(const v_##_Tpvec& a0, const v_##_Tpvec& a1, \
                         const v_##_Tpvec& a2, const v_##_Tpvec& a3, \
                         v_##_Tpvec& b0, v_##_Tpvec& b1, \
                         v_##_Tpvec& b2, v_##_Tpvec& b3) \
{ \
    /* m00 m01 m02 m03 */ \
    /* m10 m11 m12 m13 */ \
    /* m20 m21 m22 m23 */ \
    /* m30 m31 m32 m33 */ \
    _Tpvec##x2_t t0 = vtrnq_##suffix(a0.val, a1.val); \
    _Tpvec##x2_t t1 = vtrnq_##suffix(a2.val, a3.val); \
    /* m00 m10 m02 m12 */ \
    /* m01 m11 m03 m13 */ \
    /* m20 m30 m22 m32 */ \
    /* m21 m31 m23 m33 */ \
    b0.val = vcombine_##suffix(vget_low_##suffix(t0.val[0]), vget_low_##suffix(t1.val[0])); \
    b1.val = vcombine_##suffix(vget_low_##suffix(t0.val[1]), vget_low_##suffix(t1.val[1])); \
    b2.val = vcombine_##suffix(vget_high_##suffix(t0.val[0]), vget_high_##suffix(t1.val[0])); \
    b3.val = vcombine_##suffix(vget_high_##suffix(t0.val[1]), vget_high_##suffix(t1.val[1])); \
}

OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(uint32x4, u32)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(int32x4, s32)
OPENCV_HAL_IMPL_NEON_TRANSPOSE4x4(float32x4, f32)
| RyoheiHagimoto | 0:0e0631af0305 | 1114 | |
// De-interleaving loads and interleaving stores for 2-, 3- and 4-channel
// data, built directly on the structured vld2q/vld3q/vld4q and
// vst2q/vst3q/vst4q instructions (e.g. packed RGB/RGBA pixel access).
// ptr must address enough elements for 2/3/4 full vectors.
#define OPENCV_HAL_IMPL_NEON_INTERLEAVED(_Tpvec, _Tp, suffix) \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b) \
{ \
    _Tpvec##x2_t v = vld2q_##suffix(ptr); \
    a.val = v.val[0]; \
    b.val = v.val[1]; \
} \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, v_##_Tpvec& c) \
{ \
    _Tpvec##x3_t v = vld3q_##suffix(ptr); \
    a.val = v.val[0]; \
    b.val = v.val[1]; \
    c.val = v.val[2]; \
} \
inline void v_load_deinterleave(const _Tp* ptr, v_##_Tpvec& a, v_##_Tpvec& b, \
                                v_##_Tpvec& c, v_##_Tpvec& d) \
{ \
    _Tpvec##x4_t v = vld4q_##suffix(ptr); \
    a.val = v.val[0]; \
    b.val = v.val[1]; \
    c.val = v.val[2]; \
    d.val = v.val[3]; \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b) \
{ \
    _Tpvec##x2_t v; \
    v.val[0] = a.val; \
    v.val[1] = b.val; \
    vst2q_##suffix(ptr, v); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, const v_##_Tpvec& c) \
{ \
    _Tpvec##x3_t v; \
    v.val[0] = a.val; \
    v.val[1] = b.val; \
    v.val[2] = c.val; \
    vst3q_##suffix(ptr, v); \
} \
inline void v_store_interleave( _Tp* ptr, const v_##_Tpvec& a, const v_##_Tpvec& b, \
                               const v_##_Tpvec& c, const v_##_Tpvec& d) \
{ \
    _Tpvec##x4_t v; \
    v.val[0] = a.val; \
    v.val[1] = b.val; \
    v.val[2] = c.val; \
    v.val[3] = d.val; \
    vst4q_##suffix(ptr, v); \
}

OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint8x16, uchar, u8)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(int8x16, schar, s8)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint16x8, ushort, u16)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(int16x8, short, s16)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(uint32x4, unsigned, u32)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(int32x4, int, s32)
OPENCV_HAL_IMPL_NEON_INTERLEAVED(float32x4, float, f32)
#if CV_SIMD128_64F
OPENCV_HAL_IMPL_NEON_INTERLEAVED(float64x2, double, f64)
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 1174 | |
// Convert 4 x s32 lanes to 4 x f32 (exact for |x| < 2^24; larger values
// round to the nearest representable float).
inline v_float32x4 v_cvt_f32(const v_int32x4& a)
{
    return v_float32x4(vcvtq_f32_s32(a.val));
}
| RyoheiHagimoto | 0:0e0631af0305 | 1179 | |
#if CV_SIMD128_64F
// Narrow the two f64 lanes to f32; result lanes 2..3 are zero.
inline v_float32x4 v_cvt_f32(const v_float64x2& a)
{
    float32x2_t zero = vdup_n_f32(0.0f);
    return v_float32x4(vcombine_f32(vcvt_f32_f64(a.val), zero));
}
| RyoheiHagimoto | 0:0e0631af0305 | 1186 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1187 | inline v_float64x2 v_cvt_f64(const v_int32x4& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1188 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1189 | return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_low_s32(a.val)))); |
| RyoheiHagimoto | 0:0e0631af0305 | 1190 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1191 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1192 | inline v_float64x2 v_cvt_f64_high(const v_int32x4& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1193 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1194 | return v_float64x2(vcvt_f64_f32(vcvt_f32_s32(vget_high_s32(a.val)))); |
| RyoheiHagimoto | 0:0e0631af0305 | 1195 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1196 | |
// Widen the LOW two f32 lanes of `a` to f64 (exact widening conversion).
inline v_float64x2 v_cvt_f64(const v_float32x4& a)
{
    return v_float64x2(vcvt_f64_f32(vget_low_f32(a.val)));
}
| RyoheiHagimoto | 0:0e0631af0305 | 1201 | |
// Widen the HIGH two f32 lanes of `a` to f64 (exact widening conversion).
inline v_float64x2 v_cvt_f64_high(const v_float32x4& a)
{
    return v_float64x2(vcvt_f64_f32(vget_high_f32(a.val)));
}
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 1207 | |
#if defined (HAVE_FP16)
// Half-precision conversions, available only when the target supports the
// FP16 conversion instructions.
// f16 -> f32 is exact (every half value is representable as float).
inline v_float32x4 v_cvt_f32(const v_float16x4& a)
{
    return v_float32x4(vcvt_f32_f16(a.val));
}

// f32 -> f16 narrows; values outside half range become +/-inf and others
// round per the current FP rounding mode of vcvt_f16_f32.
inline v_float16x4 v_cvt_f16(const v_float32x4& a)
{
    return v_float16x4(vcvt_f16_f32(a.val));
}
#endif
| RyoheiHagimoto | 0:0e0631af0305 | 1219 | |
//! @name Check SIMD support
//! @{
//! @brief Check CPU capability of SIMD operation
static inline bool hasSIMD128()
{
    // Defers to OpenCV's runtime hardware-support registry rather than a
    // compile-time constant, so NEON can be disabled centrally.
    return checkHardwareSupport(CV_CPU_NEON);
}

//! @}
| RyoheiHagimoto | 0:0e0631af0305 | 1229 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1230 | //! @endcond |
| RyoheiHagimoto | 0:0e0631af0305 | 1231 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1232 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1233 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1234 | #endif |