openCV library for Renesas RZ/A

Dependents:   RZ_A2M_Mbed_samples

Committer:
RyoheiHagimoto
Date:
Fri Jan 29 04:53:38 2021 +0000
Revision:
0:0e0631af0305
copied from https://github.com/d-kato/opencv-lib.

Who changed what in which revision?

User Revision Line number New contents of line
RyoheiHagimoto 0:0e0631af0305 1 /*M///////////////////////////////////////////////////////////////////////////////////////
RyoheiHagimoto 0:0e0631af0305 2 //
RyoheiHagimoto 0:0e0631af0305 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
RyoheiHagimoto 0:0e0631af0305 4 //
RyoheiHagimoto 0:0e0631af0305 5 // By downloading, copying, installing or using the software you agree to this license.
RyoheiHagimoto 0:0e0631af0305 6 // If you do not agree to this license, do not download, install,
RyoheiHagimoto 0:0e0631af0305 7 // copy or use the software.
RyoheiHagimoto 0:0e0631af0305 8 //
RyoheiHagimoto 0:0e0631af0305 9 //
RyoheiHagimoto 0:0e0631af0305 10 // License Agreement
RyoheiHagimoto 0:0e0631af0305 11 // For Open Source Computer Vision Library
RyoheiHagimoto 0:0e0631af0305 12 //
RyoheiHagimoto 0:0e0631af0305 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
RyoheiHagimoto 0:0e0631af0305 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
RyoheiHagimoto 0:0e0631af0305 15 // Copyright (C) 2013, OpenCV Foundation, all rights reserved.
RyoheiHagimoto 0:0e0631af0305 16 // Copyright (C) 2015, Itseez Inc., all rights reserved.
RyoheiHagimoto 0:0e0631af0305 17 // Third party copyrights are property of their respective owners.
RyoheiHagimoto 0:0e0631af0305 18 //
RyoheiHagimoto 0:0e0631af0305 19 // Redistribution and use in source and binary forms, with or without modification,
RyoheiHagimoto 0:0e0631af0305 20 // are permitted provided that the following conditions are met:
RyoheiHagimoto 0:0e0631af0305 21 //
RyoheiHagimoto 0:0e0631af0305 22 // * Redistribution's of source code must retain the above copyright notice,
RyoheiHagimoto 0:0e0631af0305 23 // this list of conditions and the following disclaimer.
RyoheiHagimoto 0:0e0631af0305 24 //
RyoheiHagimoto 0:0e0631af0305 25 // * Redistribution's in binary form must reproduce the above copyright notice,
RyoheiHagimoto 0:0e0631af0305 26 // this list of conditions and the following disclaimer in the documentation
RyoheiHagimoto 0:0e0631af0305 27 // and/or other materials provided with the distribution.
RyoheiHagimoto 0:0e0631af0305 28 //
RyoheiHagimoto 0:0e0631af0305 29 // * The name of the copyright holders may not be used to endorse or promote products
RyoheiHagimoto 0:0e0631af0305 30 // derived from this software without specific prior written permission.
RyoheiHagimoto 0:0e0631af0305 31 //
RyoheiHagimoto 0:0e0631af0305 32 // This software is provided by the copyright holders and contributors "as is" and
RyoheiHagimoto 0:0e0631af0305 33 // any express or implied warranties, including, but not limited to, the implied
RyoheiHagimoto 0:0e0631af0305 34 // warranties of merchantability and fitness for a particular purpose are disclaimed.
RyoheiHagimoto 0:0e0631af0305 35 // In no event shall the Intel Corporation or contributors be liable for any direct,
RyoheiHagimoto 0:0e0631af0305 36 // indirect, incidental, special, exemplary, or consequential damages
RyoheiHagimoto 0:0e0631af0305 37 // (including, but not limited to, procurement of substitute goods or services;
RyoheiHagimoto 0:0e0631af0305 38 // loss of use, data, or profits; or business interruption) however caused
RyoheiHagimoto 0:0e0631af0305 39 // and on any theory of liability, whether in contract, strict liability,
RyoheiHagimoto 0:0e0631af0305 40 // or tort (including negligence or otherwise) arising in any way out of
RyoheiHagimoto 0:0e0631af0305 41 // the use of this software, even if advised of the possibility of such damage.
RyoheiHagimoto 0:0e0631af0305 42 //
RyoheiHagimoto 0:0e0631af0305 43 //M*/
RyoheiHagimoto 0:0e0631af0305 44
RyoheiHagimoto 0:0e0631af0305 45 #ifndef OPENCV_HAL_INTRIN_CPP_HPP
RyoheiHagimoto 0:0e0631af0305 46 #define OPENCV_HAL_INTRIN_CPP_HPP
RyoheiHagimoto 0:0e0631af0305 47
RyoheiHagimoto 0:0e0631af0305 48 #include <limits>
RyoheiHagimoto 0:0e0631af0305 49 #include <cstring>
RyoheiHagimoto 0:0e0631af0305 50 #include <algorithm>
RyoheiHagimoto 0:0e0631af0305 51 #include "opencv2/core/saturate.hpp"
RyoheiHagimoto 0:0e0631af0305 52
RyoheiHagimoto 0:0e0631af0305 53 namespace cv
RyoheiHagimoto 0:0e0631af0305 54 {
RyoheiHagimoto 0:0e0631af0305 55
RyoheiHagimoto 0:0e0631af0305 56 /** @addtogroup core_hal_intrin
RyoheiHagimoto 0:0e0631af0305 57
RyoheiHagimoto 0:0e0631af0305 58 "Universal intrinsics" is a set of types and functions intended to simplify vectorization of code on
RyoheiHagimoto 0:0e0631af0305 59 different platforms. Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86
RyoheiHagimoto 0:0e0631af0305 60 architectures and __NEON__ on ARM architectures; both allow working with 128-bit registers
RyoheiHagimoto 0:0e0631af0305 61 containing packed values of different types. If no SIMD extension is available
RyoheiHagimoto 0:0e0631af0305 62 during compilation, the fallback C++ implementation of the intrinsics is chosen and the code works as
RyoheiHagimoto 0:0e0631af0305 63 expected, although it may be slower.
RyoheiHagimoto 0:0e0631af0305 64
RyoheiHagimoto 0:0e0631af0305 65 ### Types
RyoheiHagimoto 0:0e0631af0305 66
RyoheiHagimoto 0:0e0631af0305 67 There are several types representing a 128-bit register as a vector of packed values; each type is
RyoheiHagimoto 0:0e0631af0305 68 implemented as a structure based on one SIMD register.
RyoheiHagimoto 0:0e0631af0305 69
RyoheiHagimoto 0:0e0631af0305 70 - cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
RyoheiHagimoto 0:0e0631af0305 71 - cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
RyoheiHagimoto 0:0e0631af0305 72 - cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
RyoheiHagimoto 0:0e0631af0305 73 - cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
RyoheiHagimoto 0:0e0631af0305 74 - cv::v_float32x4: four 32-bit floating point values (signed) - float
RyoheiHagimoto 0:0e0631af0305 75 - cv::v_float64x2: two 64-bit floating point values (signed) - double
RyoheiHagimoto 0:0e0631af0305 76
RyoheiHagimoto 0:0e0631af0305 77 @note
RyoheiHagimoto 0:0e0631af0305 78 cv::v_float64x2 is not implemented in the NEON variant; if you want to use this type, don't forget to
RyoheiHagimoto 0:0e0631af0305 79 check the CV_SIMD128_64F preprocessor definition:
RyoheiHagimoto 0:0e0631af0305 80 @code
RyoheiHagimoto 0:0e0631af0305 81 #if CV_SIMD128_64F
RyoheiHagimoto 0:0e0631af0305 82 //...
RyoheiHagimoto 0:0e0631af0305 83 #endif
RyoheiHagimoto 0:0e0631af0305 84 @endcode
RyoheiHagimoto 0:0e0631af0305 85
RyoheiHagimoto 0:0e0631af0305 86 ### Load and store operations
RyoheiHagimoto 0:0e0631af0305 87
RyoheiHagimoto 0:0e0631af0305 88 These operations set the contents of a register explicitly or load them from a memory
RyoheiHagimoto 0:0e0631af0305 89 block, and save the contents of a register to a memory block.
RyoheiHagimoto 0:0e0631af0305 90
RyoheiHagimoto 0:0e0631af0305 91 - Constructors:
RyoheiHagimoto 0:0e0631af0305 92 @ref v_reg::v_reg(const _Tp *ptr) "from memory",
RyoheiHagimoto 0:0e0631af0305 93 @ref v_reg::v_reg(_Tp s0, _Tp s1) "from two values", ...
RyoheiHagimoto 0:0e0631af0305 94 - Other create methods:
RyoheiHagimoto 0:0e0631af0305 95 @ref v_setall_s8, @ref v_setall_u8, ...,
RyoheiHagimoto 0:0e0631af0305 96 @ref v_setzero_u8, @ref v_setzero_s8, ...
RyoheiHagimoto 0:0e0631af0305 97 - Memory operations:
RyoheiHagimoto 0:0e0631af0305 98 @ref v_load, @ref v_load_aligned, @ref v_load_halves,
RyoheiHagimoto 0:0e0631af0305 99 @ref v_store, @ref v_store_aligned,
RyoheiHagimoto 0:0e0631af0305 100 @ref v_store_high, @ref v_store_low
RyoheiHagimoto 0:0e0631af0305 101
RyoheiHagimoto 0:0e0631af0305 102 ### Value reordering
RyoheiHagimoto 0:0e0631af0305 103
RyoheiHagimoto 0:0e0631af0305 104 These operations reorder or recombine elements in one or multiple vectors.
RyoheiHagimoto 0:0e0631af0305 105
RyoheiHagimoto 0:0e0631af0305 106 - Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave
RyoheiHagimoto 0:0e0631af0305 107 - Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand
RyoheiHagimoto 0:0e0631af0305 108 - Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u,
RyoheiHagimoto 0:0e0631af0305 109 @ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store
RyoheiHagimoto 0:0e0631af0305 110 - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high
RyoheiHagimoto 0:0e0631af0305 111 - Extract: @ref v_extract
RyoheiHagimoto 0:0e0631af0305 112
RyoheiHagimoto 0:0e0631af0305 113
RyoheiHagimoto 0:0e0631af0305 114 ### Arithmetic, bitwise and comparison operations
RyoheiHagimoto 0:0e0631af0305 115
RyoheiHagimoto 0:0e0631af0305 116 Element-wise binary and unary operations.
RyoheiHagimoto 0:0e0631af0305 117
RyoheiHagimoto 0:0e0631af0305 118 - Arithmetics:
RyoheiHagimoto 0:0e0631af0305 119 @ref operator +(const v_reg &a, const v_reg &b) "+",
RyoheiHagimoto 0:0e0631af0305 120 @ref operator -(const v_reg &a, const v_reg &b) "-",
RyoheiHagimoto 0:0e0631af0305 121 @ref operator *(const v_reg &a, const v_reg &b) "*",
RyoheiHagimoto 0:0e0631af0305 122 @ref operator /(const v_reg &a, const v_reg &b) "/",
RyoheiHagimoto 0:0e0631af0305 123 @ref v_mul_expand
RyoheiHagimoto 0:0e0631af0305 124
RyoheiHagimoto 0:0e0631af0305 125 - Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap
RyoheiHagimoto 0:0e0631af0305 126
RyoheiHagimoto 0:0e0631af0305 127 - Bitwise shifts:
RyoheiHagimoto 0:0e0631af0305 128 @ref operator <<(const v_reg &a, int s) "<<",
RyoheiHagimoto 0:0e0631af0305 129 @ref operator >>(const v_reg &a, int s) ">>",
RyoheiHagimoto 0:0e0631af0305 130 @ref v_shl, @ref v_shr
RyoheiHagimoto 0:0e0631af0305 131
RyoheiHagimoto 0:0e0631af0305 132 - Bitwise logic:
RyoheiHagimoto 0:0e0631af0305 133 @ref operator&(const v_reg &a, const v_reg &b) "&",
RyoheiHagimoto 0:0e0631af0305 134 @ref operator |(const v_reg &a, const v_reg &b) "|",
RyoheiHagimoto 0:0e0631af0305 135 @ref operator ^(const v_reg &a, const v_reg &b) "^",
RyoheiHagimoto 0:0e0631af0305 136 @ref operator ~(const v_reg &a) "~"
RyoheiHagimoto 0:0e0631af0305 137
RyoheiHagimoto 0:0e0631af0305 138 - Comparison:
RyoheiHagimoto 0:0e0631af0305 139 @ref operator >(const v_reg &a, const v_reg &b) ">",
RyoheiHagimoto 0:0e0631af0305 140 @ref operator >=(const v_reg &a, const v_reg &b) ">=",
RyoheiHagimoto 0:0e0631af0305 141 @ref operator <(const v_reg &a, const v_reg &b) "<",
RyoheiHagimoto 0:0e0631af0305 142 @ref operator <=(const v_reg &a, const v_reg &b) "<=",
RyoheiHagimoto 0:0e0631af0305 143 @ref operator==(const v_reg &a, const v_reg &b) "==",
RyoheiHagimoto 0:0e0631af0305 144 @ref operator !=(const v_reg &a, const v_reg &b) "!="
RyoheiHagimoto 0:0e0631af0305 145
RyoheiHagimoto 0:0e0631af0305 146 - min/max: @ref v_min, @ref v_max
RyoheiHagimoto 0:0e0631af0305 147
RyoheiHagimoto 0:0e0631af0305 148 ### Reduce and mask
RyoheiHagimoto 0:0e0631af0305 149
RyoheiHagimoto 0:0e0631af0305 150 Most of these operations return only one value.
RyoheiHagimoto 0:0e0631af0305 151
RyoheiHagimoto 0:0e0631af0305 152 - Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum
RyoheiHagimoto 0:0e0631af0305 153 - Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select
RyoheiHagimoto 0:0e0631af0305 154
RyoheiHagimoto 0:0e0631af0305 155 ### Other math
RyoheiHagimoto 0:0e0631af0305 156
RyoheiHagimoto 0:0e0631af0305 157 - Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude
RyoheiHagimoto 0:0e0631af0305 158 - Absolute values: @ref v_abs, @ref v_absdiff
RyoheiHagimoto 0:0e0631af0305 159
RyoheiHagimoto 0:0e0631af0305 160 ### Conversions
RyoheiHagimoto 0:0e0631af0305 161
RyoheiHagimoto 0:0e0631af0305 162 Different type conversions and casts:
RyoheiHagimoto 0:0e0631af0305 163
RyoheiHagimoto 0:0e0631af0305 164 - Rounding: @ref v_round, @ref v_floor, @ref v_ceil, @ref v_trunc,
RyoheiHagimoto 0:0e0631af0305 165 - To float: @ref v_cvt_f32, @ref v_cvt_f64
RyoheiHagimoto 0:0e0631af0305 166 - Reinterpret: @ref v_reinterpret_as_u8, @ref v_reinterpret_as_s8, ...
RyoheiHagimoto 0:0e0631af0305 167
RyoheiHagimoto 0:0e0631af0305 168 ### Matrix operations
RyoheiHagimoto 0:0e0631af0305 169
RyoheiHagimoto 0:0e0631af0305 170 In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_matmul, @ref v_transpose4x4
RyoheiHagimoto 0:0e0631af0305 171
RyoheiHagimoto 0:0e0631af0305 172 ### Usability
RyoheiHagimoto 0:0e0631af0305 173
RyoheiHagimoto 0:0e0631af0305 174 Most operations are implemented only for a subset of the available types; the following tables
RyoheiHagimoto 0:0e0631af0305 175 show the applicability of different operations to the types.
RyoheiHagimoto 0:0e0631af0305 176
RyoheiHagimoto 0:0e0631af0305 177 Regular integers:
RyoheiHagimoto 0:0e0631af0305 178
RyoheiHagimoto 0:0e0631af0305 179 | Operations\\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 |
RyoheiHagimoto 0:0e0631af0305 180 |-------------------|:-:|:-:|:-:|:-:|:-:|:-:|
RyoheiHagimoto 0:0e0631af0305 181 |load, store | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 182 |interleave | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 183 |expand | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 184 |expand_q | x | x | | | | |
RyoheiHagimoto 0:0e0631af0305 185 |add, sub | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 186 |add_wrap, sub_wrap | x | x | x | x | | |
RyoheiHagimoto 0:0e0631af0305 187 |mul | | | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 188 |mul_expand | | | x | x | x | |
RyoheiHagimoto 0:0e0631af0305 189 |compare | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 190 |shift | | | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 191 |dotprod | | | | x | | |
RyoheiHagimoto 0:0e0631af0305 192 |logical | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 193 |min, max | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 194 |absdiff | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 195 |reduce | | | | | x | x |
RyoheiHagimoto 0:0e0631af0305 196 |mask | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 197 |pack | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 198 |pack_u | x | | x | | | |
RyoheiHagimoto 0:0e0631af0305 199 |unpack | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 200 |extract | x | x | x | x | x | x |
RyoheiHagimoto 0:0e0631af0305 201 |cvt_flt32 | | | | | | x |
RyoheiHagimoto 0:0e0631af0305 202 |cvt_flt64 | | | | | | x |
RyoheiHagimoto 0:0e0631af0305 203 |transpose4x4 | | | | | x | x |
RyoheiHagimoto 0:0e0631af0305 204
RyoheiHagimoto 0:0e0631af0305 205 Big integers:
RyoheiHagimoto 0:0e0631af0305 206
RyoheiHagimoto 0:0e0631af0305 207 | Operations\\Types | uint 64x2 | int 64x2 |
RyoheiHagimoto 0:0e0631af0305 208 |-------------------|:-:|:-:|
RyoheiHagimoto 0:0e0631af0305 209 |load, store | x | x |
RyoheiHagimoto 0:0e0631af0305 210 |add, sub | x | x |
RyoheiHagimoto 0:0e0631af0305 211 |shift | x | x |
RyoheiHagimoto 0:0e0631af0305 212 |logical | x | x |
RyoheiHagimoto 0:0e0631af0305 213 |extract | x | x |
RyoheiHagimoto 0:0e0631af0305 214
RyoheiHagimoto 0:0e0631af0305 215 Floating point:
RyoheiHagimoto 0:0e0631af0305 216
RyoheiHagimoto 0:0e0631af0305 217 | Operations\\Types | float 32x4 | float 64x2 |
RyoheiHagimoto 0:0e0631af0305 218 |-------------------|:-:|:-:|
RyoheiHagimoto 0:0e0631af0305 219 |load, store | x | x |
RyoheiHagimoto 0:0e0631af0305 220 |interleave | x | |
RyoheiHagimoto 0:0e0631af0305 221 |add, sub | x | x |
RyoheiHagimoto 0:0e0631af0305 222 |mul | x | x |
RyoheiHagimoto 0:0e0631af0305 223 |div | x | x |
RyoheiHagimoto 0:0e0631af0305 224 |compare | x | x |
RyoheiHagimoto 0:0e0631af0305 225 |min, max | x | x |
RyoheiHagimoto 0:0e0631af0305 226 |absdiff | x | x |
RyoheiHagimoto 0:0e0631af0305 227 |reduce | x | |
RyoheiHagimoto 0:0e0631af0305 228 |mask | x | x |
RyoheiHagimoto 0:0e0631af0305 229 |unpack | x | x |
RyoheiHagimoto 0:0e0631af0305 230 |cvt_flt32 | | x |
RyoheiHagimoto 0:0e0631af0305 231 |cvt_flt64 | x | |
RyoheiHagimoto 0:0e0631af0305 232 |sqrt, abs | x | x |
RyoheiHagimoto 0:0e0631af0305 233 |float math | x | x |
RyoheiHagimoto 0:0e0631af0305 234 |transpose4x4 | x | |
RyoheiHagimoto 0:0e0631af0305 235
RyoheiHagimoto 0:0e0631af0305 236
RyoheiHagimoto 0:0e0631af0305 237 @{ */
RyoheiHagimoto 0:0e0631af0305 238
RyoheiHagimoto 0:0e0631af0305 239 template<typename _Tp, int n> struct v_reg
RyoheiHagimoto 0:0e0631af0305 240 { // register emulation: n lanes of type _Tp kept in the plain array s
RyoheiHagimoto 0:0e0631af0305 241 //! @cond IGNORED
RyoheiHagimoto 0:0e0631af0305 242 typedef _Tp lane_type; // type of one lane
RyoheiHagimoto 0:0e0631af0305 243 typedef v_reg<typename V_TypeTraits<_Tp>::int_type, n> int_vec; // same lane count, lanes of V_TypeTraits<_Tp>::int_type
RyoheiHagimoto 0:0e0631af0305 244 typedef v_reg<typename V_TypeTraits<_Tp>::abs_type, n> abs_vec; // same lane count, lanes of V_TypeTraits<_Tp>::abs_type
RyoheiHagimoto 0:0e0631af0305 245 enum { nlanes = n }; // number of lanes
RyoheiHagimoto 0:0e0631af0305 246 //! @endcond
RyoheiHagimoto 0:0e0631af0305 247
RyoheiHagimoto 0:0e0631af0305 248 /** @brief Constructor
RyoheiHagimoto 0:0e0631af0305 249
RyoheiHagimoto 0:0e0631af0305 250 Initializes the register by copying n elements from memory
RyoheiHagimoto 0:0e0631af0305 251 @param ptr pointer to a memory block holding at least n elements for the register */
RyoheiHagimoto 0:0e0631af0305 252 explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; }
RyoheiHagimoto 0:0e0631af0305 253
RyoheiHagimoto 0:0e0631af0305 254 /** @brief Constructor
RyoheiHagimoto 0:0e0631af0305 255
RyoheiHagimoto 0:0e0631af0305 256 Initializes the first two lanes with s0 and s1 (intended for registers of two 64-bit values) */
RyoheiHagimoto 0:0e0631af0305 257 v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; }
RyoheiHagimoto 0:0e0631af0305 258
RyoheiHagimoto 0:0e0631af0305 259 /** @brief Constructor
RyoheiHagimoto 0:0e0631af0305 260
RyoheiHagimoto 0:0e0631af0305 261 Initializes the first four lanes with s0..s3 (intended for registers of four 32-bit values) */
RyoheiHagimoto 0:0e0631af0305 262 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; }
RyoheiHagimoto 0:0e0631af0305 263
RyoheiHagimoto 0:0e0631af0305 264 /** @brief Constructor
RyoheiHagimoto 0:0e0631af0305 265
RyoheiHagimoto 0:0e0631af0305 266 Initializes the first eight lanes with s0..s7 (intended for registers of eight 16-bit values) */
RyoheiHagimoto 0:0e0631af0305 267 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
RyoheiHagimoto 0:0e0631af0305 268 _Tp s4, _Tp s5, _Tp s6, _Tp s7)
RyoheiHagimoto 0:0e0631af0305 269 {
RyoheiHagimoto 0:0e0631af0305 270 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
RyoheiHagimoto 0:0e0631af0305 271 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
RyoheiHagimoto 0:0e0631af0305 272 }
RyoheiHagimoto 0:0e0631af0305 273
RyoheiHagimoto 0:0e0631af0305 274 /** @brief Constructor
RyoheiHagimoto 0:0e0631af0305 275
RyoheiHagimoto 0:0e0631af0305 276 Initializes the first sixteen lanes with s0..s15 (intended for registers of sixteen 8-bit values) */
RyoheiHagimoto 0:0e0631af0305 277 v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3,
RyoheiHagimoto 0:0e0631af0305 278 _Tp s4, _Tp s5, _Tp s6, _Tp s7,
RyoheiHagimoto 0:0e0631af0305 279 _Tp s8, _Tp s9, _Tp s10, _Tp s11,
RyoheiHagimoto 0:0e0631af0305 280 _Tp s12, _Tp s13, _Tp s14, _Tp s15)
RyoheiHagimoto 0:0e0631af0305 281 {
RyoheiHagimoto 0:0e0631af0305 282 s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3;
RyoheiHagimoto 0:0e0631af0305 283 s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7;
RyoheiHagimoto 0:0e0631af0305 284 s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11;
RyoheiHagimoto 0:0e0631af0305 285 s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15;
RyoheiHagimoto 0:0e0631af0305 286 }
RyoheiHagimoto 0:0e0631af0305 287
RyoheiHagimoto 0:0e0631af0305 288 /** @brief Default constructor
RyoheiHagimoto 0:0e0631af0305 289
RyoheiHagimoto 0:0e0631af0305 290 Leaves all lanes uninitialized */
RyoheiHagimoto 0:0e0631af0305 291 v_reg() {}
RyoheiHagimoto 0:0e0631af0305 292
RyoheiHagimoto 0:0e0631af0305 293 /** @brief Copy constructor: copies all n lanes from r */
RyoheiHagimoto 0:0e0631af0305 294 v_reg(const v_reg<_Tp, n> & r)
RyoheiHagimoto 0:0e0631af0305 295 {
RyoheiHagimoto 0:0e0631af0305 296 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 297 s[i] = r.s[i];
RyoheiHagimoto 0:0e0631af0305 298 }
RyoheiHagimoto 0:0e0631af0305 299 /** @brief Access first value
RyoheiHagimoto 0:0e0631af0305 300
RyoheiHagimoto 0:0e0631af0305 301 Returns the value of the first lane (s[0]) with the register's lane type, for example:
RyoheiHagimoto 0:0e0631af0305 302 @code{.cpp}
RyoheiHagimoto 0:0e0631af0305 303 v_int32x4 r(1, 2, 3, 4);
RyoheiHagimoto 0:0e0631af0305 304 int v = r.get0(); // returns 1
RyoheiHagimoto 0:0e0631af0305 305 v_uint64x2 r(1, 2);
RyoheiHagimoto 0:0e0631af0305 306 uint64_t v = r.get0(); // returns 1
RyoheiHagimoto 0:0e0631af0305 307 @endcode
RyoheiHagimoto 0:0e0631af0305 308 */
RyoheiHagimoto 0:0e0631af0305 309 _Tp get0() const { return s[0]; }
RyoheiHagimoto 0:0e0631af0305 310
RyoheiHagimoto 0:0e0631af0305 311 //! @cond IGNORED
RyoheiHagimoto 0:0e0631af0305 312 _Tp get(const int i) const { return s[i]; } // value of lane i; the index is not range-checked
RyoheiHagimoto 0:0e0631af0305 313 v_reg<_Tp, n> high() const // upper n/2 lanes moved into the lower n/2 lanes, upper n/2 lanes set to 0
RyoheiHagimoto 0:0e0631af0305 314 {
RyoheiHagimoto 0:0e0631af0305 315 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 316 int i;
RyoheiHagimoto 0:0e0631af0305 317 for( i = 0; i < n/2; i++ )
RyoheiHagimoto 0:0e0631af0305 318 {
RyoheiHagimoto 0:0e0631af0305 319 c.s[i] = s[i+(n/2)];
RyoheiHagimoto 0:0e0631af0305 320 c.s[i+(n/2)] = 0;
RyoheiHagimoto 0:0e0631af0305 321 }
RyoheiHagimoto 0:0e0631af0305 322 return c;
RyoheiHagimoto 0:0e0631af0305 323 }
RyoheiHagimoto 0:0e0631af0305 324
RyoheiHagimoto 0:0e0631af0305 325 static v_reg<_Tp, n> zero() // register with every lane set to 0
RyoheiHagimoto 0:0e0631af0305 326 {
RyoheiHagimoto 0:0e0631af0305 327 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 328 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 329 c.s[i] = (_Tp)0;
RyoheiHagimoto 0:0e0631af0305 330 return c;
RyoheiHagimoto 0:0e0631af0305 331 }
RyoheiHagimoto 0:0e0631af0305 332
RyoheiHagimoto 0:0e0631af0305 333 static v_reg<_Tp, n> all(_Tp s) // register with every lane set to s
RyoheiHagimoto 0:0e0631af0305 334 {
RyoheiHagimoto 0:0e0631af0305 335 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 336 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 337 c.s[i] = s;
RyoheiHagimoto 0:0e0631af0305 338 return c;
RyoheiHagimoto 0:0e0631af0305 339 }
RyoheiHagimoto 0:0e0631af0305 340
RyoheiHagimoto 0:0e0631af0305 341 template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const // byte-wise copy into a register of another lane type/count
RyoheiHagimoto 0:0e0631af0305 342 {
RyoheiHagimoto 0:0e0631af0305 343 size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); // copy no more than the smaller of the two registers holds
RyoheiHagimoto 0:0e0631af0305 344 v_reg<_Tp2, n2> c; // default-constructed: bytes beyond the copied range stay uninitialized
RyoheiHagimoto 0:0e0631af0305 345 std::memcpy(&c.s[0], &s[0], bytes);
RyoheiHagimoto 0:0e0631af0305 346 return c;
RyoheiHagimoto 0:0e0631af0305 347 }
RyoheiHagimoto 0:0e0631af0305 348
RyoheiHagimoto 0:0e0631af0305 349 _Tp s[n]; // lane storage
RyoheiHagimoto 0:0e0631af0305 350 //! @endcond
RyoheiHagimoto 0:0e0631af0305 351 };
RyoheiHagimoto 0:0e0631af0305 352
RyoheiHagimoto 0:0e0631af0305 353 /** @brief Sixteen 8-bit unsigned integer values (lane type uchar) */
RyoheiHagimoto 0:0e0631af0305 354 typedef v_reg<uchar, 16> v_uint8x16;
RyoheiHagimoto 0:0e0631af0305 355 /** @brief Sixteen 8-bit signed integer values (lane type schar) */
RyoheiHagimoto 0:0e0631af0305 356 typedef v_reg<schar, 16> v_int8x16;
RyoheiHagimoto 0:0e0631af0305 357 /** @brief Eight 16-bit unsigned integer values (lane type ushort) */
RyoheiHagimoto 0:0e0631af0305 358 typedef v_reg<ushort, 8> v_uint16x8;
RyoheiHagimoto 0:0e0631af0305 359 /** @brief Eight 16-bit signed integer values (lane type short) */
RyoheiHagimoto 0:0e0631af0305 360 typedef v_reg<short, 8> v_int16x8;
RyoheiHagimoto 0:0e0631af0305 361 /** @brief Four 32-bit unsigned integer values (lane type unsigned) */
RyoheiHagimoto 0:0e0631af0305 362 typedef v_reg<unsigned, 4> v_uint32x4;
RyoheiHagimoto 0:0e0631af0305 363 /** @brief Four 32-bit signed integer values (lane type int) */
RyoheiHagimoto 0:0e0631af0305 364 typedef v_reg<int, 4> v_int32x4;
RyoheiHagimoto 0:0e0631af0305 365 /** @brief Four 32-bit floating point values (single precision, lane type float) */
RyoheiHagimoto 0:0e0631af0305 366 typedef v_reg<float, 4> v_float32x4;
RyoheiHagimoto 0:0e0631af0305 367 /** @brief Two 64-bit floating point values (double precision, lane type double) */
RyoheiHagimoto 0:0e0631af0305 368 typedef v_reg<double, 2> v_float64x2;
RyoheiHagimoto 0:0e0631af0305 369 /** @brief Two 64-bit unsigned integer values (lane type uint64) */
RyoheiHagimoto 0:0e0631af0305 370 typedef v_reg<uint64, 2> v_uint64x2;
RyoheiHagimoto 0:0e0631af0305 371 /** @brief Two 64-bit signed integer values (lane type int64) */
RyoheiHagimoto 0:0e0631af0305 372 typedef v_reg<int64, 2> v_int64x2;
RyoheiHagimoto 0:0e0631af0305 373
RyoheiHagimoto 0:0e0631af0305 374 //! @brief Helper macro: defines the element-wise operators `bin_op` and `bin_op=` for v_reg; every lane result is passed through saturate_cast<_Tp>
RyoheiHagimoto 0:0e0631af0305 375 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 376 #define OPENCV_HAL_IMPL_BIN_OP(bin_op) \
RyoheiHagimoto 0:0e0631af0305 377 template<typename _Tp, int n> inline v_reg<_Tp, n> \
RyoheiHagimoto 0:0e0631af0305 378 operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
RyoheiHagimoto 0:0e0631af0305 379 { \
RyoheiHagimoto 0:0e0631af0305 380 v_reg<_Tp, n> c; \
RyoheiHagimoto 0:0e0631af0305 381 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 382 c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
RyoheiHagimoto 0:0e0631af0305 383 return c; \
RyoheiHagimoto 0:0e0631af0305 384 } \
RyoheiHagimoto 0:0e0631af0305 385 template<typename _Tp, int n> inline v_reg<_Tp, n>& \
RyoheiHagimoto 0:0e0631af0305 386 operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
RyoheiHagimoto 0:0e0631af0305 387 { \
RyoheiHagimoto 0:0e0631af0305 388 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 389 a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \
RyoheiHagimoto 0:0e0631af0305 390 return a; \
RyoheiHagimoto 0:0e0631af0305 391 }
RyoheiHagimoto 0:0e0631af0305 392
RyoheiHagimoto 0:0e0631af0305 393 /** @brief Add values
RyoheiHagimoto 0:0e0631af0305 394
RyoheiHagimoto 0:0e0631af0305 395 For all types. Lanes are added pairwise and each result is passed through saturate_cast. */
RyoheiHagimoto 0:0e0631af0305 396 OPENCV_HAL_IMPL_BIN_OP(+)
RyoheiHagimoto 0:0e0631af0305 397
RyoheiHagimoto 0:0e0631af0305 398 /** @brief Subtract values
RyoheiHagimoto 0:0e0631af0305 399
RyoheiHagimoto 0:0e0631af0305 400 For all types. Lanes are subtracted pairwise and each result is passed through saturate_cast. */
RyoheiHagimoto 0:0e0631af0305 401 OPENCV_HAL_IMPL_BIN_OP(-)
RyoheiHagimoto 0:0e0631af0305 402
RyoheiHagimoto 0:0e0631af0305 403 /** @brief Multiply values
RyoheiHagimoto 0:0e0631af0305 404
RyoheiHagimoto 0:0e0631af0305 405 For 16- and 32-bit integer types and floating types. Each lane product is passed through saturate_cast. */
RyoheiHagimoto 0:0e0631af0305 406 OPENCV_HAL_IMPL_BIN_OP(*)
RyoheiHagimoto 0:0e0631af0305 407
RyoheiHagimoto 0:0e0631af0305 408 /** @brief Divide values
RyoheiHagimoto 0:0e0631af0305 409
RyoheiHagimoto 0:0e0631af0305 410 For floating types only. Lanes are divided pairwise (a.s[i] / b.s[i]). */
RyoheiHagimoto 0:0e0631af0305 411 OPENCV_HAL_IMPL_BIN_OP(/)
RyoheiHagimoto 0:0e0631af0305 412
RyoheiHagimoto 0:0e0631af0305 413 //! @brief Helper macro: defines the element-wise bitwise operators `bit_op` and `bit_op=` for v_reg; lanes are converted with V_TypeTraits<_Tp>::reinterpret_int, combined, and converted back with reinterpret_from_int
RyoheiHagimoto 0:0e0631af0305 414 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 415 #define OPENCV_HAL_IMPL_BIT_OP(bit_op) \
RyoheiHagimoto 0:0e0631af0305 416 template<typename _Tp, int n> inline v_reg<_Tp, n> operator bit_op \
RyoheiHagimoto 0:0e0631af0305 417 (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
RyoheiHagimoto 0:0e0631af0305 418 { \
RyoheiHagimoto 0:0e0631af0305 419 v_reg<_Tp, n> c; \
RyoheiHagimoto 0:0e0631af0305 420 typedef typename V_TypeTraits<_Tp>::int_type itype; \
RyoheiHagimoto 0:0e0631af0305 421 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 422 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
RyoheiHagimoto 0:0e0631af0305 423 V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
RyoheiHagimoto 0:0e0631af0305 424 return c; \
RyoheiHagimoto 0:0e0631af0305 425 } \
RyoheiHagimoto 0:0e0631af0305 426 template<typename _Tp, int n> inline v_reg<_Tp, n>& operator \
RyoheiHagimoto 0:0e0631af0305 427 bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
RyoheiHagimoto 0:0e0631af0305 428 { \
RyoheiHagimoto 0:0e0631af0305 429 typedef typename V_TypeTraits<_Tp>::int_type itype; \
RyoheiHagimoto 0:0e0631af0305 430 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 431 a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \
RyoheiHagimoto 0:0e0631af0305 432 V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \
RyoheiHagimoto 0:0e0631af0305 433 return a; \
RyoheiHagimoto 0:0e0631af0305 434 }
RyoheiHagimoto 0:0e0631af0305 435
RyoheiHagimoto 0:0e0631af0305 436 /** @brief Bitwise AND
RyoheiHagimoto 0:0e0631af0305 437
RyoheiHagimoto 0:0e0631af0305 438 Only for integer types. Applied lane by lane. */
RyoheiHagimoto 0:0e0631af0305 439 OPENCV_HAL_IMPL_BIT_OP(&)
RyoheiHagimoto 0:0e0631af0305 440
RyoheiHagimoto 0:0e0631af0305 441 /** @brief Bitwise OR
RyoheiHagimoto 0:0e0631af0305 442
RyoheiHagimoto 0:0e0631af0305 443 Only for integer types. Applied lane by lane. */
RyoheiHagimoto 0:0e0631af0305 444 OPENCV_HAL_IMPL_BIT_OP(|)
RyoheiHagimoto 0:0e0631af0305 445
RyoheiHagimoto 0:0e0631af0305 446 /** @brief Bitwise XOR
RyoheiHagimoto 0:0e0631af0305 447
RyoheiHagimoto 0:0e0631af0305 448 Only for integer types. Applied lane by lane. */
RyoheiHagimoto 0:0e0631af0305 449 OPENCV_HAL_IMPL_BIT_OP(^)
RyoheiHagimoto 0:0e0631af0305 450
RyoheiHagimoto 0:0e0631af0305 451 /** @brief Bitwise NOT
RyoheiHagimoto 0:0e0631af0305 452
RyoheiHagimoto 0:0e0631af0305 453 Only for integer types. Returns a new register with every lane of a inverted. */
RyoheiHagimoto 0:0e0631af0305 454 template<typename _Tp, int n> inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 455 {
RyoheiHagimoto 0:0e0631af0305 456 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 457 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 458 {
RyoheiHagimoto 0:0e0631af0305 459 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); // invert the lane's integer representation, then convert back to _Tp
RyoheiHagimoto 0:0e0631af0305 460 }
RyoheiHagimoto 0:0e0631af0305 461 return c;
RyoheiHagimoto 0:0e0631af0305 462 }
RyoheiHagimoto 0:0e0631af0305 463
RyoheiHagimoto 0:0e0631af0305 464 //! @brief Helper macro: defines func(const v_reg<_Tp, n>&) that applies cfunc to every lane and returns the results as v_reg<_Tp2, n>
RyoheiHagimoto 0:0e0631af0305 465 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 466 #define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \
RyoheiHagimoto 0:0e0631af0305 467 template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \
RyoheiHagimoto 0:0e0631af0305 468 { \
RyoheiHagimoto 0:0e0631af0305 469 v_reg<_Tp2, n> c; \
RyoheiHagimoto 0:0e0631af0305 470 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 471 c.s[i] = cfunc(a.s[i]); \
RyoheiHagimoto 0:0e0631af0305 472 return c; \
RyoheiHagimoto 0:0e0631af0305 473 }
RyoheiHagimoto 0:0e0631af0305 474
RyoheiHagimoto 0:0e0631af0305 475 /** @brief Square root of elements
RyoheiHagimoto 0:0e0631af0305 476
RyoheiHagimoto 0:0e0631af0305 477 Only for floating point types. Applies std::sqrt to each lane; the result keeps the input lane type. */
RyoheiHagimoto 0:0e0631af0305 478 OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp)
RyoheiHagimoto 0:0e0631af0305 479
RyoheiHagimoto 0:0e0631af0305 480 //! @cond IGNORED
RyoheiHagimoto 0:0e0631af0305 481 OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
RyoheiHagimoto 0:0e0631af0305 482 OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
RyoheiHagimoto 0:0e0631af0305 483 OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp)
RyoheiHagimoto 0:0e0631af0305 484 OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
RyoheiHagimoto 0:0e0631af0305 485 //! @endcond
RyoheiHagimoto 0:0e0631af0305 486
RyoheiHagimoto 0:0e0631af0305 487 /** @brief Absolute value of elements
RyoheiHagimoto 0:0e0631af0305 488
RyoheiHagimoto 0:0e0631af0305 489 Only for floating point types. Applies std::abs to each lane and casts the result to
RyoheiHagimoto 0:0e0631af0305 490 V_TypeTraits<_Tp>::abs_type, which is also the lane type of the returned register. */
RyoheiHagimoto 0:0e0631af0305 490 OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs,
RyoheiHagimoto 0:0e0631af0305 491 typename V_TypeTraits<_Tp>::abs_type)
RyoheiHagimoto 0:0e0631af0305 492
RyoheiHagimoto 0:0e0631af0305 493 /** @brief Round elements
RyoheiHagimoto 0:0e0631af0305 494
RyoheiHagimoto 0:0e0631af0305 495 Only for floating point types. Applies cvRound to each lane; the result has int lanes. */
RyoheiHagimoto 0:0e0631af0305 496 OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int)
RyoheiHagimoto 0:0e0631af0305 497
RyoheiHagimoto 0:0e0631af0305 498 /** @brief Floor elements
RyoheiHagimoto 0:0e0631af0305 499
RyoheiHagimoto 0:0e0631af0305 500 Only for floating point types. Applies cvFloor to each lane; the result has int lanes. */
RyoheiHagimoto 0:0e0631af0305 501 OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int)
RyoheiHagimoto 0:0e0631af0305 502
RyoheiHagimoto 0:0e0631af0305 503 /** @brief Ceil elements
RyoheiHagimoto 0:0e0631af0305 504
RyoheiHagimoto 0:0e0631af0305 505 Only for floating point types. Applies cvCeil to each lane; the result has int lanes. */
RyoheiHagimoto 0:0e0631af0305 506 OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int)
RyoheiHagimoto 0:0e0631af0305 507
RyoheiHagimoto 0:0e0631af0305 508 /** @brief Truncate elements
RyoheiHagimoto 0:0e0631af0305 509
RyoheiHagimoto 0:0e0631af0305 510 Only for floating point types. Converts each lane with a plain int(...) conversion; the result has int lanes. */
RyoheiHagimoto 0:0e0631af0305 511 OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int)
RyoheiHagimoto 0:0e0631af0305 512
RyoheiHagimoto 0:0e0631af0305 513 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 514 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 515 #define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \
RyoheiHagimoto 0:0e0631af0305 516 template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
RyoheiHagimoto 0:0e0631af0305 517 { \
RyoheiHagimoto 0:0e0631af0305 518 v_reg<_Tp, n> c; \
RyoheiHagimoto 0:0e0631af0305 519 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 520 c.s[i] = cfunc(a.s[i], b.s[i]); \
RyoheiHagimoto 0:0e0631af0305 521 return c; \
RyoheiHagimoto 0:0e0631af0305 522 }
RyoheiHagimoto 0:0e0631af0305 523
RyoheiHagimoto 0:0e0631af0305 524 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 525 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 526 #define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \
RyoheiHagimoto 0:0e0631af0305 527 template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \
RyoheiHagimoto 0:0e0631af0305 528 { \
RyoheiHagimoto 0:0e0631af0305 529 _Tp c = a.s[0]; \
RyoheiHagimoto 0:0e0631af0305 530 for( int i = 1; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 531 c = cfunc(c, a.s[i]); \
RyoheiHagimoto 0:0e0631af0305 532 return c; \
RyoheiHagimoto 0:0e0631af0305 533 }
RyoheiHagimoto 0:0e0631af0305 534
RyoheiHagimoto 0:0e0631af0305 535 /** @brief Choose min values for each pair
RyoheiHagimoto 0:0e0631af0305 536
RyoheiHagimoto 0:0e0631af0305 537 Scheme:
RyoheiHagimoto 0:0e0631af0305 538 @code
RyoheiHagimoto 0:0e0631af0305 539 {A1 A2 ...}
RyoheiHagimoto 0:0e0631af0305 540 {B1 B2 ...}
RyoheiHagimoto 0:0e0631af0305 541 --------------
RyoheiHagimoto 0:0e0631af0305 542 {min(A1,B1) min(A2,B2) ...}
RyoheiHagimoto 0:0e0631af0305 543 @endcode
RyoheiHagimoto 0:0e0631af0305 544 For all types except 64-bit integer. */
RyoheiHagimoto 0:0e0631af0305 545 OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min)
RyoheiHagimoto 0:0e0631af0305 546
RyoheiHagimoto 0:0e0631af0305 547 /** @brief Choose max values for each pair
RyoheiHagimoto 0:0e0631af0305 548
RyoheiHagimoto 0:0e0631af0305 549 Scheme:
RyoheiHagimoto 0:0e0631af0305 550 @code
RyoheiHagimoto 0:0e0631af0305 551 {A1 A2 ...}
RyoheiHagimoto 0:0e0631af0305 552 {B1 B2 ...}
RyoheiHagimoto 0:0e0631af0305 553 --------------
RyoheiHagimoto 0:0e0631af0305 554 {max(A1,B1) max(A2,B2) ...}
RyoheiHagimoto 0:0e0631af0305 555 @endcode
RyoheiHagimoto 0:0e0631af0305 556 For all types except 64-bit integer. */
RyoheiHagimoto 0:0e0631af0305 557 OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max)
RyoheiHagimoto 0:0e0631af0305 558
RyoheiHagimoto 0:0e0631af0305 559 /** @brief Find one min value
RyoheiHagimoto 0:0e0631af0305 560
RyoheiHagimoto 0:0e0631af0305 561 Scheme:
RyoheiHagimoto 0:0e0631af0305 562 @code
RyoheiHagimoto 0:0e0631af0305 563 {A1 A2 A3 ...} => min(A1,A2,A3,...)
RyoheiHagimoto 0:0e0631af0305 564 @endcode
RyoheiHagimoto 0:0e0631af0305 565 For 32-bit integer and 32-bit floating point types. */
RyoheiHagimoto 0:0e0631af0305 566 OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min)
RyoheiHagimoto 0:0e0631af0305 567
RyoheiHagimoto 0:0e0631af0305 568 /** @brief Find one max value
RyoheiHagimoto 0:0e0631af0305 569
RyoheiHagimoto 0:0e0631af0305 570 Scheme:
RyoheiHagimoto 0:0e0631af0305 571 @code
RyoheiHagimoto 0:0e0631af0305 572 {A1 A2 A3 ...} => max(A1,A2,A3,...)
RyoheiHagimoto 0:0e0631af0305 573 @endcode
RyoheiHagimoto 0:0e0631af0305 574 For 32-bit integer and 32-bit floating point types. */
RyoheiHagimoto 0:0e0631af0305 575 OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max)
RyoheiHagimoto 0:0e0631af0305 576
RyoheiHagimoto 0:0e0631af0305 577 //! @cond IGNORED
RyoheiHagimoto 0:0e0631af0305 578 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 579 inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
RyoheiHagimoto 0:0e0631af0305 580 v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval )
RyoheiHagimoto 0:0e0631af0305 581 {
RyoheiHagimoto 0:0e0631af0305 582 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 583 {
RyoheiHagimoto 0:0e0631af0305 584 minval.s[i] = std::min(a.s[i], b.s[i]);
RyoheiHagimoto 0:0e0631af0305 585 maxval.s[i] = std::max(a.s[i], b.s[i]);
RyoheiHagimoto 0:0e0631af0305 586 }
RyoheiHagimoto 0:0e0631af0305 587 }
RyoheiHagimoto 0:0e0631af0305 588 //! @endcond
RyoheiHagimoto 0:0e0631af0305 589
RyoheiHagimoto 0:0e0631af0305 590 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 591 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 592 #define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \
RyoheiHagimoto 0:0e0631af0305 593 template<typename _Tp, int n> \
RyoheiHagimoto 0:0e0631af0305 594 inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
RyoheiHagimoto 0:0e0631af0305 595 { \
RyoheiHagimoto 0:0e0631af0305 596 typedef typename V_TypeTraits<_Tp>::int_type itype; \
RyoheiHagimoto 0:0e0631af0305 597 v_reg<_Tp, n> c; \
RyoheiHagimoto 0:0e0631af0305 598 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 599 c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \
RyoheiHagimoto 0:0e0631af0305 600 return c; \
RyoheiHagimoto 0:0e0631af0305 601 }
RyoheiHagimoto 0:0e0631af0305 602
RyoheiHagimoto 0:0e0631af0305 603 /** @brief Less-than comparison
RyoheiHagimoto 0:0e0631af0305 604
RyoheiHagimoto 0:0e0631af0305 605 For all types except 64-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 606 OPENCV_HAL_IMPL_CMP_OP(<)
RyoheiHagimoto 0:0e0631af0305 607
RyoheiHagimoto 0:0e0631af0305 608 /** @brief Greater-than comparison
RyoheiHagimoto 0:0e0631af0305 609
RyoheiHagimoto 0:0e0631af0305 610 For all types except 64-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 611 OPENCV_HAL_IMPL_CMP_OP(>)
RyoheiHagimoto 0:0e0631af0305 612
RyoheiHagimoto 0:0e0631af0305 613 /** @brief Less-than or equal comparison
RyoheiHagimoto 0:0e0631af0305 614
RyoheiHagimoto 0:0e0631af0305 615 For all types except 64-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 616 OPENCV_HAL_IMPL_CMP_OP(<=)
RyoheiHagimoto 0:0e0631af0305 617
RyoheiHagimoto 0:0e0631af0305 618 /** @brief Greater-than or equal comparison
RyoheiHagimoto 0:0e0631af0305 619
RyoheiHagimoto 0:0e0631af0305 620 For all types except 64-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 621 OPENCV_HAL_IMPL_CMP_OP(>=)
RyoheiHagimoto 0:0e0631af0305 622
RyoheiHagimoto 0:0e0631af0305 623 /** @brief Equal comparison
RyoheiHagimoto 0:0e0631af0305 624
RyoheiHagimoto 0:0e0631af0305 625 For all types except 64-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 626 OPENCV_HAL_IMPL_CMP_OP(==)
RyoheiHagimoto 0:0e0631af0305 627
RyoheiHagimoto 0:0e0631af0305 628 /** @brief Not equal comparison
RyoheiHagimoto 0:0e0631af0305 629
RyoheiHagimoto 0:0e0631af0305 630 For all types except 64-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 631 OPENCV_HAL_IMPL_CMP_OP(!=)
RyoheiHagimoto 0:0e0631af0305 632
RyoheiHagimoto 0:0e0631af0305 633 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 634 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 635 #define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \
RyoheiHagimoto 0:0e0631af0305 636 template<typename _Tp, int n> \
RyoheiHagimoto 0:0e0631af0305 637 inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \
RyoheiHagimoto 0:0e0631af0305 638 { \
RyoheiHagimoto 0:0e0631af0305 639 typedef _Tp2 rtype; \
RyoheiHagimoto 0:0e0631af0305 640 v_reg<rtype, n> c; \
RyoheiHagimoto 0:0e0631af0305 641 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 642 c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \
RyoheiHagimoto 0:0e0631af0305 643 return c; \
RyoheiHagimoto 0:0e0631af0305 644 }
RyoheiHagimoto 0:0e0631af0305 645
RyoheiHagimoto 0:0e0631af0305 646 /** @brief Add values without saturation
RyoheiHagimoto 0:0e0631af0305 647
RyoheiHagimoto 0:0e0631af0305 648 For 8- and 16-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 649 OPENCV_HAL_IMPL_ADD_SUB_OP(v_add_wrap, +, (_Tp), _Tp)
RyoheiHagimoto 0:0e0631af0305 650
RyoheiHagimoto 0:0e0631af0305 651 /** @brief Subtract values without saturation
RyoheiHagimoto 0:0e0631af0305 652
RyoheiHagimoto 0:0e0631af0305 653 For 8- and 16-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 654 OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap, -, (_Tp), _Tp)
RyoheiHagimoto 0:0e0631af0305 655
//! @cond IGNORED
// Returns the larger argument minus the smaller one, so the subtraction never
// goes below zero for unsigned T.
template<typename T> inline T _absdiff(T a, T b)
{
    if( a > b )
        return a - b;
    return b - a;
}
//! @endcond
RyoheiHagimoto 0:0e0631af0305 662
RyoheiHagimoto 0:0e0631af0305 663 /** @brief Absolute difference
RyoheiHagimoto 0:0e0631af0305 664
RyoheiHagimoto 0:0e0631af0305 665 Returns \f$ |a - b| \f$ converted to corresponding unsigned type.
RyoheiHagimoto 0:0e0631af0305 666 Example:
RyoheiHagimoto 0:0e0631af0305 667 @code{.cpp}
RyoheiHagimoto 0:0e0631af0305 668 v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}
RyoheiHagimoto 0:0e0631af0305 669 v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}
RyoheiHagimoto 0:0e0631af0305 670 @endcode
RyoheiHagimoto 0:0e0631af0305 671 For 8-, 16-, 32-bit integer source types. */
RyoheiHagimoto 0:0e0631af0305 672 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 673 inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b)
RyoheiHagimoto 0:0e0631af0305 674 {
RyoheiHagimoto 0:0e0631af0305 675 typedef typename V_TypeTraits<_Tp>::abs_type rtype;
RyoheiHagimoto 0:0e0631af0305 676 v_reg<rtype, n> c;
RyoheiHagimoto 0:0e0631af0305 677 const rtype mask = std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0;
RyoheiHagimoto 0:0e0631af0305 678 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 679 {
RyoheiHagimoto 0:0e0631af0305 680 rtype ua = a.s[i] ^ mask;
RyoheiHagimoto 0:0e0631af0305 681 rtype ub = b.s[i] ^ mask;
RyoheiHagimoto 0:0e0631af0305 682 c.s[i] = _absdiff(ua, ub);
RyoheiHagimoto 0:0e0631af0305 683 }
RyoheiHagimoto 0:0e0631af0305 684 return c;
RyoheiHagimoto 0:0e0631af0305 685 }
RyoheiHagimoto 0:0e0631af0305 686
RyoheiHagimoto 0:0e0631af0305 687 /** @overload
RyoheiHagimoto 0:0e0631af0305 688
RyoheiHagimoto 0:0e0631af0305 689 For 32-bit floating point values */
RyoheiHagimoto 0:0e0631af0305 690 inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b)
RyoheiHagimoto 0:0e0631af0305 691 {
RyoheiHagimoto 0:0e0631af0305 692 v_float32x4 c;
RyoheiHagimoto 0:0e0631af0305 693 for( int i = 0; i < c.nlanes; i++ )
RyoheiHagimoto 0:0e0631af0305 694 c.s[i] = _absdiff(a.s[i], b.s[i]);
RyoheiHagimoto 0:0e0631af0305 695 return c;
RyoheiHagimoto 0:0e0631af0305 696 }
RyoheiHagimoto 0:0e0631af0305 697
RyoheiHagimoto 0:0e0631af0305 698 /** @overload
RyoheiHagimoto 0:0e0631af0305 699
RyoheiHagimoto 0:0e0631af0305 700 For 64-bit floating point values */
RyoheiHagimoto 0:0e0631af0305 701 inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b)
RyoheiHagimoto 0:0e0631af0305 702 {
RyoheiHagimoto 0:0e0631af0305 703 v_float64x2 c;
RyoheiHagimoto 0:0e0631af0305 704 for( int i = 0; i < c.nlanes; i++ )
RyoheiHagimoto 0:0e0631af0305 705 c.s[i] = _absdiff(a.s[i], b.s[i]);
RyoheiHagimoto 0:0e0631af0305 706 return c;
RyoheiHagimoto 0:0e0631af0305 707 }
RyoheiHagimoto 0:0e0631af0305 708
RyoheiHagimoto 0:0e0631af0305 709 /** @brief Inversed square root
RyoheiHagimoto 0:0e0631af0305 710
RyoheiHagimoto 0:0e0631af0305 711 Returns \f$ 1/sqrt(a) \f$
RyoheiHagimoto 0:0e0631af0305 712 For floating point types only. */
RyoheiHagimoto 0:0e0631af0305 713 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 714 inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 715 {
RyoheiHagimoto 0:0e0631af0305 716 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 717 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 718 c.s[i] = 1.f/std::sqrt(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 719 return c;
RyoheiHagimoto 0:0e0631af0305 720 }
RyoheiHagimoto 0:0e0631af0305 721
RyoheiHagimoto 0:0e0631af0305 722 /** @brief Magnitude
RyoheiHagimoto 0:0e0631af0305 723
RyoheiHagimoto 0:0e0631af0305 724 Returns \f$ sqrt(a^2 + b^2) \f$
RyoheiHagimoto 0:0e0631af0305 725 For floating point types only. */
RyoheiHagimoto 0:0e0631af0305 726 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 727 inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 728 {
RyoheiHagimoto 0:0e0631af0305 729 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 730 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 731 c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]);
RyoheiHagimoto 0:0e0631af0305 732 return c;
RyoheiHagimoto 0:0e0631af0305 733 }
RyoheiHagimoto 0:0e0631af0305 734
RyoheiHagimoto 0:0e0631af0305 735 /** @brief Square of the magnitude
RyoheiHagimoto 0:0e0631af0305 736
RyoheiHagimoto 0:0e0631af0305 737 Returns \f$ a^2 + b^2 \f$
RyoheiHagimoto 0:0e0631af0305 738 For floating point types only. */
RyoheiHagimoto 0:0e0631af0305 739 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 740 inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 741 {
RyoheiHagimoto 0:0e0631af0305 742 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 743 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 744 c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i];
RyoheiHagimoto 0:0e0631af0305 745 return c;
RyoheiHagimoto 0:0e0631af0305 746 }
RyoheiHagimoto 0:0e0631af0305 747
RyoheiHagimoto 0:0e0631af0305 748 /** @brief Multiply and add
RyoheiHagimoto 0:0e0631af0305 749
RyoheiHagimoto 0:0e0631af0305 750 Returns \f$ a*b + c \f$
RyoheiHagimoto 0:0e0631af0305 751 For floating point types only. */
RyoheiHagimoto 0:0e0631af0305 752 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 753 inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
RyoheiHagimoto 0:0e0631af0305 754 const v_reg<_Tp, n>& c)
RyoheiHagimoto 0:0e0631af0305 755 {
RyoheiHagimoto 0:0e0631af0305 756 v_reg<_Tp, n> d;
RyoheiHagimoto 0:0e0631af0305 757 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 758 d.s[i] = a.s[i]*b.s[i] + c.s[i];
RyoheiHagimoto 0:0e0631af0305 759 return d;
RyoheiHagimoto 0:0e0631af0305 760 }
RyoheiHagimoto 0:0e0631af0305 761
RyoheiHagimoto 0:0e0631af0305 762 /** @brief Dot product of elements
RyoheiHagimoto 0:0e0631af0305 763
RyoheiHagimoto 0:0e0631af0305 764 Multiply values in two registers and sum adjacent result pairs.
RyoheiHagimoto 0:0e0631af0305 765 Scheme:
RyoheiHagimoto 0:0e0631af0305 766 @code
RyoheiHagimoto 0:0e0631af0305 767 {A1 A2 ...} // 16-bit
RyoheiHagimoto 0:0e0631af0305 768 x {B1 B2 ...} // 16-bit
RyoheiHagimoto 0:0e0631af0305 769 -------------
RyoheiHagimoto 0:0e0631af0305 770 {A1B1+A2B2 ...} // 32-bit
RyoheiHagimoto 0:0e0631af0305 771 @endcode
RyoheiHagimoto 0:0e0631af0305 772 Implemented only for 16-bit signed source type (v_int16x8).
RyoheiHagimoto 0:0e0631af0305 773 */
RyoheiHagimoto 0:0e0631af0305 774 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>
RyoheiHagimoto 0:0e0631af0305 775 v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 776 {
RyoheiHagimoto 0:0e0631af0305 777 typedef typename V_TypeTraits<_Tp>::w_type w_type;
RyoheiHagimoto 0:0e0631af0305 778 v_reg<w_type, n/2> c;
RyoheiHagimoto 0:0e0631af0305 779 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 780 c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1];
RyoheiHagimoto 0:0e0631af0305 781 return c;
RyoheiHagimoto 0:0e0631af0305 782 }
RyoheiHagimoto 0:0e0631af0305 783
RyoheiHagimoto 0:0e0631af0305 784 /** @brief Multiply and expand
RyoheiHagimoto 0:0e0631af0305 785
RyoheiHagimoto 0:0e0631af0305 786 Multiply values two registers and store results in two registers with wider pack type.
RyoheiHagimoto 0:0e0631af0305 787 Scheme:
RyoheiHagimoto 0:0e0631af0305 788 @code
RyoheiHagimoto 0:0e0631af0305 789 {A B C D} // 32-bit
RyoheiHagimoto 0:0e0631af0305 790 x {E F G H} // 32-bit
RyoheiHagimoto 0:0e0631af0305 791 ---------------
RyoheiHagimoto 0:0e0631af0305 792 {AE BF} // 64-bit
RyoheiHagimoto 0:0e0631af0305 793 {CG DH} // 64-bit
RyoheiHagimoto 0:0e0631af0305 794 @endcode
RyoheiHagimoto 0:0e0631af0305 795 Example:
RyoheiHagimoto 0:0e0631af0305 796 @code{.cpp}
RyoheiHagimoto 0:0e0631af0305 797 v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
RyoheiHagimoto 0:0e0631af0305 798 v_uint64x2 c, d; // results
RyoheiHagimoto 0:0e0631af0305 799 v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}
RyoheiHagimoto 0:0e0631af0305 800 @endcode
RyoheiHagimoto 0:0e0631af0305 801 Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).
RyoheiHagimoto 0:0e0631af0305 802 */
RyoheiHagimoto 0:0e0631af0305 803 template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
RyoheiHagimoto 0:0e0631af0305 804 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c,
RyoheiHagimoto 0:0e0631af0305 805 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d)
RyoheiHagimoto 0:0e0631af0305 806 {
RyoheiHagimoto 0:0e0631af0305 807 typedef typename V_TypeTraits<_Tp>::w_type w_type;
RyoheiHagimoto 0:0e0631af0305 808 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 809 {
RyoheiHagimoto 0:0e0631af0305 810 c.s[i] = (w_type)a.s[i]*b.s[i];
RyoheiHagimoto 0:0e0631af0305 811 d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)];
RyoheiHagimoto 0:0e0631af0305 812 }
RyoheiHagimoto 0:0e0631af0305 813 }
RyoheiHagimoto 0:0e0631af0305 814
RyoheiHagimoto 0:0e0631af0305 815 //! @cond IGNORED
RyoheiHagimoto 0:0e0631af0305 816 template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a,
RyoheiHagimoto 0:0e0631af0305 817 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c)
RyoheiHagimoto 0:0e0631af0305 818 {
RyoheiHagimoto 0:0e0631af0305 819 typedef typename V_TypeTraits<_Tp>::w_type w_type;
RyoheiHagimoto 0:0e0631af0305 820 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 821 {
RyoheiHagimoto 0:0e0631af0305 822 c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1];
RyoheiHagimoto 0:0e0631af0305 823 }
RyoheiHagimoto 0:0e0631af0305 824 }
RyoheiHagimoto 0:0e0631af0305 825 //! @endcond
RyoheiHagimoto 0:0e0631af0305 826
RyoheiHagimoto 0:0e0631af0305 827 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 828 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 829 #define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \
RyoheiHagimoto 0:0e0631af0305 830 template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \
RyoheiHagimoto 0:0e0631af0305 831 { \
RyoheiHagimoto 0:0e0631af0305 832 v_reg<_Tp, n> c; \
RyoheiHagimoto 0:0e0631af0305 833 for( int i = 0; i < n; i++ ) \
RyoheiHagimoto 0:0e0631af0305 834 c.s[i] = (_Tp)(a.s[i] shift_op imm); \
RyoheiHagimoto 0:0e0631af0305 835 return c; \
RyoheiHagimoto 0:0e0631af0305 836 }
RyoheiHagimoto 0:0e0631af0305 837
RyoheiHagimoto 0:0e0631af0305 838 /** @brief Bitwise shift left
RyoheiHagimoto 0:0e0631af0305 839
RyoheiHagimoto 0:0e0631af0305 840 For 16-, 32- and 64-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 841 OPENCV_HAL_IMPL_SHIFT_OP(<<)
RyoheiHagimoto 0:0e0631af0305 842
RyoheiHagimoto 0:0e0631af0305 843 /** @brief Bitwise shift right
RyoheiHagimoto 0:0e0631af0305 844
RyoheiHagimoto 0:0e0631af0305 845 For 16-, 32- and 64-bit integer values. */
RyoheiHagimoto 0:0e0631af0305 846 OPENCV_HAL_IMPL_SHIFT_OP(>>)
RyoheiHagimoto 0:0e0631af0305 847
RyoheiHagimoto 0:0e0631af0305 848 /** @brief Sum packed values
RyoheiHagimoto 0:0e0631af0305 849
RyoheiHagimoto 0:0e0631af0305 850 Scheme:
RyoheiHagimoto 0:0e0631af0305 851 @code
RyoheiHagimoto 0:0e0631af0305 852 {A1 A2 A3 ...} => sum{A1,A2,A3,...}
RyoheiHagimoto 0:0e0631af0305 853 @endcode
RyoheiHagimoto 0:0e0631af0305 854 For 32-bit integer and 32-bit floating point types.*/
RyoheiHagimoto 0:0e0631af0305 855 template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 856 {
RyoheiHagimoto 0:0e0631af0305 857 typename V_TypeTraits<_Tp>::sum_type c = a.s[0];
RyoheiHagimoto 0:0e0631af0305 858 for( int i = 1; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 859 c += a.s[i];
RyoheiHagimoto 0:0e0631af0305 860 return c;
RyoheiHagimoto 0:0e0631af0305 861 }
RyoheiHagimoto 0:0e0631af0305 862
RyoheiHagimoto 0:0e0631af0305 863 /** @brief Get negative values mask
RyoheiHagimoto 0:0e0631af0305 864
RyoheiHagimoto 0:0e0631af0305 865 Returned value is a bit mask with bits set to 1 on places corresponding to negative packed values indexes.
RyoheiHagimoto 0:0e0631af0305 866 Example:
RyoheiHagimoto 0:0e0631af0305 867 @code{.cpp}
RyoheiHagimoto 0:0e0631af0305 868 v_int32x4 r; // set to {-1, -1, 1, 1}
RyoheiHagimoto 0:0e0631af0305 869 int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
RyoheiHagimoto 0:0e0631af0305 870 @endcode
RyoheiHagimoto 0:0e0631af0305 871 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 872 template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 873 {
RyoheiHagimoto 0:0e0631af0305 874 int mask = 0;
RyoheiHagimoto 0:0e0631af0305 875 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 876 mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i;
RyoheiHagimoto 0:0e0631af0305 877 return mask;
RyoheiHagimoto 0:0e0631af0305 878 }
RyoheiHagimoto 0:0e0631af0305 879
RyoheiHagimoto 0:0e0631af0305 880 /** @brief Check if all packed values are less than zero
RyoheiHagimoto 0:0e0631af0305 881
RyoheiHagimoto 0:0e0631af0305 882 Unsigned values will be casted to signed: `uchar 254 => char -2`.
RyoheiHagimoto 0:0e0631af0305 883 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 884 template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 885 {
RyoheiHagimoto 0:0e0631af0305 886 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 887 if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 )
RyoheiHagimoto 0:0e0631af0305 888 return false;
RyoheiHagimoto 0:0e0631af0305 889 return true;
RyoheiHagimoto 0:0e0631af0305 890 }
RyoheiHagimoto 0:0e0631af0305 891
RyoheiHagimoto 0:0e0631af0305 892 /** @brief Check if any of packed values is less than zero
RyoheiHagimoto 0:0e0631af0305 893
RyoheiHagimoto 0:0e0631af0305 894 Unsigned values will be casted to signed: `uchar 254 => char -2`.
RyoheiHagimoto 0:0e0631af0305 895 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 896 template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 897 {
RyoheiHagimoto 0:0e0631af0305 898 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 899 if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 )
RyoheiHagimoto 0:0e0631af0305 900 return true;
RyoheiHagimoto 0:0e0631af0305 901 return false;
RyoheiHagimoto 0:0e0631af0305 902 }
RyoheiHagimoto 0:0e0631af0305 903
RyoheiHagimoto 0:0e0631af0305 904 /** @brief Bitwise select
RyoheiHagimoto 0:0e0631af0305 905
RyoheiHagimoto 0:0e0631af0305 906 Return value will be built by combining values a and b using the following scheme:
RyoheiHagimoto 0:0e0631af0305 907 If the i-th bit in _mask_ is 1
RyoheiHagimoto 0:0e0631af0305 908 select i-th bit from _a_
RyoheiHagimoto 0:0e0631af0305 909 else
RyoheiHagimoto 0:0e0631af0305 910 select i-th bit from _b_ */
RyoheiHagimoto 0:0e0631af0305 911 template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask,
RyoheiHagimoto 0:0e0631af0305 912 const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 913 {
RyoheiHagimoto 0:0e0631af0305 914 typedef V_TypeTraits<_Tp> Traits;
RyoheiHagimoto 0:0e0631af0305 915 typedef typename Traits::int_type int_type;
RyoheiHagimoto 0:0e0631af0305 916 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 917 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 918 {
RyoheiHagimoto 0:0e0631af0305 919 int_type m = Traits::reinterpret_int(mask.s[i]);
RyoheiHagimoto 0:0e0631af0305 920 c.s[i] = Traits::reinterpret_from_int((Traits::reinterpret_int(a.s[i]) & m)
RyoheiHagimoto 0:0e0631af0305 921 | (Traits::reinterpret_int(b.s[i]) & ~m));
RyoheiHagimoto 0:0e0631af0305 922 }
RyoheiHagimoto 0:0e0631af0305 923 return c;
RyoheiHagimoto 0:0e0631af0305 924 }
RyoheiHagimoto 0:0e0631af0305 925
RyoheiHagimoto 0:0e0631af0305 926 /** @brief Expand values to the wider pack type
RyoheiHagimoto 0:0e0631af0305 927
RyoheiHagimoto 0:0e0631af0305 928 Copy contents of register to two registers with 2x wider pack type.
RyoheiHagimoto 0:0e0631af0305 929 Scheme:
RyoheiHagimoto 0:0e0631af0305 930 @code
RyoheiHagimoto 0:0e0631af0305 931 int32x4 int64x2 int64x2
RyoheiHagimoto 0:0e0631af0305 932 {A B C D} ==> {A B} , {C D}
RyoheiHagimoto 0:0e0631af0305 933 @endcode */
RyoheiHagimoto 0:0e0631af0305 934 template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a,
RyoheiHagimoto 0:0e0631af0305 935 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0,
RyoheiHagimoto 0:0e0631af0305 936 v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1)
RyoheiHagimoto 0:0e0631af0305 937 {
RyoheiHagimoto 0:0e0631af0305 938 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 939 {
RyoheiHagimoto 0:0e0631af0305 940 b0.s[i] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 941 b1.s[i] = a.s[i+(n/2)];
RyoheiHagimoto 0:0e0631af0305 942 }
RyoheiHagimoto 0:0e0631af0305 943 }
RyoheiHagimoto 0:0e0631af0305 944
RyoheiHagimoto 0:0e0631af0305 945 //! @cond IGNORED
RyoheiHagimoto 0:0e0631af0305 946 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n>
RyoheiHagimoto 0:0e0631af0305 947 v_reinterpret_as_int(const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 948 {
RyoheiHagimoto 0:0e0631af0305 949 v_reg<typename V_TypeTraits<_Tp>::int_type, n> c;
RyoheiHagimoto 0:0e0631af0305 950 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 951 c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 952 return c;
RyoheiHagimoto 0:0e0631af0305 953 }
RyoheiHagimoto 0:0e0631af0305 954
RyoheiHagimoto 0:0e0631af0305 955 template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n>
RyoheiHagimoto 0:0e0631af0305 956 v_reinterpret_as_uint(const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 957 {
RyoheiHagimoto 0:0e0631af0305 958 v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c;
RyoheiHagimoto 0:0e0631af0305 959 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 960 c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 961 return c;
RyoheiHagimoto 0:0e0631af0305 962 }
RyoheiHagimoto 0:0e0631af0305 963 //! @endcond
RyoheiHagimoto 0:0e0631af0305 964
RyoheiHagimoto 0:0e0631af0305 965 /** @brief Interleave two vectors
RyoheiHagimoto 0:0e0631af0305 966
RyoheiHagimoto 0:0e0631af0305 967 Scheme:
RyoheiHagimoto 0:0e0631af0305 968 @code
RyoheiHagimoto 0:0e0631af0305 969 {A1 A2 A3 A4}
RyoheiHagimoto 0:0e0631af0305 970 {B1 B2 B3 B4}
RyoheiHagimoto 0:0e0631af0305 971 ---------------
RyoheiHagimoto 0:0e0631af0305 972 {A1 B1 A2 B2} and {A3 B3 A4 B4}
RyoheiHagimoto 0:0e0631af0305 973 @endcode
RyoheiHagimoto 0:0e0631af0305 974 For all types except 64-bit.
RyoheiHagimoto 0:0e0631af0305 975 */
RyoheiHagimoto 0:0e0631af0305 976 template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1,
RyoheiHagimoto 0:0e0631af0305 977 v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 )
RyoheiHagimoto 0:0e0631af0305 978 {
RyoheiHagimoto 0:0e0631af0305 979 int i;
RyoheiHagimoto 0:0e0631af0305 980 for( i = 0; i < n/2; i++ )
RyoheiHagimoto 0:0e0631af0305 981 {
RyoheiHagimoto 0:0e0631af0305 982 b0.s[i*2] = a0.s[i];
RyoheiHagimoto 0:0e0631af0305 983 b0.s[i*2+1] = a1.s[i];
RyoheiHagimoto 0:0e0631af0305 984 }
RyoheiHagimoto 0:0e0631af0305 985 for( ; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 986 {
RyoheiHagimoto 0:0e0631af0305 987 b1.s[i*2-n] = a0.s[i];
RyoheiHagimoto 0:0e0631af0305 988 b1.s[i*2-n+1] = a1.s[i];
RyoheiHagimoto 0:0e0631af0305 989 }
RyoheiHagimoto 0:0e0631af0305 990 }
RyoheiHagimoto 0:0e0631af0305 991
RyoheiHagimoto 0:0e0631af0305 992 /** @brief Load register contents from memory
RyoheiHagimoto 0:0e0631af0305 993
RyoheiHagimoto 0:0e0631af0305 994 @param ptr pointer to memory block with data
RyoheiHagimoto 0:0e0631af0305 995 @return register object
RyoheiHagimoto 0:0e0631af0305 996
RyoheiHagimoto 0:0e0631af0305 997 @note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
RyoheiHagimoto 0:0e0631af0305 998 */
RyoheiHagimoto 0:0e0631af0305 999 template<typename _Tp>
RyoheiHagimoto 0:0e0631af0305 1000 inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load(const _Tp* ptr)
RyoheiHagimoto 0:0e0631af0305 1001 {
RyoheiHagimoto 0:0e0631af0305 1002 return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
RyoheiHagimoto 0:0e0631af0305 1003 }
RyoheiHagimoto 0:0e0631af0305 1004
RyoheiHagimoto 0:0e0631af0305 1005 /** @brief Load register contents from memory (aligned)
RyoheiHagimoto 0:0e0631af0305 1006
RyoheiHagimoto 0:0e0631af0305 1007 similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)
RyoheiHagimoto 0:0e0631af0305 1008 */
RyoheiHagimoto 0:0e0631af0305 1009 template<typename _Tp>
RyoheiHagimoto 0:0e0631af0305 1010 inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_aligned(const _Tp* ptr)
RyoheiHagimoto 0:0e0631af0305 1011 {
RyoheiHagimoto 0:0e0631af0305 1012 return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr);
RyoheiHagimoto 0:0e0631af0305 1013 }
RyoheiHagimoto 0:0e0631af0305 1014
RyoheiHagimoto 0:0e0631af0305 1015 /** @brief Load register contents from two memory blocks
RyoheiHagimoto 0:0e0631af0305 1016
RyoheiHagimoto 0:0e0631af0305 1017 @param loptr memory block containing data for first half (0..n/2)
RyoheiHagimoto 0:0e0631af0305 1018 @param hiptr memory block containing data for second half (n/2..n)
RyoheiHagimoto 0:0e0631af0305 1019
RyoheiHagimoto 0:0e0631af0305 1020 @code{.cpp}
RyoheiHagimoto 0:0e0631af0305 1021 int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };
RyoheiHagimoto 0:0e0631af0305 1022 v_int32x4 r = v_load_halves(lo, hi);
RyoheiHagimoto 0:0e0631af0305 1023 @endcode
RyoheiHagimoto 0:0e0631af0305 1024 */
RyoheiHagimoto 0:0e0631af0305 1025 template<typename _Tp>
RyoheiHagimoto 0:0e0631af0305 1026 inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_halves(const _Tp* loptr, const _Tp* hiptr)
RyoheiHagimoto 0:0e0631af0305 1027 {
RyoheiHagimoto 0:0e0631af0305 1028 v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c;
RyoheiHagimoto 0:0e0631af0305 1029 for( int i = 0; i < c.nlanes/2; i++ )
RyoheiHagimoto 0:0e0631af0305 1030 {
RyoheiHagimoto 0:0e0631af0305 1031 c.s[i] = loptr[i];
RyoheiHagimoto 0:0e0631af0305 1032 c.s[i+c.nlanes/2] = hiptr[i];
RyoheiHagimoto 0:0e0631af0305 1033 }
RyoheiHagimoto 0:0e0631af0305 1034 return c;
RyoheiHagimoto 0:0e0631af0305 1035 }
RyoheiHagimoto 0:0e0631af0305 1036
RyoheiHagimoto 0:0e0631af0305 1037 /** @brief Load register contents from memory with double expand
RyoheiHagimoto 0:0e0631af0305 1038
RyoheiHagimoto 0:0e0631af0305 1039 Same as cv::v_load, but result pack type will be 2x wider than memory type.
RyoheiHagimoto 0:0e0631af0305 1040
RyoheiHagimoto 0:0e0631af0305 1041 @code{.cpp}
RyoheiHagimoto 0:0e0631af0305 1042 short buf[4] = {1, 2, 3, 4}; // type is int16
RyoheiHagimoto 0:0e0631af0305 1043 v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
RyoheiHagimoto 0:0e0631af0305 1044 @endcode
RyoheiHagimoto 0:0e0631af0305 1045 For 8-, 16-, 32-bit integer source types. */
RyoheiHagimoto 0:0e0631af0305 1046 template<typename _Tp>
RyoheiHagimoto 0:0e0631af0305 1047 inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2>
RyoheiHagimoto 0:0e0631af0305 1048 v_load_expand(const _Tp* ptr)
RyoheiHagimoto 0:0e0631af0305 1049 {
RyoheiHagimoto 0:0e0631af0305 1050 typedef typename V_TypeTraits<_Tp>::w_type w_type;
RyoheiHagimoto 0:0e0631af0305 1051 v_reg<w_type, V_SIMD128Traits<w_type>::nlanes> c;
RyoheiHagimoto 0:0e0631af0305 1052 for( int i = 0; i < c.nlanes; i++ )
RyoheiHagimoto 0:0e0631af0305 1053 {
RyoheiHagimoto 0:0e0631af0305 1054 c.s[i] = ptr[i];
RyoheiHagimoto 0:0e0631af0305 1055 }
RyoheiHagimoto 0:0e0631af0305 1056 return c;
RyoheiHagimoto 0:0e0631af0305 1057 }
RyoheiHagimoto 0:0e0631af0305 1058
RyoheiHagimoto 0:0e0631af0305 1059 /** @brief Load register contents from memory with quad expand
RyoheiHagimoto 0:0e0631af0305 1060
RyoheiHagimoto 0:0e0631af0305 1061 Same as cv::v_load_expand, but result type is 4 times wider than source.
RyoheiHagimoto 0:0e0631af0305 1062 @code{.cpp}
RyoheiHagimoto 0:0e0631af0305 1063 char buf[4] = {1, 2, 3, 4}; // type is int8
RyoheiHagimoto 0:0e0631af0305 1064 v_int32x4 r = v_load_q(buf); // r = {1, 2, 3, 4} - type is int32
RyoheiHagimoto 0:0e0631af0305 1065 @endcode
RyoheiHagimoto 0:0e0631af0305 1066 For 8-bit integer source types. */
RyoheiHagimoto 0:0e0631af0305 1067 template<typename _Tp>
RyoheiHagimoto 0:0e0631af0305 1068 inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4>
RyoheiHagimoto 0:0e0631af0305 1069 v_load_expand_q(const _Tp* ptr)
RyoheiHagimoto 0:0e0631af0305 1070 {
RyoheiHagimoto 0:0e0631af0305 1071 typedef typename V_TypeTraits<_Tp>::q_type q_type;
RyoheiHagimoto 0:0e0631af0305 1072 v_reg<q_type, V_SIMD128Traits<q_type>::nlanes> c;
RyoheiHagimoto 0:0e0631af0305 1073 for( int i = 0; i < c.nlanes; i++ )
RyoheiHagimoto 0:0e0631af0305 1074 {
RyoheiHagimoto 0:0e0631af0305 1075 c.s[i] = ptr[i];
RyoheiHagimoto 0:0e0631af0305 1076 }
RyoheiHagimoto 0:0e0631af0305 1077 return c;
RyoheiHagimoto 0:0e0631af0305 1078 }
RyoheiHagimoto 0:0e0631af0305 1079
RyoheiHagimoto 0:0e0631af0305 1080 /** @brief Load and deinterleave (2 channels)
RyoheiHagimoto 0:0e0631af0305 1081
RyoheiHagimoto 0:0e0631af0305 1082 Load data from memory deinterleave and store to 2 registers.
RyoheiHagimoto 0:0e0631af0305 1083 Scheme:
RyoheiHagimoto 0:0e0631af0305 1084 @code
RyoheiHagimoto 0:0e0631af0305 1085 {A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...}
RyoheiHagimoto 0:0e0631af0305 1086 @endcode
RyoheiHagimoto 0:0e0631af0305 1087 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 1088 template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
RyoheiHagimoto 0:0e0631af0305 1089 v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 1090 {
RyoheiHagimoto 0:0e0631af0305 1091 int i, i2;
RyoheiHagimoto 0:0e0631af0305 1092 for( i = i2 = 0; i < n; i++, i2 += 2 )
RyoheiHagimoto 0:0e0631af0305 1093 {
RyoheiHagimoto 0:0e0631af0305 1094 a.s[i] = ptr[i2];
RyoheiHagimoto 0:0e0631af0305 1095 b.s[i] = ptr[i2+1];
RyoheiHagimoto 0:0e0631af0305 1096 }
RyoheiHagimoto 0:0e0631af0305 1097 }
RyoheiHagimoto 0:0e0631af0305 1098
RyoheiHagimoto 0:0e0631af0305 1099 /** @brief Load and deinterleave (3 channels)
RyoheiHagimoto 0:0e0631af0305 1100
RyoheiHagimoto 0:0e0631af0305 1101 Load data from memory deinterleave and store to 3 registers.
RyoheiHagimoto 0:0e0631af0305 1102 Scheme:
RyoheiHagimoto 0:0e0631af0305 1103 @code
RyoheiHagimoto 0:0e0631af0305 1104 {A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
RyoheiHagimoto 0:0e0631af0305 1105 @endcode
RyoheiHagimoto 0:0e0631af0305 1106 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 1107 template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
RyoheiHagimoto 0:0e0631af0305 1108 v_reg<_Tp, n>& b, v_reg<_Tp, n>& c)
RyoheiHagimoto 0:0e0631af0305 1109 {
RyoheiHagimoto 0:0e0631af0305 1110 int i, i3;
RyoheiHagimoto 0:0e0631af0305 1111 for( i = i3 = 0; i < n; i++, i3 += 3 )
RyoheiHagimoto 0:0e0631af0305 1112 {
RyoheiHagimoto 0:0e0631af0305 1113 a.s[i] = ptr[i3];
RyoheiHagimoto 0:0e0631af0305 1114 b.s[i] = ptr[i3+1];
RyoheiHagimoto 0:0e0631af0305 1115 c.s[i] = ptr[i3+2];
RyoheiHagimoto 0:0e0631af0305 1116 }
RyoheiHagimoto 0:0e0631af0305 1117 }
RyoheiHagimoto 0:0e0631af0305 1118
RyoheiHagimoto 0:0e0631af0305 1119 /** @brief Load and deinterleave (4 channels)
RyoheiHagimoto 0:0e0631af0305 1120
RyoheiHagimoto 0:0e0631af0305 1121 Load data from memory deinterleave and store to 4 registers.
RyoheiHagimoto 0:0e0631af0305 1122 Scheme:
RyoheiHagimoto 0:0e0631af0305 1123 @code
RyoheiHagimoto 0:0e0631af0305 1124 {A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}
RyoheiHagimoto 0:0e0631af0305 1125 @endcode
RyoheiHagimoto 0:0e0631af0305 1126 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 1127 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1128 inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a,
RyoheiHagimoto 0:0e0631af0305 1129 v_reg<_Tp, n>& b, v_reg<_Tp, n>& c,
RyoheiHagimoto 0:0e0631af0305 1130 v_reg<_Tp, n>& d)
RyoheiHagimoto 0:0e0631af0305 1131 {
RyoheiHagimoto 0:0e0631af0305 1132 int i, i4;
RyoheiHagimoto 0:0e0631af0305 1133 for( i = i4 = 0; i < n; i++, i4 += 4 )
RyoheiHagimoto 0:0e0631af0305 1134 {
RyoheiHagimoto 0:0e0631af0305 1135 a.s[i] = ptr[i4];
RyoheiHagimoto 0:0e0631af0305 1136 b.s[i] = ptr[i4+1];
RyoheiHagimoto 0:0e0631af0305 1137 c.s[i] = ptr[i4+2];
RyoheiHagimoto 0:0e0631af0305 1138 d.s[i] = ptr[i4+3];
RyoheiHagimoto 0:0e0631af0305 1139 }
RyoheiHagimoto 0:0e0631af0305 1140 }
RyoheiHagimoto 0:0e0631af0305 1141
RyoheiHagimoto 0:0e0631af0305 1142 /** @brief Interleave and store (2 channels)
RyoheiHagimoto 0:0e0631af0305 1143
RyoheiHagimoto 0:0e0631af0305 1144 Interleave and store data from 2 registers to memory.
RyoheiHagimoto 0:0e0631af0305 1145 Scheme:
RyoheiHagimoto 0:0e0631af0305 1146 @code
RyoheiHagimoto 0:0e0631af0305 1147 {A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...}
RyoheiHagimoto 0:0e0631af0305 1148 @endcode
RyoheiHagimoto 0:0e0631af0305 1149 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 1150 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1151 inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
RyoheiHagimoto 0:0e0631af0305 1152 const v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 1153 {
RyoheiHagimoto 0:0e0631af0305 1154 int i, i2;
RyoheiHagimoto 0:0e0631af0305 1155 for( i = i2 = 0; i < n; i++, i2 += 2 )
RyoheiHagimoto 0:0e0631af0305 1156 {
RyoheiHagimoto 0:0e0631af0305 1157 ptr[i2] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 1158 ptr[i2+1] = b.s[i];
RyoheiHagimoto 0:0e0631af0305 1159 }
RyoheiHagimoto 0:0e0631af0305 1160 }
RyoheiHagimoto 0:0e0631af0305 1161
RyoheiHagimoto 0:0e0631af0305 1162 /** @brief Interleave and store (3 channels)
RyoheiHagimoto 0:0e0631af0305 1163
RyoheiHagimoto 0:0e0631af0305 1164 Interleave and store data from 3 registers to memory.
RyoheiHagimoto 0:0e0631af0305 1165 Scheme:
RyoheiHagimoto 0:0e0631af0305 1166 @code
RyoheiHagimoto 0:0e0631af0305 1167 {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}
RyoheiHagimoto 0:0e0631af0305 1168 @endcode
RyoheiHagimoto 0:0e0631af0305 1169 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 1170 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1171 inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
RyoheiHagimoto 0:0e0631af0305 1172 const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c)
RyoheiHagimoto 0:0e0631af0305 1173 {
RyoheiHagimoto 0:0e0631af0305 1174 int i, i3;
RyoheiHagimoto 0:0e0631af0305 1175 for( i = i3 = 0; i < n; i++, i3 += 3 )
RyoheiHagimoto 0:0e0631af0305 1176 {
RyoheiHagimoto 0:0e0631af0305 1177 ptr[i3] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 1178 ptr[i3+1] = b.s[i];
RyoheiHagimoto 0:0e0631af0305 1179 ptr[i3+2] = c.s[i];
RyoheiHagimoto 0:0e0631af0305 1180 }
RyoheiHagimoto 0:0e0631af0305 1181 }
RyoheiHagimoto 0:0e0631af0305 1182
RyoheiHagimoto 0:0e0631af0305 1183 /** @brief Interleave and store (4 channels)
RyoheiHagimoto 0:0e0631af0305 1184
RyoheiHagimoto 0:0e0631af0305 1185 Interleave and store data from 4 registers to memory.
RyoheiHagimoto 0:0e0631af0305 1186 Scheme:
RyoheiHagimoto 0:0e0631af0305 1187 @code
RyoheiHagimoto 0:0e0631af0305 1188 {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}
RyoheiHagimoto 0:0e0631af0305 1189 @endcode
RyoheiHagimoto 0:0e0631af0305 1190 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 1191 template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a,
RyoheiHagimoto 0:0e0631af0305 1192 const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c,
RyoheiHagimoto 0:0e0631af0305 1193 const v_reg<_Tp, n>& d)
RyoheiHagimoto 0:0e0631af0305 1194 {
RyoheiHagimoto 0:0e0631af0305 1195 int i, i4;
RyoheiHagimoto 0:0e0631af0305 1196 for( i = i4 = 0; i < n; i++, i4 += 4 )
RyoheiHagimoto 0:0e0631af0305 1197 {
RyoheiHagimoto 0:0e0631af0305 1198 ptr[i4] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 1199 ptr[i4+1] = b.s[i];
RyoheiHagimoto 0:0e0631af0305 1200 ptr[i4+2] = c.s[i];
RyoheiHagimoto 0:0e0631af0305 1201 ptr[i4+3] = d.s[i];
RyoheiHagimoto 0:0e0631af0305 1202 }
RyoheiHagimoto 0:0e0631af0305 1203 }
RyoheiHagimoto 0:0e0631af0305 1204
RyoheiHagimoto 0:0e0631af0305 1205 /** @brief Store data to memory
RyoheiHagimoto 0:0e0631af0305 1206
RyoheiHagimoto 0:0e0631af0305 1207 Store register contents to memory.
RyoheiHagimoto 0:0e0631af0305 1208 Scheme:
RyoheiHagimoto 0:0e0631af0305 1209 @code
RyoheiHagimoto 0:0e0631af0305 1210 REG {A B C D} ==> MEM {A B C D}
RyoheiHagimoto 0:0e0631af0305 1211 @endcode
RyoheiHagimoto 0:0e0631af0305 1212 Pointer can be unaligned. */
RyoheiHagimoto 0:0e0631af0305 1213 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1214 inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 1215 {
RyoheiHagimoto 0:0e0631af0305 1216 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1217 ptr[i] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 1218 }
RyoheiHagimoto 0:0e0631af0305 1219
RyoheiHagimoto 0:0e0631af0305 1220 /** @brief Store data to memory (lower half)
RyoheiHagimoto 0:0e0631af0305 1221
RyoheiHagimoto 0:0e0631af0305 1222 Store lower half of register contents to memory.
RyoheiHagimoto 0:0e0631af0305 1223 Scheme:
RyoheiHagimoto 0:0e0631af0305 1224 @code
RyoheiHagimoto 0:0e0631af0305 1225 REG {A B C D} ==> MEM {A B}
RyoheiHagimoto 0:0e0631af0305 1226 @endcode */
RyoheiHagimoto 0:0e0631af0305 1227 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1228 inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 1229 {
RyoheiHagimoto 0:0e0631af0305 1230 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 1231 ptr[i] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 1232 }
RyoheiHagimoto 0:0e0631af0305 1233
RyoheiHagimoto 0:0e0631af0305 1234 /** @brief Store data to memory (higher half)
RyoheiHagimoto 0:0e0631af0305 1235
RyoheiHagimoto 0:0e0631af0305 1236 Store higher half of register contents to memory.
RyoheiHagimoto 0:0e0631af0305 1237 Scheme:
RyoheiHagimoto 0:0e0631af0305 1238 @code
RyoheiHagimoto 0:0e0631af0305 1239 REG {A B C D} ==> MEM {C D}
RyoheiHagimoto 0:0e0631af0305 1240 @endcode */
RyoheiHagimoto 0:0e0631af0305 1241 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1242 inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 1243 {
RyoheiHagimoto 0:0e0631af0305 1244 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 1245 ptr[i] = a.s[i+(n/2)];
RyoheiHagimoto 0:0e0631af0305 1246 }
RyoheiHagimoto 0:0e0631af0305 1247
RyoheiHagimoto 0:0e0631af0305 1248 /** @brief Store data to memory (aligned)
RyoheiHagimoto 0:0e0631af0305 1249
RyoheiHagimoto 0:0e0631af0305 1250 Store register contents to memory.
RyoheiHagimoto 0:0e0631af0305 1251 Scheme:
RyoheiHagimoto 0:0e0631af0305 1252 @code
RyoheiHagimoto 0:0e0631af0305 1253 REG {A B C D} ==> MEM {A B C D}
RyoheiHagimoto 0:0e0631af0305 1254 @endcode
RyoheiHagimoto 0:0e0631af0305 1255 Pointer __should__ be aligned by 16-byte boundary. */
RyoheiHagimoto 0:0e0631af0305 1256 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1257 inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a)
RyoheiHagimoto 0:0e0631af0305 1258 {
RyoheiHagimoto 0:0e0631af0305 1259 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1260 ptr[i] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 1261 }
RyoheiHagimoto 0:0e0631af0305 1262
RyoheiHagimoto 0:0e0631af0305 1263 /** @brief Combine vector from first elements of two vectors
RyoheiHagimoto 0:0e0631af0305 1264
RyoheiHagimoto 0:0e0631af0305 1265 Scheme:
RyoheiHagimoto 0:0e0631af0305 1266 @code
RyoheiHagimoto 0:0e0631af0305 1267 {A1 A2 A3 A4}
RyoheiHagimoto 0:0e0631af0305 1268 {B1 B2 B3 B4}
RyoheiHagimoto 0:0e0631af0305 1269 ---------------
RyoheiHagimoto 0:0e0631af0305 1270 {A1 A2 B1 B2}
RyoheiHagimoto 0:0e0631af0305 1271 @endcode
RyoheiHagimoto 0:0e0631af0305 1272 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 1273 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1274 inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 1275 {
RyoheiHagimoto 0:0e0631af0305 1276 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 1277 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 1278 {
RyoheiHagimoto 0:0e0631af0305 1279 c.s[i] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 1280 c.s[i+(n/2)] = b.s[i];
RyoheiHagimoto 0:0e0631af0305 1281 }
RyoheiHagimoto 0:0e0631af0305 1282 return c;
RyoheiHagimoto 0:0e0631af0305 1283 }
RyoheiHagimoto 0:0e0631af0305 1284
RyoheiHagimoto 0:0e0631af0305 1285 /** @brief Combine vector from last elements of two vectors
RyoheiHagimoto 0:0e0631af0305 1286
RyoheiHagimoto 0:0e0631af0305 1287 Scheme:
RyoheiHagimoto 0:0e0631af0305 1288 @code
RyoheiHagimoto 0:0e0631af0305 1289 {A1 A2 A3 A4}
RyoheiHagimoto 0:0e0631af0305 1290 {B1 B2 B3 B4}
RyoheiHagimoto 0:0e0631af0305 1291 ---------------
RyoheiHagimoto 0:0e0631af0305 1292 {A3 A4 B3 B4}
RyoheiHagimoto 0:0e0631af0305 1293 @endcode
RyoheiHagimoto 0:0e0631af0305 1294 For all types except 64-bit. */
RyoheiHagimoto 0:0e0631af0305 1295 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1296 inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 1297 {
RyoheiHagimoto 0:0e0631af0305 1298 v_reg<_Tp, n> c;
RyoheiHagimoto 0:0e0631af0305 1299 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 1300 {
RyoheiHagimoto 0:0e0631af0305 1301 c.s[i] = a.s[i+(n/2)];
RyoheiHagimoto 0:0e0631af0305 1302 c.s[i+(n/2)] = b.s[i+(n/2)];
RyoheiHagimoto 0:0e0631af0305 1303 }
RyoheiHagimoto 0:0e0631af0305 1304 return c;
RyoheiHagimoto 0:0e0631af0305 1305 }
RyoheiHagimoto 0:0e0631af0305 1306
RyoheiHagimoto 0:0e0631af0305 1307 /** @brief Combine two vectors from lower and higher parts of two other vectors
RyoheiHagimoto 0:0e0631af0305 1308
RyoheiHagimoto 0:0e0631af0305 1309 @code{.cpp}
RyoheiHagimoto 0:0e0631af0305 1310 low = cv::v_combine_low(a, b);
RyoheiHagimoto 0:0e0631af0305 1311 high = cv::v_combine_high(a, b);
RyoheiHagimoto 0:0e0631af0305 1312 @endcode */
RyoheiHagimoto 0:0e0631af0305 1313 template<typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1314 inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b,
RyoheiHagimoto 0:0e0631af0305 1315 v_reg<_Tp, n>& low, v_reg<_Tp, n>& high)
RyoheiHagimoto 0:0e0631af0305 1316 {
RyoheiHagimoto 0:0e0631af0305 1317 for( int i = 0; i < (n/2); i++ )
RyoheiHagimoto 0:0e0631af0305 1318 {
RyoheiHagimoto 0:0e0631af0305 1319 low.s[i] = a.s[i];
RyoheiHagimoto 0:0e0631af0305 1320 low.s[i+(n/2)] = b.s[i];
RyoheiHagimoto 0:0e0631af0305 1321 high.s[i] = a.s[i+(n/2)];
RyoheiHagimoto 0:0e0631af0305 1322 high.s[i+(n/2)] = b.s[i+(n/2)];
RyoheiHagimoto 0:0e0631af0305 1323 }
RyoheiHagimoto 0:0e0631af0305 1324 }
RyoheiHagimoto 0:0e0631af0305 1325
RyoheiHagimoto 0:0e0631af0305 1326 /** @brief Vector extract
RyoheiHagimoto 0:0e0631af0305 1327
RyoheiHagimoto 0:0e0631af0305 1328 Scheme:
RyoheiHagimoto 0:0e0631af0305 1329 @code
RyoheiHagimoto 0:0e0631af0305 1330 {A1 A2 A3 A4}
RyoheiHagimoto 0:0e0631af0305 1331 {B1 B2 B3 B4}
RyoheiHagimoto 0:0e0631af0305 1332 ========================
RyoheiHagimoto 0:0e0631af0305 1333 shift = 1 {A2 A3 A4 B1}
RyoheiHagimoto 0:0e0631af0305 1334 shift = 2 {A3 A4 B1 B2}
RyoheiHagimoto 0:0e0631af0305 1335 shift = 3 {A4 B1 B2 B3}
RyoheiHagimoto 0:0e0631af0305 1336 @endcode
RyoheiHagimoto 0:0e0631af0305 1337 Restriction: 0 <= shift < nlanes
RyoheiHagimoto 0:0e0631af0305 1338
RyoheiHagimoto 0:0e0631af0305 1339 Usage:
RyoheiHagimoto 0:0e0631af0305 1340 @code
RyoheiHagimoto 0:0e0631af0305 1341 v_int32x4 a, b, c;
RyoheiHagimoto 0:0e0631af0305 1342 c = v_extract<2>(a, b);
RyoheiHagimoto 0:0e0631af0305 1343 @endcode
RyoheiHagimoto 0:0e0631af0305 1344 For integer types only. */
RyoheiHagimoto 0:0e0631af0305 1345 template<int s, typename _Tp, int n>
RyoheiHagimoto 0:0e0631af0305 1346 inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b)
RyoheiHagimoto 0:0e0631af0305 1347 {
RyoheiHagimoto 0:0e0631af0305 1348 v_reg<_Tp, n> r;
RyoheiHagimoto 0:0e0631af0305 1349 const int shift = n - s;
RyoheiHagimoto 0:0e0631af0305 1350 int i = 0;
RyoheiHagimoto 0:0e0631af0305 1351 for (; i < shift; ++i)
RyoheiHagimoto 0:0e0631af0305 1352 r.s[i] = a.s[i+s];
RyoheiHagimoto 0:0e0631af0305 1353 for (; i < n; ++i)
RyoheiHagimoto 0:0e0631af0305 1354 r.s[i] = b.s[i-shift];
RyoheiHagimoto 0:0e0631af0305 1355 return r;
RyoheiHagimoto 0:0e0631af0305 1356 }
RyoheiHagimoto 0:0e0631af0305 1357
RyoheiHagimoto 0:0e0631af0305 1358 /** @brief Round
RyoheiHagimoto 0:0e0631af0305 1359
RyoheiHagimoto 0:0e0631af0305 1360 Rounds each value. Input type is float vector ==> output type is int vector.*/
RyoheiHagimoto 0:0e0631af0305 1361 template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a)
RyoheiHagimoto 0:0e0631af0305 1362 {
RyoheiHagimoto 0:0e0631af0305 1363 v_reg<int, n> c;
RyoheiHagimoto 0:0e0631af0305 1364 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1365 c.s[i] = cvRound(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 1366 return c;
RyoheiHagimoto 0:0e0631af0305 1367 }
RyoheiHagimoto 0:0e0631af0305 1368
RyoheiHagimoto 0:0e0631af0305 1369 /** @brief Floor
RyoheiHagimoto 0:0e0631af0305 1370
RyoheiHagimoto 0:0e0631af0305 1371 Floor each value. Input type is float vector ==> output type is int vector.*/
RyoheiHagimoto 0:0e0631af0305 1372 template<int n> inline v_reg<int, n> v_floor(const v_reg<float, n>& a)
RyoheiHagimoto 0:0e0631af0305 1373 {
RyoheiHagimoto 0:0e0631af0305 1374 v_reg<int, n> c;
RyoheiHagimoto 0:0e0631af0305 1375 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1376 c.s[i] = cvFloor(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 1377 return c;
RyoheiHagimoto 0:0e0631af0305 1378 }
RyoheiHagimoto 0:0e0631af0305 1379
RyoheiHagimoto 0:0e0631af0305 1380 /** @brief Ceil
RyoheiHagimoto 0:0e0631af0305 1381
RyoheiHagimoto 0:0e0631af0305 1382 Ceil each value. Input type is float vector ==> output type is int vector.*/
RyoheiHagimoto 0:0e0631af0305 1383 template<int n> inline v_reg<int, n> v_ceil(const v_reg<float, n>& a)
RyoheiHagimoto 0:0e0631af0305 1384 {
RyoheiHagimoto 0:0e0631af0305 1385 v_reg<int, n> c;
RyoheiHagimoto 0:0e0631af0305 1386 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1387 c.s[i] = cvCeil(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 1388 return c;
RyoheiHagimoto 0:0e0631af0305 1389 }
RyoheiHagimoto 0:0e0631af0305 1390
RyoheiHagimoto 0:0e0631af0305 1391 /** @brief Trunc
RyoheiHagimoto 0:0e0631af0305 1392
RyoheiHagimoto 0:0e0631af0305 1393 Truncate each value. Input type is float vector ==> output type is int vector.*/
RyoheiHagimoto 0:0e0631af0305 1394 template<int n> inline v_reg<int, n> v_trunc(const v_reg<float, n>& a)
RyoheiHagimoto 0:0e0631af0305 1395 {
RyoheiHagimoto 0:0e0631af0305 1396 v_reg<int, n> c;
RyoheiHagimoto 0:0e0631af0305 1397 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1398 c.s[i] = (int)(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 1399 return c;
RyoheiHagimoto 0:0e0631af0305 1400 }
RyoheiHagimoto 0:0e0631af0305 1401
RyoheiHagimoto 0:0e0631af0305 1402 /** @overload */
RyoheiHagimoto 0:0e0631af0305 1403 template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a)
RyoheiHagimoto 0:0e0631af0305 1404 {
RyoheiHagimoto 0:0e0631af0305 1405 v_reg<int, n*2> c;
RyoheiHagimoto 0:0e0631af0305 1406 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1407 {
RyoheiHagimoto 0:0e0631af0305 1408 c.s[i] = cvRound(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 1409 c.s[i+n] = 0;
RyoheiHagimoto 0:0e0631af0305 1410 }
RyoheiHagimoto 0:0e0631af0305 1411 return c;
RyoheiHagimoto 0:0e0631af0305 1412 }
RyoheiHagimoto 0:0e0631af0305 1413
RyoheiHagimoto 0:0e0631af0305 1414 /** @overload */
RyoheiHagimoto 0:0e0631af0305 1415 template<int n> inline v_reg<int, n*2> v_floor(const v_reg<double, n>& a)
RyoheiHagimoto 0:0e0631af0305 1416 {
RyoheiHagimoto 0:0e0631af0305 1417 v_reg<int, n> c;
RyoheiHagimoto 0:0e0631af0305 1418 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1419 {
RyoheiHagimoto 0:0e0631af0305 1420 c.s[i] = cvFloor(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 1421 c.s[i+n] = 0;
RyoheiHagimoto 0:0e0631af0305 1422 }
RyoheiHagimoto 0:0e0631af0305 1423 return c;
RyoheiHagimoto 0:0e0631af0305 1424 }
RyoheiHagimoto 0:0e0631af0305 1425
RyoheiHagimoto 0:0e0631af0305 1426 /** @overload */
RyoheiHagimoto 0:0e0631af0305 1427 template<int n> inline v_reg<int, n*2> v_ceil(const v_reg<double, n>& a)
RyoheiHagimoto 0:0e0631af0305 1428 {
RyoheiHagimoto 0:0e0631af0305 1429 v_reg<int, n> c;
RyoheiHagimoto 0:0e0631af0305 1430 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1431 {
RyoheiHagimoto 0:0e0631af0305 1432 c.s[i] = cvCeil(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 1433 c.s[i+n] = 0;
RyoheiHagimoto 0:0e0631af0305 1434 }
RyoheiHagimoto 0:0e0631af0305 1435 return c;
RyoheiHagimoto 0:0e0631af0305 1436 }
RyoheiHagimoto 0:0e0631af0305 1437
RyoheiHagimoto 0:0e0631af0305 1438 /** @overload */
RyoheiHagimoto 0:0e0631af0305 1439 template<int n> inline v_reg<int, n*2> v_trunc(const v_reg<double, n>& a)
RyoheiHagimoto 0:0e0631af0305 1440 {
RyoheiHagimoto 0:0e0631af0305 1441 v_reg<int, n> c;
RyoheiHagimoto 0:0e0631af0305 1442 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1443 {
RyoheiHagimoto 0:0e0631af0305 1444 c.s[i] = cvCeil(a.s[i]);
RyoheiHagimoto 0:0e0631af0305 1445 c.s[i+n] = 0;
RyoheiHagimoto 0:0e0631af0305 1446 }
RyoheiHagimoto 0:0e0631af0305 1447 return c;
RyoheiHagimoto 0:0e0631af0305 1448 }
RyoheiHagimoto 0:0e0631af0305 1449
RyoheiHagimoto 0:0e0631af0305 1450 /** @brief Convert to float
RyoheiHagimoto 0:0e0631af0305 1451
RyoheiHagimoto 0:0e0631af0305 1452 Supported input type is cv::v_int32x4. */
RyoheiHagimoto 0:0e0631af0305 1453 template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a)
RyoheiHagimoto 0:0e0631af0305 1454 {
RyoheiHagimoto 0:0e0631af0305 1455 v_reg<float, n> c;
RyoheiHagimoto 0:0e0631af0305 1456 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1457 c.s[i] = (float)a.s[i];
RyoheiHagimoto 0:0e0631af0305 1458 return c;
RyoheiHagimoto 0:0e0631af0305 1459 }
RyoheiHagimoto 0:0e0631af0305 1460
RyoheiHagimoto 0:0e0631af0305 1461 /** @brief Convert to double
RyoheiHagimoto 0:0e0631af0305 1462
RyoheiHagimoto 0:0e0631af0305 1463 Supported input type is cv::v_int32x4. */
RyoheiHagimoto 0:0e0631af0305 1464 template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<int, n*2>& a)
RyoheiHagimoto 0:0e0631af0305 1465 {
RyoheiHagimoto 0:0e0631af0305 1466 v_reg<double, n> c;
RyoheiHagimoto 0:0e0631af0305 1467 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1468 c.s[i] = (double)a.s[i];
RyoheiHagimoto 0:0e0631af0305 1469 return c;
RyoheiHagimoto 0:0e0631af0305 1470 }
RyoheiHagimoto 0:0e0631af0305 1471
RyoheiHagimoto 0:0e0631af0305 1472 /** @brief Convert to double
RyoheiHagimoto 0:0e0631af0305 1473
RyoheiHagimoto 0:0e0631af0305 1474 Supported input type is cv::v_float32x4. */
RyoheiHagimoto 0:0e0631af0305 1475 template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<float, n*2>& a)
RyoheiHagimoto 0:0e0631af0305 1476 {
RyoheiHagimoto 0:0e0631af0305 1477 v_reg<double, n> c;
RyoheiHagimoto 0:0e0631af0305 1478 for( int i = 0; i < n; i++ )
RyoheiHagimoto 0:0e0631af0305 1479 c.s[i] = (double)a.s[i];
RyoheiHagimoto 0:0e0631af0305 1480 return c;
RyoheiHagimoto 0:0e0631af0305 1481 }
RyoheiHagimoto 0:0e0631af0305 1482
RyoheiHagimoto 0:0e0631af0305 1483 /** @brief Transpose 4x4 matrix
RyoheiHagimoto 0:0e0631af0305 1484
RyoheiHagimoto 0:0e0631af0305 1485 Scheme:
RyoheiHagimoto 0:0e0631af0305 1486 @code
RyoheiHagimoto 0:0e0631af0305 1487 a0 {A1 A2 A3 A4}
RyoheiHagimoto 0:0e0631af0305 1488 a1 {B1 B2 B3 B4}
RyoheiHagimoto 0:0e0631af0305 1489 a2 {C1 C2 C3 C4}
RyoheiHagimoto 0:0e0631af0305 1490 a3 {D1 D2 D3 D4}
RyoheiHagimoto 0:0e0631af0305 1491 ===============
RyoheiHagimoto 0:0e0631af0305 1492 b0 {A1 B1 C1 D1}
RyoheiHagimoto 0:0e0631af0305 1493 b1 {A2 B2 C2 D2}
RyoheiHagimoto 0:0e0631af0305 1494 b2 {A3 B3 C3 D3}
RyoheiHagimoto 0:0e0631af0305 1495 b3 {A4 B4 C4 D4}
RyoheiHagimoto 0:0e0631af0305 1496 @endcode
RyoheiHagimoto 0:0e0631af0305 1497 */
RyoheiHagimoto 0:0e0631af0305 1498 template<typename _Tp>
RyoheiHagimoto 0:0e0631af0305 1499 inline void v_transpose4x4( v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1,
RyoheiHagimoto 0:0e0631af0305 1500 const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3,
RyoheiHagimoto 0:0e0631af0305 1501 v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1,
RyoheiHagimoto 0:0e0631af0305 1502 v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 )
RyoheiHagimoto 0:0e0631af0305 1503 {
RyoheiHagimoto 0:0e0631af0305 1504 b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]);
RyoheiHagimoto 0:0e0631af0305 1505 b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]);
RyoheiHagimoto 0:0e0631af0305 1506 b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]);
RyoheiHagimoto 0:0e0631af0305 1507 b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]);
RyoheiHagimoto 0:0e0631af0305 1508 }
RyoheiHagimoto 0:0e0631af0305 1509
RyoheiHagimoto 0:0e0631af0305 1510 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1511 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1512 #define OPENCV_HAL_IMPL_C_INIT_ZERO(_Tpvec, _Tp, suffix) \
RyoheiHagimoto 0:0e0631af0305 1513 inline _Tpvec v_setzero_##suffix() { return _Tpvec::zero(); }
RyoheiHagimoto 0:0e0631af0305 1514
RyoheiHagimoto 0:0e0631af0305 1515 //! @name Init with zero
RyoheiHagimoto 0:0e0631af0305 1516 //! @{
RyoheiHagimoto 0:0e0631af0305 1517 //! @brief Create new vector with zero elements
RyoheiHagimoto 0:0e0631af0305 1518 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8)
RyoheiHagimoto 0:0e0631af0305 1519 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8)
RyoheiHagimoto 0:0e0631af0305 1520 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16)
RyoheiHagimoto 0:0e0631af0305 1521 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16)
RyoheiHagimoto 0:0e0631af0305 1522 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32)
RyoheiHagimoto 0:0e0631af0305 1523 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32)
RyoheiHagimoto 0:0e0631af0305 1524 OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32)
RyoheiHagimoto 0:0e0631af0305 1525 OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64)
RyoheiHagimoto 0:0e0631af0305 1526 OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64)
RyoheiHagimoto 0:0e0631af0305 1527 OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64)
RyoheiHagimoto 0:0e0631af0305 1528 //! @}
RyoheiHagimoto 0:0e0631af0305 1529
RyoheiHagimoto 0:0e0631af0305 1530 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1531 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1532 #define OPENCV_HAL_IMPL_C_INIT_VAL(_Tpvec, _Tp, suffix) \
RyoheiHagimoto 0:0e0631af0305 1533 inline _Tpvec v_setall_##suffix(_Tp val) { return _Tpvec::all(val); }
RyoheiHagimoto 0:0e0631af0305 1534
RyoheiHagimoto 0:0e0631af0305 1535 //! @name Init with value
RyoheiHagimoto 0:0e0631af0305 1536 //! @{
RyoheiHagimoto 0:0e0631af0305 1537 //! @brief Create new vector with elements set to a specific value
RyoheiHagimoto 0:0e0631af0305 1538 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, u8)
RyoheiHagimoto 0:0e0631af0305 1539 OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8)
RyoheiHagimoto 0:0e0631af0305 1540 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16)
RyoheiHagimoto 0:0e0631af0305 1541 OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16)
RyoheiHagimoto 0:0e0631af0305 1542 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32)
RyoheiHagimoto 0:0e0631af0305 1543 OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32)
RyoheiHagimoto 0:0e0631af0305 1544 OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32)
RyoheiHagimoto 0:0e0631af0305 1545 OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64)
RyoheiHagimoto 0:0e0631af0305 1546 OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64)
RyoheiHagimoto 0:0e0631af0305 1547 OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64)
RyoheiHagimoto 0:0e0631af0305 1548 //! @}
RyoheiHagimoto 0:0e0631af0305 1549
RyoheiHagimoto 0:0e0631af0305 1550 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1551 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1552 #define OPENCV_HAL_IMPL_C_REINTERPRET(_Tpvec, _Tp, suffix) \
RyoheiHagimoto 0:0e0631af0305 1553 template<typename _Tp0, int n0> inline _Tpvec \
RyoheiHagimoto 0:0e0631af0305 1554 v_reinterpret_as_##suffix(const v_reg<_Tp0, n0>& a) \
RyoheiHagimoto 0:0e0631af0305 1555 { return a.template reinterpret_as<_Tp, _Tpvec::nlanes>(); }
RyoheiHagimoto 0:0e0631af0305 1556
RyoheiHagimoto 0:0e0631af0305 1557 //! @name Reinterpret
RyoheiHagimoto 0:0e0631af0305 1558 //! @{
RyoheiHagimoto 0:0e0631af0305 1559 //! @brief Convert vector to different type without modifying underlying data.
RyoheiHagimoto 0:0e0631af0305 1560 OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8)
RyoheiHagimoto 0:0e0631af0305 1561 OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8)
RyoheiHagimoto 0:0e0631af0305 1562 OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16)
RyoheiHagimoto 0:0e0631af0305 1563 OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16)
RyoheiHagimoto 0:0e0631af0305 1564 OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32)
RyoheiHagimoto 0:0e0631af0305 1565 OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32)
RyoheiHagimoto 0:0e0631af0305 1566 OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32)
RyoheiHagimoto 0:0e0631af0305 1567 OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64)
RyoheiHagimoto 0:0e0631af0305 1568 OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64)
RyoheiHagimoto 0:0e0631af0305 1569 OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64)
RyoheiHagimoto 0:0e0631af0305 1570 //! @}
RyoheiHagimoto 0:0e0631af0305 1571
RyoheiHagimoto 0:0e0631af0305 1572 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1573 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1574 #define OPENCV_HAL_IMPL_C_SHIFTL(_Tpvec, _Tp) \
RyoheiHagimoto 0:0e0631af0305 1575 template<int n> inline _Tpvec v_shl(const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 1576 { return a << n; }
RyoheiHagimoto 0:0e0631af0305 1577
RyoheiHagimoto 0:0e0631af0305 1578 //! @name Left shift
RyoheiHagimoto 0:0e0631af0305 1579 //! @{
RyoheiHagimoto 0:0e0631af0305 1580 //! @brief Shift left
RyoheiHagimoto 0:0e0631af0305 1581 OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort)
RyoheiHagimoto 0:0e0631af0305 1582 OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short)
RyoheiHagimoto 0:0e0631af0305 1583 OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned)
RyoheiHagimoto 0:0e0631af0305 1584 OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int)
RyoheiHagimoto 0:0e0631af0305 1585 OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64)
RyoheiHagimoto 0:0e0631af0305 1586 OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64)
RyoheiHagimoto 0:0e0631af0305 1587 //! @}
RyoheiHagimoto 0:0e0631af0305 1588
RyoheiHagimoto 0:0e0631af0305 1589 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1590 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1591 #define OPENCV_HAL_IMPL_C_SHIFTR(_Tpvec, _Tp) \
RyoheiHagimoto 0:0e0631af0305 1592 template<int n> inline _Tpvec v_shr(const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 1593 { return a >> n; }
RyoheiHagimoto 0:0e0631af0305 1594
RyoheiHagimoto 0:0e0631af0305 1595 //! @name Right shift
RyoheiHagimoto 0:0e0631af0305 1596 //! @{
RyoheiHagimoto 0:0e0631af0305 1597 //! @brief Shift right
RyoheiHagimoto 0:0e0631af0305 1598 OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort)
RyoheiHagimoto 0:0e0631af0305 1599 OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short)
RyoheiHagimoto 0:0e0631af0305 1600 OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned)
RyoheiHagimoto 0:0e0631af0305 1601 OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int)
RyoheiHagimoto 0:0e0631af0305 1602 OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64)
RyoheiHagimoto 0:0e0631af0305 1603 OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64)
RyoheiHagimoto 0:0e0631af0305 1604 //! @}
RyoheiHagimoto 0:0e0631af0305 1605
RyoheiHagimoto 0:0e0631af0305 1606 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1607 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1608 #define OPENCV_HAL_IMPL_C_RSHIFTR(_Tpvec, _Tp) \
RyoheiHagimoto 0:0e0631af0305 1609 template<int n> inline _Tpvec v_rshr(const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 1610 { \
RyoheiHagimoto 0:0e0631af0305 1611 _Tpvec c; \
RyoheiHagimoto 0:0e0631af0305 1612 for( int i = 0; i < _Tpvec::nlanes; i++ ) \
RyoheiHagimoto 0:0e0631af0305 1613 c.s[i] = (_Tp)((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
RyoheiHagimoto 0:0e0631af0305 1614 return c; \
RyoheiHagimoto 0:0e0631af0305 1615 }
RyoheiHagimoto 0:0e0631af0305 1616
RyoheiHagimoto 0:0e0631af0305 1617 //! @name Rounding shift
RyoheiHagimoto 0:0e0631af0305 1618 //! @{
RyoheiHagimoto 0:0e0631af0305 1619 //! @brief Rounding shift right
RyoheiHagimoto 0:0e0631af0305 1620 OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort)
RyoheiHagimoto 0:0e0631af0305 1621 OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short)
RyoheiHagimoto 0:0e0631af0305 1622 OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned)
RyoheiHagimoto 0:0e0631af0305 1623 OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int)
RyoheiHagimoto 0:0e0631af0305 1624 OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64)
RyoheiHagimoto 0:0e0631af0305 1625 OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64)
RyoheiHagimoto 0:0e0631af0305 1626 //! @}
RyoheiHagimoto 0:0e0631af0305 1627
RyoheiHagimoto 0:0e0631af0305 1628 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1629 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1630 #define OPENCV_HAL_IMPL_C_PACK(_Tpvec, _Tpnvec, _Tpn, pack_suffix) \
RyoheiHagimoto 0:0e0631af0305 1631 inline _Tpnvec v_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 1632 { \
RyoheiHagimoto 0:0e0631af0305 1633 _Tpnvec c; \
RyoheiHagimoto 0:0e0631af0305 1634 for( int i = 0; i < _Tpvec::nlanes; i++ ) \
RyoheiHagimoto 0:0e0631af0305 1635 { \
RyoheiHagimoto 0:0e0631af0305 1636 c.s[i] = saturate_cast<_Tpn>(a.s[i]); \
RyoheiHagimoto 0:0e0631af0305 1637 c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>(b.s[i]); \
RyoheiHagimoto 0:0e0631af0305 1638 } \
RyoheiHagimoto 0:0e0631af0305 1639 return c; \
RyoheiHagimoto 0:0e0631af0305 1640 }
RyoheiHagimoto 0:0e0631af0305 1641
RyoheiHagimoto 0:0e0631af0305 1642 //! @name Pack
RyoheiHagimoto 0:0e0631af0305 1643 //! @{
RyoheiHagimoto 0:0e0631af0305 1644 //! @brief Pack values from two vectors to one
RyoheiHagimoto 0:0e0631af0305 1645 //!
RyoheiHagimoto 0:0e0631af0305 1646 //! Return vector type have twice more elements than input vector types. Variant with _u_ suffix also
RyoheiHagimoto 0:0e0631af0305 1647 //! converts to corresponding unsigned type.
RyoheiHagimoto 0:0e0631af0305 1648 //!
RyoheiHagimoto 0:0e0631af0305 1649 //! - pack: for 16-, 32- and 64-bit integer input types
RyoheiHagimoto 0:0e0631af0305 1650 //! - pack_u: for 16- and 32-bit signed integer input types
RyoheiHagimoto 0:0e0631af0305 1651 OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack)
RyoheiHagimoto 0:0e0631af0305 1652 OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack)
RyoheiHagimoto 0:0e0631af0305 1653 OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack)
RyoheiHagimoto 0:0e0631af0305 1654 OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack)
RyoheiHagimoto 0:0e0631af0305 1655 OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack)
RyoheiHagimoto 0:0e0631af0305 1656 OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack)
RyoheiHagimoto 0:0e0631af0305 1657 OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u)
RyoheiHagimoto 0:0e0631af0305 1658 OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u)
RyoheiHagimoto 0:0e0631af0305 1659 //! @}
RyoheiHagimoto 0:0e0631af0305 1660
RyoheiHagimoto 0:0e0631af0305 1661 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1662 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1663 #define OPENCV_HAL_IMPL_C_RSHR_PACK(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
RyoheiHagimoto 0:0e0631af0305 1664 template<int n> inline _Tpnvec v_rshr_##pack_suffix(const _Tpvec& a, const _Tpvec& b) \
RyoheiHagimoto 0:0e0631af0305 1665 { \
RyoheiHagimoto 0:0e0631af0305 1666 _Tpnvec c; \
RyoheiHagimoto 0:0e0631af0305 1667 for( int i = 0; i < _Tpvec::nlanes; i++ ) \
RyoheiHagimoto 0:0e0631af0305 1668 { \
RyoheiHagimoto 0:0e0631af0305 1669 c.s[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
RyoheiHagimoto 0:0e0631af0305 1670 c.s[i+_Tpvec::nlanes] = saturate_cast<_Tpn>((b.s[i] + ((_Tp)1 << (n - 1))) >> n); \
RyoheiHagimoto 0:0e0631af0305 1671 } \
RyoheiHagimoto 0:0e0631af0305 1672 return c; \
RyoheiHagimoto 0:0e0631af0305 1673 }
RyoheiHagimoto 0:0e0631af0305 1674
RyoheiHagimoto 0:0e0631af0305 1675 //! @name Pack with rounding shift
RyoheiHagimoto 0:0e0631af0305 1676 //! @{
RyoheiHagimoto 0:0e0631af0305 1677 //! @brief Pack values from two vectors to one with rounding shift
RyoheiHagimoto 0:0e0631af0305 1678 //!
RyoheiHagimoto 0:0e0631af0305 1679 //! Values from the input vectors will be shifted right by _n_ bits with rounding, converted to narrower
RyoheiHagimoto 0:0e0631af0305 1680 //! type and returned in the result vector. Variant with _u_ suffix converts to unsigned type.
RyoheiHagimoto 0:0e0631af0305 1681 //!
RyoheiHagimoto 0:0e0631af0305 1682 //! - pack: for 16-, 32- and 64-bit integer input types
RyoheiHagimoto 0:0e0631af0305 1683 //! - pack_u: for 16- and 32-bit signed integer input types
RyoheiHagimoto 0:0e0631af0305 1684 OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack)
RyoheiHagimoto 0:0e0631af0305 1685 OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack)
RyoheiHagimoto 0:0e0631af0305 1686 OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
RyoheiHagimoto 0:0e0631af0305 1687 OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack)
RyoheiHagimoto 0:0e0631af0305 1688 OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
RyoheiHagimoto 0:0e0631af0305 1689 OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack)
RyoheiHagimoto 0:0e0631af0305 1690 OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u)
RyoheiHagimoto 0:0e0631af0305 1691 OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u)
RyoheiHagimoto 0:0e0631af0305 1692 //! @}
RyoheiHagimoto 0:0e0631af0305 1693
RyoheiHagimoto 0:0e0631af0305 1694 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1695 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1696 #define OPENCV_HAL_IMPL_C_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
RyoheiHagimoto 0:0e0631af0305 1697 inline void v_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 1698 { \
RyoheiHagimoto 0:0e0631af0305 1699 for( int i = 0; i < _Tpvec::nlanes; i++ ) \
RyoheiHagimoto 0:0e0631af0305 1700 ptr[i] = saturate_cast<_Tpn>(a.s[i]); \
RyoheiHagimoto 0:0e0631af0305 1701 }
RyoheiHagimoto 0:0e0631af0305 1702
RyoheiHagimoto 0:0e0631af0305 1703 //! @name Pack and store
RyoheiHagimoto 0:0e0631af0305 1704 //! @{
RyoheiHagimoto 0:0e0631af0305 1705 //! @brief Store values from the input vector into memory with pack
RyoheiHagimoto 0:0e0631af0305 1706 //!
RyoheiHagimoto 0:0e0631af0305 1707 //! Values will be stored into memory with saturating conversion to narrower type.
RyoheiHagimoto 0:0e0631af0305 1708 //! Variant with _u_ suffix converts to corresponding unsigned type.
RyoheiHagimoto 0:0e0631af0305 1709 //!
RyoheiHagimoto 0:0e0631af0305 1710 //! - pack: for 16-, 32- and 64-bit integer input types
RyoheiHagimoto 0:0e0631af0305 1711 //! - pack_u: for 16- and 32-bit signed integer input types
RyoheiHagimoto 0:0e0631af0305 1712 OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack)
RyoheiHagimoto 0:0e0631af0305 1713 OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack)
RyoheiHagimoto 0:0e0631af0305 1714 OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
RyoheiHagimoto 0:0e0631af0305 1715 OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack)
RyoheiHagimoto 0:0e0631af0305 1716 OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
RyoheiHagimoto 0:0e0631af0305 1717 OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack)
RyoheiHagimoto 0:0e0631af0305 1718 OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u)
RyoheiHagimoto 0:0e0631af0305 1719 OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u)
RyoheiHagimoto 0:0e0631af0305 1720 //! @}
RyoheiHagimoto 0:0e0631af0305 1721
RyoheiHagimoto 0:0e0631af0305 1722 //! @brief Helper macro
RyoheiHagimoto 0:0e0631af0305 1723 //! @ingroup core_hal_intrin_impl
RyoheiHagimoto 0:0e0631af0305 1724 #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(_Tpvec, _Tp, _Tpnvec, _Tpn, pack_suffix) \
RyoheiHagimoto 0:0e0631af0305 1725 template<int n> inline void v_rshr_##pack_suffix##_store(_Tpn* ptr, const _Tpvec& a) \
RyoheiHagimoto 0:0e0631af0305 1726 { \
RyoheiHagimoto 0:0e0631af0305 1727 for( int i = 0; i < _Tpvec::nlanes; i++ ) \
RyoheiHagimoto 0:0e0631af0305 1728 ptr[i] = saturate_cast<_Tpn>((a.s[i] + ((_Tp)1 << (n - 1))) >> n); \
RyoheiHagimoto 0:0e0631af0305 1729 }
RyoheiHagimoto 0:0e0631af0305 1730
RyoheiHagimoto 0:0e0631af0305 1731 //! @name Pack and store with rounding shift
RyoheiHagimoto 0:0e0631af0305 1732 //! @{
RyoheiHagimoto 0:0e0631af0305 1733 //! @brief Store values from the input vector into memory with pack
RyoheiHagimoto 0:0e0631af0305 1734 //!
RyoheiHagimoto 0:0e0631af0305 1735 //! Values will be shifted _n_ bits right with rounding, converted to narrower type and stored into
RyoheiHagimoto 0:0e0631af0305 1736 //! memory. Variant with _u_ suffix converts to unsigned type.
RyoheiHagimoto 0:0e0631af0305 1737 //!
RyoheiHagimoto 0:0e0631af0305 1738 //! - pack: for 16-, 32- and 64-bit integer input types
RyoheiHagimoto 0:0e0631af0305 1739 //! - pack_u: for 16- and 32-bit signed integer input types
RyoheiHagimoto 0:0e0631af0305 1740 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack)
RyoheiHagimoto 0:0e0631af0305 1741 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack)
RyoheiHagimoto 0:0e0631af0305 1742 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack)
RyoheiHagimoto 0:0e0631af0305 1743 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack)
RyoheiHagimoto 0:0e0631af0305 1744 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack)
RyoheiHagimoto 0:0e0631af0305 1745 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack)
RyoheiHagimoto 0:0e0631af0305 1746 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u)
RyoheiHagimoto 0:0e0631af0305 1747 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u)
RyoheiHagimoto 0:0e0631af0305 1748 //! @}
RyoheiHagimoto 0:0e0631af0305 1749
RyoheiHagimoto 0:0e0631af0305 1750 /** @brief Matrix multiplication
RyoheiHagimoto 0:0e0631af0305 1751
RyoheiHagimoto 0:0e0631af0305 1752 Scheme:
RyoheiHagimoto 0:0e0631af0305 1753 @code
RyoheiHagimoto 0:0e0631af0305 1754 {A0 A1 A2 A3} |V0|
RyoheiHagimoto 0:0e0631af0305 1755 {B0 B1 B2 B3} |V1|
RyoheiHagimoto 0:0e0631af0305 1756 {C0 C1 C2 C3} |V2|
RyoheiHagimoto 0:0e0631af0305 1757 {D0 D1 D2 D3} x |V3|
RyoheiHagimoto 0:0e0631af0305 1758 ====================
RyoheiHagimoto 0:0e0631af0305 1759 {R0 R1 R2 R3}, where:
RyoheiHagimoto 0:0e0631af0305 1760 R0 = A0V0 + A1V1 + A2V2 + A3V3,
RyoheiHagimoto 0:0e0631af0305 1761 R1 = B0V0 + B1V1 + B2V2 + B3V3
RyoheiHagimoto 0:0e0631af0305 1762 ...
RyoheiHagimoto 0:0e0631af0305 1763 @endcode
RyoheiHagimoto 0:0e0631af0305 1764 */
RyoheiHagimoto 0:0e0631af0305 1765 inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0,
RyoheiHagimoto 0:0e0631af0305 1766 const v_float32x4& m1, const v_float32x4& m2,
RyoheiHagimoto 0:0e0631af0305 1767 const v_float32x4& m3)
RyoheiHagimoto 0:0e0631af0305 1768 {
RyoheiHagimoto 0:0e0631af0305 1769 return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0],
RyoheiHagimoto 0:0e0631af0305 1770 v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1],
RyoheiHagimoto 0:0e0631af0305 1771 v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2],
RyoheiHagimoto 0:0e0631af0305 1772 v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]);
RyoheiHagimoto 0:0e0631af0305 1773 }
RyoheiHagimoto 0:0e0631af0305 1774
RyoheiHagimoto 0:0e0631af0305 1775 //! @}
RyoheiHagimoto 0:0e0631af0305 1776
//! @name Check SIMD support
//! @{
//! @brief Check CPU capability of SIMD operation
//!
//! @return always `false` in this implementation.
static inline bool hasSIMD128()
{
    const bool simdSupported = false;
    return simdSupported;
}

//! @}
RyoheiHagimoto 0:0e0631af0305 1786
RyoheiHagimoto 0:0e0631af0305 1787
RyoheiHagimoto 0:0e0631af0305 1788 }
RyoheiHagimoto 0:0e0631af0305 1789
RyoheiHagimoto 0:0e0631af0305 1790 #endif