openCV library for Renesas RZ/A
Dependents: RZ_A2M_Mbed_samples
include/opencv2/core/hal/intrin_cpp.hpp@0:0e0631af0305, 2021-01-29 (annotated)
- Committer:
- RyoheiHagimoto
- Date:
- Fri Jan 29 04:53:38 2021 +0000
- Revision:
- 0:0e0631af0305
copied from https://github.com/d-kato/opencv-lib.
Who changed what in which revision?
| User | Revision | Line number | New contents of line |
|---|---|---|---|
| RyoheiHagimoto | 0:0e0631af0305 | 1 | /*M/////////////////////////////////////////////////////////////////////////////////////// |
| RyoheiHagimoto | 0:0e0631af0305 | 2 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 3 | // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. |
| RyoheiHagimoto | 0:0e0631af0305 | 4 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 5 | // By downloading, copying, installing or using the software you agree to this license. |
| RyoheiHagimoto | 0:0e0631af0305 | 6 | // If you do not agree to this license, do not download, install, |
| RyoheiHagimoto | 0:0e0631af0305 | 7 | // copy or use the software. |
| RyoheiHagimoto | 0:0e0631af0305 | 8 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 9 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 10 | // License Agreement |
| RyoheiHagimoto | 0:0e0631af0305 | 11 | // For Open Source Computer Vision Library |
| RyoheiHagimoto | 0:0e0631af0305 | 12 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 13 | // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. |
| RyoheiHagimoto | 0:0e0631af0305 | 14 | // Copyright (C) 2009, Willow Garage Inc., all rights reserved. |
| RyoheiHagimoto | 0:0e0631af0305 | 15 | // Copyright (C) 2013, OpenCV Foundation, all rights reserved. |
| RyoheiHagimoto | 0:0e0631af0305 | 16 | // Copyright (C) 2015, Itseez Inc., all rights reserved. |
| RyoheiHagimoto | 0:0e0631af0305 | 17 | // Third party copyrights are property of their respective owners. |
| RyoheiHagimoto | 0:0e0631af0305 | 18 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 19 | // Redistribution and use in source and binary forms, with or without modification, |
| RyoheiHagimoto | 0:0e0631af0305 | 20 | // are permitted provided that the following conditions are met: |
| RyoheiHagimoto | 0:0e0631af0305 | 21 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 22 | // * Redistribution's of source code must retain the above copyright notice, |
| RyoheiHagimoto | 0:0e0631af0305 | 23 | // this list of conditions and the following disclaimer. |
| RyoheiHagimoto | 0:0e0631af0305 | 24 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 25 | // * Redistribution's in binary form must reproduce the above copyright notice, |
| RyoheiHagimoto | 0:0e0631af0305 | 26 | // this list of conditions and the following disclaimer in the documentation |
| RyoheiHagimoto | 0:0e0631af0305 | 27 | // and/or other materials provided with the distribution. |
| RyoheiHagimoto | 0:0e0631af0305 | 28 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 29 | // * The name of the copyright holders may not be used to endorse or promote products |
| RyoheiHagimoto | 0:0e0631af0305 | 30 | // derived from this software without specific prior written permission. |
| RyoheiHagimoto | 0:0e0631af0305 | 31 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 32 | // This software is provided by the copyright holders and contributors "as is" and |
| RyoheiHagimoto | 0:0e0631af0305 | 33 | // any express or implied warranties, including, but not limited to, the implied |
| RyoheiHagimoto | 0:0e0631af0305 | 34 | // warranties of merchantability and fitness for a particular purpose are disclaimed. |
| RyoheiHagimoto | 0:0e0631af0305 | 35 | // In no event shall the Intel Corporation or contributors be liable for any direct, |
| RyoheiHagimoto | 0:0e0631af0305 | 36 | // indirect, incidental, special, exemplary, or consequential damages |
| RyoheiHagimoto | 0:0e0631af0305 | 37 | // (including, but not limited to, procurement of substitute goods or services; |
| RyoheiHagimoto | 0:0e0631af0305 | 38 | // loss of use, data, or profits; or business interruption) however caused |
| RyoheiHagimoto | 0:0e0631af0305 | 39 | // and on any theory of liability, whether in contract, strict liability, |
| RyoheiHagimoto | 0:0e0631af0305 | 40 | // or tort (including negligence or otherwise) arising in any way out of |
| RyoheiHagimoto | 0:0e0631af0305 | 41 | // the use of this software, even if advised of the possibility of such damage. |
| RyoheiHagimoto | 0:0e0631af0305 | 42 | // |
| RyoheiHagimoto | 0:0e0631af0305 | 43 | //M*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 44 | |
| RyoheiHagimoto | 0:0e0631af0305 | 45 | #ifndef OPENCV_HAL_INTRIN_CPP_HPP |
| RyoheiHagimoto | 0:0e0631af0305 | 46 | #define OPENCV_HAL_INTRIN_CPP_HPP |
| RyoheiHagimoto | 0:0e0631af0305 | 47 | |
| RyoheiHagimoto | 0:0e0631af0305 | 48 | #include <limits> |
| RyoheiHagimoto | 0:0e0631af0305 | 49 | #include <cstring> |
| RyoheiHagimoto | 0:0e0631af0305 | 50 | #include <algorithm> |
| RyoheiHagimoto | 0:0e0631af0305 | 51 | #include "opencv2/core/saturate.hpp" |
| RyoheiHagimoto | 0:0e0631af0305 | 52 | |
| RyoheiHagimoto | 0:0e0631af0305 | 53 | namespace cv |
| RyoheiHagimoto | 0:0e0631af0305 | 54 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 55 | |
| RyoheiHagimoto | 0:0e0631af0305 | 56 | /** @addtogroup core_hal_intrin |
| RyoheiHagimoto | 0:0e0631af0305 | 57 | |
| RyoheiHagimoto | 0:0e0631af0305 | 58 | "Universal intrinsics" is a set of types and functions intended to simplify vectorization of code on |
| RyoheiHagimoto | 0:0e0631af0305 | 59 | different platforms. Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86 |
| RyoheiHagimoto | 0:0e0631af0305 | 60 | architectures and __NEON__ on ARM architectures, both allow working with 128 bit registers |
| RyoheiHagimoto | 0:0e0631af0305 | 61 | containing packed values of different types. In case when there is no SIMD extension available |
| RyoheiHagimoto | 0:0e0631af0305 | 62 | during compilation, fallback C++ implementation of intrinsics will be chosen and code will work as |
| RyoheiHagimoto | 0:0e0631af0305 | 63 | expected although it could be slower. |
| RyoheiHagimoto | 0:0e0631af0305 | 64 | |
| RyoheiHagimoto | 0:0e0631af0305 | 65 | ### Types |
| RyoheiHagimoto | 0:0e0631af0305 | 66 | |
| RyoheiHagimoto | 0:0e0631af0305 | 67 | There are several types representing 128-bit register as a vector of packed values, each type is |
| RyoheiHagimoto | 0:0e0631af0305 | 68 | implemented as a structure based on one SIMD register. |
| RyoheiHagimoto | 0:0e0631af0305 | 69 | |
| RyoheiHagimoto | 0:0e0631af0305 | 70 | - cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char |
| RyoheiHagimoto | 0:0e0631af0305 | 71 | - cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short |
| RyoheiHagimoto | 0:0e0631af0305 | 72 | - cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int |
| RyoheiHagimoto | 0:0e0631af0305 | 73 | - cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64 |
| RyoheiHagimoto | 0:0e0631af0305 | 74 | - cv::v_float32x4: four 32-bit floating point values (signed) - float |
| RyoheiHagimoto | 0:0e0631af0305 | 75 | - cv::v_float64x2: two 64-bit floating point values (signed) - double |
| RyoheiHagimoto | 0:0e0631af0305 | 76 | |
| RyoheiHagimoto | 0:0e0631af0305 | 77 | @note |
| RyoheiHagimoto | 0:0e0631af0305 | 78 | cv::v_float64x2 is not implemented in the NEON variant; if you want to use this type, don't forget to |
| RyoheiHagimoto | 0:0e0631af0305 | 79 | check the CV_SIMD128_64F preprocessor definition: |
| RyoheiHagimoto | 0:0e0631af0305 | 80 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 81 | #if CV_SIMD128_64F |
| RyoheiHagimoto | 0:0e0631af0305 | 82 | //... |
| RyoheiHagimoto | 0:0e0631af0305 | 83 | #endif |
| RyoheiHagimoto | 0:0e0631af0305 | 84 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 85 | |
| RyoheiHagimoto | 0:0e0631af0305 | 86 | ### Load and store operations |
| RyoheiHagimoto | 0:0e0631af0305 | 87 | |
| RyoheiHagimoto | 0:0e0631af0305 | 88 | These operations allow to set contents of the register explicitly or by loading it from some memory |
| RyoheiHagimoto | 0:0e0631af0305 | 89 | block and to save contents of the register to memory block. |
| RyoheiHagimoto | 0:0e0631af0305 | 90 | |
| RyoheiHagimoto | 0:0e0631af0305 | 91 | - Constructors: |
| RyoheiHagimoto | 0:0e0631af0305 | 92 | @ref v_reg::v_reg(const _Tp *ptr) "from memory", |
| RyoheiHagimoto | 0:0e0631af0305 | 93 | @ref v_reg::v_reg(_Tp s0, _Tp s1) "from two values", ... |
| RyoheiHagimoto | 0:0e0631af0305 | 94 | - Other create methods: |
| RyoheiHagimoto | 0:0e0631af0305 | 95 | @ref v_setall_s8, @ref v_setall_u8, ..., |
| RyoheiHagimoto | 0:0e0631af0305 | 96 | @ref v_setzero_u8, @ref v_setzero_s8, ... |
| RyoheiHagimoto | 0:0e0631af0305 | 97 | - Memory operations: |
| RyoheiHagimoto | 0:0e0631af0305 | 98 | @ref v_load, @ref v_load_aligned, @ref v_load_halves, |
| RyoheiHagimoto | 0:0e0631af0305 | 99 | @ref v_store, @ref v_store_aligned, |
| RyoheiHagimoto | 0:0e0631af0305 | 100 | @ref v_store_high, @ref v_store_low |
| RyoheiHagimoto | 0:0e0631af0305 | 101 | |
| RyoheiHagimoto | 0:0e0631af0305 | 102 | ### Value reordering |
| RyoheiHagimoto | 0:0e0631af0305 | 103 | |
| RyoheiHagimoto | 0:0e0631af0305 | 104 | These operations allow to reorder or recombine elements in one or multiple vectors. |
| RyoheiHagimoto | 0:0e0631af0305 | 105 | |
| RyoheiHagimoto | 0:0e0631af0305 | 106 | - Interleave, deinterleave (2, 3 and 4 channels): @ref v_load_deinterleave, @ref v_store_interleave |
| RyoheiHagimoto | 0:0e0631af0305 | 107 | - Expand: @ref v_load_expand, @ref v_load_expand_q, @ref v_expand |
| RyoheiHagimoto | 0:0e0631af0305 | 108 | - Pack: @ref v_pack, @ref v_pack_u, @ref v_rshr_pack, @ref v_rshr_pack_u, |
| RyoheiHagimoto | 0:0e0631af0305 | 109 | @ref v_pack_store, @ref v_pack_u_store, @ref v_rshr_pack_store, @ref v_rshr_pack_u_store |
| RyoheiHagimoto | 0:0e0631af0305 | 110 | - Recombine: @ref v_zip, @ref v_recombine, @ref v_combine_low, @ref v_combine_high |
| RyoheiHagimoto | 0:0e0631af0305 | 111 | - Extract: @ref v_extract |
| RyoheiHagimoto | 0:0e0631af0305 | 112 | |
| RyoheiHagimoto | 0:0e0631af0305 | 113 | |
| RyoheiHagimoto | 0:0e0631af0305 | 114 | ### Arithmetic, bitwise and comparison operations |
| RyoheiHagimoto | 0:0e0631af0305 | 115 | |
| RyoheiHagimoto | 0:0e0631af0305 | 116 | Element-wise binary and unary operations. |
| RyoheiHagimoto | 0:0e0631af0305 | 117 | |
| RyoheiHagimoto | 0:0e0631af0305 | 118 | - Arithmetics: |
| RyoheiHagimoto | 0:0e0631af0305 | 119 | @ref operator +(const v_reg &a, const v_reg &b) "+", |
| RyoheiHagimoto | 0:0e0631af0305 | 120 | @ref operator -(const v_reg &a, const v_reg &b) "-", |
| RyoheiHagimoto | 0:0e0631af0305 | 121 | @ref operator *(const v_reg &a, const v_reg &b) "*", |
| RyoheiHagimoto | 0:0e0631af0305 | 122 | @ref operator /(const v_reg &a, const v_reg &b) "/", |
| RyoheiHagimoto | 0:0e0631af0305 | 123 | @ref v_mul_expand |
| RyoheiHagimoto | 0:0e0631af0305 | 124 | |
| RyoheiHagimoto | 0:0e0631af0305 | 125 | - Non-saturating arithmetics: @ref v_add_wrap, @ref v_sub_wrap |
| RyoheiHagimoto | 0:0e0631af0305 | 126 | |
| RyoheiHagimoto | 0:0e0631af0305 | 127 | - Bitwise shifts: |
| RyoheiHagimoto | 0:0e0631af0305 | 128 | @ref operator <<(const v_reg &a, int s) "<<", |
| RyoheiHagimoto | 0:0e0631af0305 | 129 | @ref operator >>(const v_reg &a, int s) ">>", |
| RyoheiHagimoto | 0:0e0631af0305 | 130 | @ref v_shl, @ref v_shr |
| RyoheiHagimoto | 0:0e0631af0305 | 131 | |
| RyoheiHagimoto | 0:0e0631af0305 | 132 | - Bitwise logic: |
| RyoheiHagimoto | 0:0e0631af0305 | 133 | @ref operator&(const v_reg &a, const v_reg &b) "&", |
| RyoheiHagimoto | 0:0e0631af0305 | 134 | @ref operator |(const v_reg &a, const v_reg &b) "|", |
| RyoheiHagimoto | 0:0e0631af0305 | 135 | @ref operator ^(const v_reg &a, const v_reg &b) "^", |
| RyoheiHagimoto | 0:0e0631af0305 | 136 | @ref operator ~(const v_reg &a) "~" |
| RyoheiHagimoto | 0:0e0631af0305 | 137 | |
| RyoheiHagimoto | 0:0e0631af0305 | 138 | - Comparison: |
| RyoheiHagimoto | 0:0e0631af0305 | 139 | @ref operator >(const v_reg &a, const v_reg &b) ">", |
| RyoheiHagimoto | 0:0e0631af0305 | 140 | @ref operator >=(const v_reg &a, const v_reg &b) ">=", |
| RyoheiHagimoto | 0:0e0631af0305 | 141 | @ref operator <(const v_reg &a, const v_reg &b) "<", |
| RyoheiHagimoto | 0:0e0631af0305 | 142 | @ref operator <=(const v_reg &a, const v_reg &b) "<=", |
| RyoheiHagimoto | 0:0e0631af0305 | 143 | @ref operator==(const v_reg &a, const v_reg &b) "==", |
| RyoheiHagimoto | 0:0e0631af0305 | 144 | @ref operator !=(const v_reg &a, const v_reg &b) "!=" |
| RyoheiHagimoto | 0:0e0631af0305 | 145 | |
| RyoheiHagimoto | 0:0e0631af0305 | 146 | - min/max: @ref v_min, @ref v_max |
| RyoheiHagimoto | 0:0e0631af0305 | 147 | |
| RyoheiHagimoto | 0:0e0631af0305 | 148 | ### Reduce and mask |
| RyoheiHagimoto | 0:0e0631af0305 | 149 | |
| RyoheiHagimoto | 0:0e0631af0305 | 150 | Most of these operations return only one value. |
| RyoheiHagimoto | 0:0e0631af0305 | 151 | |
| RyoheiHagimoto | 0:0e0631af0305 | 152 | - Reduce: @ref v_reduce_min, @ref v_reduce_max, @ref v_reduce_sum |
| RyoheiHagimoto | 0:0e0631af0305 | 153 | - Mask: @ref v_signmask, @ref v_check_all, @ref v_check_any, @ref v_select |
| RyoheiHagimoto | 0:0e0631af0305 | 154 | |
| RyoheiHagimoto | 0:0e0631af0305 | 155 | ### Other math |
| RyoheiHagimoto | 0:0e0631af0305 | 156 | |
| RyoheiHagimoto | 0:0e0631af0305 | 157 | - Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude |
| RyoheiHagimoto | 0:0e0631af0305 | 158 | - Absolute values: @ref v_abs, @ref v_absdiff |
| RyoheiHagimoto | 0:0e0631af0305 | 159 | |
| RyoheiHagimoto | 0:0e0631af0305 | 160 | ### Conversions |
| RyoheiHagimoto | 0:0e0631af0305 | 161 | |
| RyoheiHagimoto | 0:0e0631af0305 | 162 | Different type conversions and casts: |
| RyoheiHagimoto | 0:0e0631af0305 | 163 | |
| RyoheiHagimoto | 0:0e0631af0305 | 164 | - Rounding: @ref v_round, @ref v_floor, @ref v_ceil, @ref v_trunc, |
| RyoheiHagimoto | 0:0e0631af0305 | 165 | - To float: @ref v_cvt_f32, @ref v_cvt_f64 |
| RyoheiHagimoto | 0:0e0631af0305 | 166 | - Reinterpret: @ref v_reinterpret_as_u8, @ref v_reinterpret_as_s8, ... |
| RyoheiHagimoto | 0:0e0631af0305 | 167 | |
| RyoheiHagimoto | 0:0e0631af0305 | 168 | ### Matrix operations |
| RyoheiHagimoto | 0:0e0631af0305 | 169 | |
| RyoheiHagimoto | 0:0e0631af0305 | 170 | In these operations vectors represent matrix rows/columns: @ref v_dotprod, @ref v_matmul, @ref v_transpose4x4 |
| RyoheiHagimoto | 0:0e0631af0305 | 171 | |
| RyoheiHagimoto | 0:0e0631af0305 | 172 | ### Usability |
| RyoheiHagimoto | 0:0e0631af0305 | 173 | |
| RyoheiHagimoto | 0:0e0631af0305 | 174 | Most operations are implemented only for some subset of the available types; the following matrices |
| RyoheiHagimoto | 0:0e0631af0305 | 175 | show the applicability of different operations to the types. |
| RyoheiHagimoto | 0:0e0631af0305 | 176 | |
| RyoheiHagimoto | 0:0e0631af0305 | 177 | Regular integers: |
| RyoheiHagimoto | 0:0e0631af0305 | 178 | |
| RyoheiHagimoto | 0:0e0631af0305 | 179 | | Operations\\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 | |
| RyoheiHagimoto | 0:0e0631af0305 | 180 | |-------------------|:-:|:-:|:-:|:-:|:-:|:-:| |
| RyoheiHagimoto | 0:0e0631af0305 | 181 | |load, store | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 182 | |interleave | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 183 | |expand | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 184 | |expand_q | x | x | | | | | |
| RyoheiHagimoto | 0:0e0631af0305 | 185 | |add, sub | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 186 | |add_wrap, sub_wrap | x | x | x | x | | | |
| RyoheiHagimoto | 0:0e0631af0305 | 187 | |mul | | | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 188 | |mul_expand | | | x | x | x | | |
| RyoheiHagimoto | 0:0e0631af0305 | 189 | |compare | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 190 | |shift | | | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 191 | |dotprod | | | | x | | | |
| RyoheiHagimoto | 0:0e0631af0305 | 192 | |logical | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 193 | |min, max | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 194 | |absdiff | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 195 | |reduce | | | | | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 196 | |mask | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 197 | |pack | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 198 | |pack_u | x | | x | | | | |
| RyoheiHagimoto | 0:0e0631af0305 | 199 | |unpack | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 200 | |extract | x | x | x | x | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 201 | |cvt_flt32 | | | | | | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 202 | |cvt_flt64 | | | | | | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 203 | |transpose4x4 | | | | | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 204 | |
| RyoheiHagimoto | 0:0e0631af0305 | 205 | Big integers: |
| RyoheiHagimoto | 0:0e0631af0305 | 206 | |
| RyoheiHagimoto | 0:0e0631af0305 | 207 | | Operations\\Types | uint 64x2 | int 64x2 | |
| RyoheiHagimoto | 0:0e0631af0305 | 208 | |-------------------|:-:|:-:| |
| RyoheiHagimoto | 0:0e0631af0305 | 209 | |load, store | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 210 | |add, sub | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 211 | |shift | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 212 | |logical | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 213 | |extract | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 214 | |
| RyoheiHagimoto | 0:0e0631af0305 | 215 | Floating point: |
| RyoheiHagimoto | 0:0e0631af0305 | 216 | |
| RyoheiHagimoto | 0:0e0631af0305 | 217 | | Operations\\Types | float 32x4 | float 64x2 | |
| RyoheiHagimoto | 0:0e0631af0305 | 218 | |-------------------|:-:|:-:| |
| RyoheiHagimoto | 0:0e0631af0305 | 219 | |load, store | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 220 | |interleave | x | | |
| RyoheiHagimoto | 0:0e0631af0305 | 221 | |add, sub | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 222 | |mul | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 223 | |div | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 224 | |compare | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 225 | |min, max | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 226 | |absdiff | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 227 | |reduce | x | | |
| RyoheiHagimoto | 0:0e0631af0305 | 228 | |mask | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 229 | |unpack | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 230 | |cvt_flt32 | | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 231 | |cvt_flt64 | x | | |
| RyoheiHagimoto | 0:0e0631af0305 | 232 | |sqrt, abs | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 233 | |float math | x | x | |
| RyoheiHagimoto | 0:0e0631af0305 | 234 | |transpose4x4 | x | | |
| RyoheiHagimoto | 0:0e0631af0305 | 235 | |
| RyoheiHagimoto | 0:0e0631af0305 | 236 | |
| RyoheiHagimoto | 0:0e0631af0305 | 237 | @{ */ |
| RyoheiHagimoto | 0:0e0631af0305 | 238 | |
| RyoheiHagimoto | 0:0e0631af0305 | 239 | template<typename _Tp, int n> struct v_reg |
| RyoheiHagimoto | 0:0e0631af0305 | 240 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 241 | //! @cond IGNORED |
| RyoheiHagimoto | 0:0e0631af0305 | 242 | typedef _Tp lane_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 243 | typedef v_reg<typename V_TypeTraits<_Tp>::int_type, n> int_vec; |
| RyoheiHagimoto | 0:0e0631af0305 | 244 | typedef v_reg<typename V_TypeTraits<_Tp>::abs_type, n> abs_vec; |
| RyoheiHagimoto | 0:0e0631af0305 | 245 | enum { nlanes = n }; |
| RyoheiHagimoto | 0:0e0631af0305 | 246 | //! @endcond |
| RyoheiHagimoto | 0:0e0631af0305 | 247 | |
| RyoheiHagimoto | 0:0e0631af0305 | 248 | /** @brief Constructor |
| RyoheiHagimoto | 0:0e0631af0305 | 249 | |
| RyoheiHagimoto | 0:0e0631af0305 | 250 | Initializes register with data from memory |
| RyoheiHagimoto | 0:0e0631af0305 | 251 | @param ptr pointer to memory block with data for register */ |
| RyoheiHagimoto | 0:0e0631af0305 | 252 | explicit v_reg(const _Tp* ptr) { for( int i = 0; i < n; i++ ) s[i] = ptr[i]; } |
| RyoheiHagimoto | 0:0e0631af0305 | 253 | |
| RyoheiHagimoto | 0:0e0631af0305 | 254 | /** @brief Constructor |
| RyoheiHagimoto | 0:0e0631af0305 | 255 | |
| RyoheiHagimoto | 0:0e0631af0305 | 256 | Initializes register with two 64-bit values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 257 | v_reg(_Tp s0, _Tp s1) { s[0] = s0; s[1] = s1; } |
| RyoheiHagimoto | 0:0e0631af0305 | 258 | |
| RyoheiHagimoto | 0:0e0631af0305 | 259 | /** @brief Constructor |
| RyoheiHagimoto | 0:0e0631af0305 | 260 | |
| RyoheiHagimoto | 0:0e0631af0305 | 261 | Initializes register with four 32-bit values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 262 | v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3) { s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; } |
| RyoheiHagimoto | 0:0e0631af0305 | 263 | |
| RyoheiHagimoto | 0:0e0631af0305 | 264 | /** @brief Constructor |
| RyoheiHagimoto | 0:0e0631af0305 | 265 | |
| RyoheiHagimoto | 0:0e0631af0305 | 266 | Initializes register with eight 16-bit values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 267 | v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, |
| RyoheiHagimoto | 0:0e0631af0305 | 268 | _Tp s4, _Tp s5, _Tp s6, _Tp s7) |
| RyoheiHagimoto | 0:0e0631af0305 | 269 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 270 | s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; |
| RyoheiHagimoto | 0:0e0631af0305 | 271 | s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; |
| RyoheiHagimoto | 0:0e0631af0305 | 272 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 273 | |
| RyoheiHagimoto | 0:0e0631af0305 | 274 | /** @brief Constructor |
| RyoheiHagimoto | 0:0e0631af0305 | 275 | |
| RyoheiHagimoto | 0:0e0631af0305 | 276 | Initializes register with sixteen 8-bit values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 277 | v_reg(_Tp s0, _Tp s1, _Tp s2, _Tp s3, |
| RyoheiHagimoto | 0:0e0631af0305 | 278 | _Tp s4, _Tp s5, _Tp s6, _Tp s7, |
| RyoheiHagimoto | 0:0e0631af0305 | 279 | _Tp s8, _Tp s9, _Tp s10, _Tp s11, |
| RyoheiHagimoto | 0:0e0631af0305 | 280 | _Tp s12, _Tp s13, _Tp s14, _Tp s15) |
| RyoheiHagimoto | 0:0e0631af0305 | 281 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 282 | s[0] = s0; s[1] = s1; s[2] = s2; s[3] = s3; |
| RyoheiHagimoto | 0:0e0631af0305 | 283 | s[4] = s4; s[5] = s5; s[6] = s6; s[7] = s7; |
| RyoheiHagimoto | 0:0e0631af0305 | 284 | s[8] = s8; s[9] = s9; s[10] = s10; s[11] = s11; |
| RyoheiHagimoto | 0:0e0631af0305 | 285 | s[12] = s12; s[13] = s13; s[14] = s14; s[15] = s15; |
| RyoheiHagimoto | 0:0e0631af0305 | 286 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 287 | |
| RyoheiHagimoto | 0:0e0631af0305 | 288 | /** @brief Default constructor |
| RyoheiHagimoto | 0:0e0631af0305 | 289 | |
| RyoheiHagimoto | 0:0e0631af0305 | 290 | Does not initialize anything*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 291 | v_reg() {} |
| RyoheiHagimoto | 0:0e0631af0305 | 292 | |
| RyoheiHagimoto | 0:0e0631af0305 | 293 | /** @brief Copy constructor */ |
| RyoheiHagimoto | 0:0e0631af0305 | 294 | v_reg(const v_reg<_Tp, n> & r) |
| RyoheiHagimoto | 0:0e0631af0305 | 295 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 296 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 297 | s[i] = r.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 298 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 299 | /** @brief Access first value |
| RyoheiHagimoto | 0:0e0631af0305 | 300 | |
| RyoheiHagimoto | 0:0e0631af0305 | 301 | Returns value of the first lane according to register type, for example: |
| RyoheiHagimoto | 0:0e0631af0305 | 302 | @code{.cpp} |
| RyoheiHagimoto | 0:0e0631af0305 | 303 | v_int32x4 r(1, 2, 3, 4); |
| RyoheiHagimoto | 0:0e0631af0305 | 304 | int v = r.get0(); // returns 1 |
| RyoheiHagimoto | 0:0e0631af0305 | 305 | v_uint64x2 r(1, 2); |
| RyoheiHagimoto | 0:0e0631af0305 | 306 | uint64_t v = r.get0(); // returns 1 |
| RyoheiHagimoto | 0:0e0631af0305 | 307 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 308 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 309 | _Tp get0() const { return s[0]; } |
| RyoheiHagimoto | 0:0e0631af0305 | 310 | |
| RyoheiHagimoto | 0:0e0631af0305 | 311 | //! @cond IGNORED |
| RyoheiHagimoto | 0:0e0631af0305 | 312 | _Tp get(const int i) const { return s[i]; } |
| RyoheiHagimoto | 0:0e0631af0305 | 313 | v_reg<_Tp, n> high() const |
| RyoheiHagimoto | 0:0e0631af0305 | 314 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 315 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 316 | int i; |
| RyoheiHagimoto | 0:0e0631af0305 | 317 | for( i = 0; i < n/2; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 318 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 319 | c.s[i] = s[i+(n/2)]; |
| RyoheiHagimoto | 0:0e0631af0305 | 320 | c.s[i+(n/2)] = 0; |
| RyoheiHagimoto | 0:0e0631af0305 | 321 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 322 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 323 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 324 | |
| RyoheiHagimoto | 0:0e0631af0305 | 325 | static v_reg<_Tp, n> zero() |
| RyoheiHagimoto | 0:0e0631af0305 | 326 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 327 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 328 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 329 | c.s[i] = (_Tp)0; |
| RyoheiHagimoto | 0:0e0631af0305 | 330 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 331 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 332 | |
| RyoheiHagimoto | 0:0e0631af0305 | 333 | static v_reg<_Tp, n> all(_Tp s) |
| RyoheiHagimoto | 0:0e0631af0305 | 334 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 335 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 336 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 337 | c.s[i] = s; |
| RyoheiHagimoto | 0:0e0631af0305 | 338 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 339 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 340 | |
| RyoheiHagimoto | 0:0e0631af0305 | 341 | template<typename _Tp2, int n2> v_reg<_Tp2, n2> reinterpret_as() const |
| RyoheiHagimoto | 0:0e0631af0305 | 342 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 343 | size_t bytes = std::min(sizeof(_Tp2)*n2, sizeof(_Tp)*n); |
| RyoheiHagimoto | 0:0e0631af0305 | 344 | v_reg<_Tp2, n2> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 345 | std::memcpy(&c.s[0], &s[0], bytes); |
| RyoheiHagimoto | 0:0e0631af0305 | 346 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 347 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 348 | |
| RyoheiHagimoto | 0:0e0631af0305 | 349 | _Tp s[n]; |
| RyoheiHagimoto | 0:0e0631af0305 | 350 | //! @endcond |
| RyoheiHagimoto | 0:0e0631af0305 | 351 | }; |
| RyoheiHagimoto | 0:0e0631af0305 | 352 | |
| RyoheiHagimoto | 0:0e0631af0305 | 353 | /** @brief Sixteen 8-bit unsigned integer values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 354 | typedef v_reg<uchar, 16> v_uint8x16; |
| RyoheiHagimoto | 0:0e0631af0305 | 355 | /** @brief Sixteen 8-bit signed integer values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 356 | typedef v_reg<schar, 16> v_int8x16; |
| RyoheiHagimoto | 0:0e0631af0305 | 357 | /** @brief Eight 16-bit unsigned integer values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 358 | typedef v_reg<ushort, 8> v_uint16x8; |
| RyoheiHagimoto | 0:0e0631af0305 | 359 | /** @brief Eight 16-bit signed integer values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 360 | typedef v_reg<short, 8> v_int16x8; |
| RyoheiHagimoto | 0:0e0631af0305 | 361 | /** @brief Four 32-bit unsigned integer values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 362 | typedef v_reg<unsigned, 4> v_uint32x4; |
| RyoheiHagimoto | 0:0e0631af0305 | 363 | /** @brief Four 32-bit signed integer values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 364 | typedef v_reg<int, 4> v_int32x4; |
| RyoheiHagimoto | 0:0e0631af0305 | 365 | /** @brief Four 32-bit floating point values (single precision) */ |
| RyoheiHagimoto | 0:0e0631af0305 | 366 | typedef v_reg<float, 4> v_float32x4; |
| RyoheiHagimoto | 0:0e0631af0305 | 367 | /** @brief Two 64-bit floating point values (double precision) */ |
| RyoheiHagimoto | 0:0e0631af0305 | 368 | typedef v_reg<double, 2> v_float64x2; |
| RyoheiHagimoto | 0:0e0631af0305 | 369 | /** @brief Two 64-bit unsigned integer values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 370 | typedef v_reg<uint64, 2> v_uint64x2; |
| RyoheiHagimoto | 0:0e0631af0305 | 371 | /** @brief Two 64-bit signed integer values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 372 | typedef v_reg<int64, 2> v_int64x2; |
| RyoheiHagimoto | 0:0e0631af0305 | 373 | |
| RyoheiHagimoto | 0:0e0631af0305 | 374 | //! @brief Helper macro |
| RyoheiHagimoto | 0:0e0631af0305 | 375 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 376 | #define OPENCV_HAL_IMPL_BIN_OP(bin_op) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 377 | template<typename _Tp, int n> inline v_reg<_Tp, n> \ |
| RyoheiHagimoto | 0:0e0631af0305 | 378 | operator bin_op (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 379 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 380 | v_reg<_Tp, n> c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 381 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 382 | c.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 383 | return c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 384 | } \ |
| RyoheiHagimoto | 0:0e0631af0305 | 385 | template<typename _Tp, int n> inline v_reg<_Tp, n>& \ |
| RyoheiHagimoto | 0:0e0631af0305 | 386 | operator bin_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 387 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 388 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 389 | a.s[i] = saturate_cast<_Tp>(a.s[i] bin_op b.s[i]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 390 | return a; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 391 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 392 | |
| RyoheiHagimoto | 0:0e0631af0305 | 393 | /** @brief Add values |
| RyoheiHagimoto | 0:0e0631af0305 | 394 | |
| RyoheiHagimoto | 0:0e0631af0305 | 395 | For all types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 396 | OPENCV_HAL_IMPL_BIN_OP(+) |
| RyoheiHagimoto | 0:0e0631af0305 | 397 | |
| RyoheiHagimoto | 0:0e0631af0305 | 398 | /** @brief Subtract values |
| RyoheiHagimoto | 0:0e0631af0305 | 399 | |
| RyoheiHagimoto | 0:0e0631af0305 | 400 | For all types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 401 | OPENCV_HAL_IMPL_BIN_OP(-) |
| RyoheiHagimoto | 0:0e0631af0305 | 402 | |
| RyoheiHagimoto | 0:0e0631af0305 | 403 | /** @brief Multiply values |
| RyoheiHagimoto | 0:0e0631af0305 | 404 | |
| RyoheiHagimoto | 0:0e0631af0305 | 405 | For 16- and 32-bit integer types and floating types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 406 | OPENCV_HAL_IMPL_BIN_OP(*) |
| RyoheiHagimoto | 0:0e0631af0305 | 407 | |
| RyoheiHagimoto | 0:0e0631af0305 | 408 | /** @brief Divide values |
| RyoheiHagimoto | 0:0e0631af0305 | 409 | |
| RyoheiHagimoto | 0:0e0631af0305 | 410 | For floating types only. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 411 | OPENCV_HAL_IMPL_BIN_OP(/) |
| RyoheiHagimoto | 0:0e0631af0305 | 412 | |
| RyoheiHagimoto | 0:0e0631af0305 | 413 | //! @brief Helper macro |
| RyoheiHagimoto | 0:0e0631af0305 | 414 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 415 | #define OPENCV_HAL_IMPL_BIT_OP(bit_op) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 416 | template<typename _Tp, int n> inline v_reg<_Tp, n> operator bit_op \ |
| RyoheiHagimoto | 0:0e0631af0305 | 417 | (const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 418 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 419 | v_reg<_Tp, n> c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 420 | typedef typename V_TypeTraits<_Tp>::int_type itype; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 421 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 422 | c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ |
| RyoheiHagimoto | 0:0e0631af0305 | 423 | V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 424 | return c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 425 | } \ |
| RyoheiHagimoto | 0:0e0631af0305 | 426 | template<typename _Tp, int n> inline v_reg<_Tp, n>& operator \ |
| RyoheiHagimoto | 0:0e0631af0305 | 427 | bit_op##= (v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 428 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 429 | typedef typename V_TypeTraits<_Tp>::int_type itype; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 430 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 431 | a.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)(V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) bit_op \ |
| RyoheiHagimoto | 0:0e0631af0305 | 432 | V_TypeTraits<_Tp>::reinterpret_int(b.s[i]))); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 433 | return a; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 434 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 435 | |
| RyoheiHagimoto | 0:0e0631af0305 | 436 | /** @brief Bitwise AND |
| RyoheiHagimoto | 0:0e0631af0305 | 437 | |
| RyoheiHagimoto | 0:0e0631af0305 | 438 | Only for integer types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 439 | OPENCV_HAL_IMPL_BIT_OP(&) |
| RyoheiHagimoto | 0:0e0631af0305 | 440 | |
| RyoheiHagimoto | 0:0e0631af0305 | 441 | /** @brief Bitwise OR |
| RyoheiHagimoto | 0:0e0631af0305 | 442 | |
| RyoheiHagimoto | 0:0e0631af0305 | 443 | Only for integer types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 444 | OPENCV_HAL_IMPL_BIT_OP(|) |
| RyoheiHagimoto | 0:0e0631af0305 | 445 | |
| RyoheiHagimoto | 0:0e0631af0305 | 446 | /** @brief Bitwise XOR |
| RyoheiHagimoto | 0:0e0631af0305 | 447 | |
| RyoheiHagimoto | 0:0e0631af0305 | 448 | Only for integer types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 449 | OPENCV_HAL_IMPL_BIT_OP(^) |
| RyoheiHagimoto | 0:0e0631af0305 | 450 | |
| RyoheiHagimoto | 0:0e0631af0305 | 451 | /** @brief Bitwise NOT |
| RyoheiHagimoto | 0:0e0631af0305 | 452 | |
| RyoheiHagimoto | 0:0e0631af0305 | 453 | Only for integer types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 454 | template<typename _Tp, int n> inline v_reg<_Tp, n> operator ~ (const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 455 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 456 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 457 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 458 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 459 | c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int(~V_TypeTraits<_Tp>::reinterpret_int(a.s[i])); |
| RyoheiHagimoto | 0:0e0631af0305 | 460 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 461 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 462 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 463 | |
| RyoheiHagimoto | 0:0e0631af0305 | 464 | //! @brief Helper macro |
| RyoheiHagimoto | 0:0e0631af0305 | 465 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 466 | #define OPENCV_HAL_IMPL_MATH_FUNC(func, cfunc, _Tp2) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 467 | template<typename _Tp, int n> inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 468 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 469 | v_reg<_Tp2, n> c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 470 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 471 | c.s[i] = cfunc(a.s[i]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 472 | return c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 473 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 474 | |
| RyoheiHagimoto | 0:0e0631af0305 | 475 | /** @brief Square root of elements |
| RyoheiHagimoto | 0:0e0631af0305 | 476 | |
| RyoheiHagimoto | 0:0e0631af0305 | 477 | Only for floating point types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 478 | OPENCV_HAL_IMPL_MATH_FUNC(v_sqrt, std::sqrt, _Tp) |
| RyoheiHagimoto | 0:0e0631af0305 | 479 | |
| RyoheiHagimoto | 0:0e0631af0305 | 480 | //! @cond IGNORED |
| RyoheiHagimoto | 0:0e0631af0305 | 481 | OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp) |
| RyoheiHagimoto | 0:0e0631af0305 | 482 | OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp) |
| RyoheiHagimoto | 0:0e0631af0305 | 483 | OPENCV_HAL_IMPL_MATH_FUNC(v_exp, std::exp, _Tp) |
| RyoheiHagimoto | 0:0e0631af0305 | 484 | OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp) |
| RyoheiHagimoto | 0:0e0631af0305 | 485 | //! @endcond |
| RyoheiHagimoto | 0:0e0631af0305 | 486 | |
| RyoheiHagimoto | 0:0e0631af0305 | 487 | /** @brief Absolute value of elements |
| RyoheiHagimoto | 0:0e0631af0305 | 488 | |
| RyoheiHagimoto | 0:0e0631af0305 | 489 | Only for floating point types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 490 | OPENCV_HAL_IMPL_MATH_FUNC(v_abs, (typename V_TypeTraits<_Tp>::abs_type)std::abs, |
| RyoheiHagimoto | 0:0e0631af0305 | 491 | typename V_TypeTraits<_Tp>::abs_type) |
| RyoheiHagimoto | 0:0e0631af0305 | 492 | |
| RyoheiHagimoto | 0:0e0631af0305 | 493 | /** @brief Round elements |
| RyoheiHagimoto | 0:0e0631af0305 | 494 | |
| RyoheiHagimoto | 0:0e0631af0305 | 495 | Only for floating point types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 496 | OPENCV_HAL_IMPL_MATH_FUNC(v_round, cvRound, int) |
| RyoheiHagimoto | 0:0e0631af0305 | 497 | |
| RyoheiHagimoto | 0:0e0631af0305 | 498 | /** @brief Floor elements |
| RyoheiHagimoto | 0:0e0631af0305 | 499 | |
| RyoheiHagimoto | 0:0e0631af0305 | 500 | Only for floating point types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 501 | OPENCV_HAL_IMPL_MATH_FUNC(v_floor, cvFloor, int) |
| RyoheiHagimoto | 0:0e0631af0305 | 502 | |
| RyoheiHagimoto | 0:0e0631af0305 | 503 | /** @brief Ceil elements |
| RyoheiHagimoto | 0:0e0631af0305 | 504 | |
| RyoheiHagimoto | 0:0e0631af0305 | 505 | Only for floating point types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 506 | OPENCV_HAL_IMPL_MATH_FUNC(v_ceil, cvCeil, int) |
| RyoheiHagimoto | 0:0e0631af0305 | 507 | |
| RyoheiHagimoto | 0:0e0631af0305 | 508 | /** @brief Truncate elements |
| RyoheiHagimoto | 0:0e0631af0305 | 509 | |
| RyoheiHagimoto | 0:0e0631af0305 | 510 | Only for floating point types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 511 | OPENCV_HAL_IMPL_MATH_FUNC(v_trunc, int, int) |
| RyoheiHagimoto | 0:0e0631af0305 | 512 | |
| RyoheiHagimoto | 0:0e0631af0305 | 513 | //! @brief Helper macro |
| RyoheiHagimoto | 0:0e0631af0305 | 514 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 515 | #define OPENCV_HAL_IMPL_MINMAX_FUNC(func, cfunc) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 516 | template<typename _Tp, int n> inline v_reg<_Tp, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 517 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 518 | v_reg<_Tp, n> c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 519 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 520 | c.s[i] = cfunc(a.s[i], b.s[i]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 521 | return c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 522 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 523 | |
| RyoheiHagimoto | 0:0e0631af0305 | 524 | //! @brief Helper macro |
| RyoheiHagimoto | 0:0e0631af0305 | 525 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 526 | #define OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(func, cfunc) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 527 | template<typename _Tp, int n> inline _Tp func(const v_reg<_Tp, n>& a) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 528 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 529 | _Tp c = a.s[0]; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 530 | for( int i = 1; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 531 | c = cfunc(c, a.s[i]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 532 | return c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 533 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 534 | |
| RyoheiHagimoto | 0:0e0631af0305 | 535 | /** @brief Choose min values for each pair |
| RyoheiHagimoto | 0:0e0631af0305 | 536 | |
| RyoheiHagimoto | 0:0e0631af0305 | 537 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 538 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 539 | {A1 A2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 540 | {B1 B2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 541 | -------------- |
| RyoheiHagimoto | 0:0e0631af0305 | 542 | {min(A1,B1) min(A2,B2) ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 543 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 544 | For all types except 64-bit integer. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 545 | OPENCV_HAL_IMPL_MINMAX_FUNC(v_min, std::min) |
| RyoheiHagimoto | 0:0e0631af0305 | 546 | |
| RyoheiHagimoto | 0:0e0631af0305 | 547 | /** @brief Choose max values for each pair |
| RyoheiHagimoto | 0:0e0631af0305 | 548 | |
| RyoheiHagimoto | 0:0e0631af0305 | 549 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 550 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 551 | {A1 A2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 552 | {B1 B2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 553 | -------------- |
| RyoheiHagimoto | 0:0e0631af0305 | 554 | {max(A1,B1) max(A2,B2) ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 555 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 556 | For all types except 64-bit integer. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 557 | OPENCV_HAL_IMPL_MINMAX_FUNC(v_max, std::max) |
| RyoheiHagimoto | 0:0e0631af0305 | 558 | |
| RyoheiHagimoto | 0:0e0631af0305 | 559 | /** @brief Find one min value |
| RyoheiHagimoto | 0:0e0631af0305 | 560 | |
| RyoheiHagimoto | 0:0e0631af0305 | 561 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 562 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 563 | {A1 A2 A3 ...} => min(A1,A2,A3,...) |
| RyoheiHagimoto | 0:0e0631af0305 | 564 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 565 | For 32-bit integer and 32-bit floating point types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 566 | OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_min, std::min) |
| RyoheiHagimoto | 0:0e0631af0305 | 567 | |
| RyoheiHagimoto | 0:0e0631af0305 | 568 | /** @brief Find one max value |
| RyoheiHagimoto | 0:0e0631af0305 | 569 | |
| RyoheiHagimoto | 0:0e0631af0305 | 570 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 571 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 572 | {A1 A2 A3 ...} => max(A1,A2,A3,...) |
| RyoheiHagimoto | 0:0e0631af0305 | 573 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 574 | For 32-bit integer and 32-bit floating point types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 575 | OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max, std::max) |
| RyoheiHagimoto | 0:0e0631af0305 | 576 | |
| RyoheiHagimoto | 0:0e0631af0305 | 577 | //! @cond IGNORED |
| RyoheiHagimoto | 0:0e0631af0305 | 578 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 579 | inline void v_minmax( const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, |
| RyoheiHagimoto | 0:0e0631af0305 | 580 | v_reg<_Tp, n>& minval, v_reg<_Tp, n>& maxval ) |
| RyoheiHagimoto | 0:0e0631af0305 | 581 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 582 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 583 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 584 | minval.s[i] = std::min(a.s[i], b.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 585 | maxval.s[i] = std::max(a.s[i], b.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 586 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 587 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 588 | //! @endcond |
| RyoheiHagimoto | 0:0e0631af0305 | 589 | |
| RyoheiHagimoto | 0:0e0631af0305 | 590 | //! @brief Helper macro |
| RyoheiHagimoto | 0:0e0631af0305 | 591 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 592 | #define OPENCV_HAL_IMPL_CMP_OP(cmp_op) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 593 | template<typename _Tp, int n> \ |
| RyoheiHagimoto | 0:0e0631af0305 | 594 | inline v_reg<_Tp, n> operator cmp_op(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 595 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 596 | typedef typename V_TypeTraits<_Tp>::int_type itype; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 597 | v_reg<_Tp, n> c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 598 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 599 | c.s[i] = V_TypeTraits<_Tp>::reinterpret_from_int((itype)-(int)(a.s[i] cmp_op b.s[i])); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 600 | return c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 601 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 602 | |
| RyoheiHagimoto | 0:0e0631af0305 | 603 | /** @brief Less-than comparison |
| RyoheiHagimoto | 0:0e0631af0305 | 604 | |
| RyoheiHagimoto | 0:0e0631af0305 | 605 | For all types except 64-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 606 | OPENCV_HAL_IMPL_CMP_OP(<) |
| RyoheiHagimoto | 0:0e0631af0305 | 607 | |
| RyoheiHagimoto | 0:0e0631af0305 | 608 | /** @brief Greater-than comparison |
| RyoheiHagimoto | 0:0e0631af0305 | 609 | |
| RyoheiHagimoto | 0:0e0631af0305 | 610 | For all types except 64-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 611 | OPENCV_HAL_IMPL_CMP_OP(>) |
| RyoheiHagimoto | 0:0e0631af0305 | 612 | |
| RyoheiHagimoto | 0:0e0631af0305 | 613 | /** @brief Less-than or equal comparison |
| RyoheiHagimoto | 0:0e0631af0305 | 614 | |
| RyoheiHagimoto | 0:0e0631af0305 | 615 | For all types except 64-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 616 | OPENCV_HAL_IMPL_CMP_OP(<=) |
| RyoheiHagimoto | 0:0e0631af0305 | 617 | |
| RyoheiHagimoto | 0:0e0631af0305 | 618 | /** @brief Greater-than or equal comparison |
| RyoheiHagimoto | 0:0e0631af0305 | 619 | |
| RyoheiHagimoto | 0:0e0631af0305 | 620 | For all types except 64-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 621 | OPENCV_HAL_IMPL_CMP_OP(>=) |
| RyoheiHagimoto | 0:0e0631af0305 | 622 | |
| RyoheiHagimoto | 0:0e0631af0305 | 623 | /** @brief Equal comparison |
| RyoheiHagimoto | 0:0e0631af0305 | 624 | |
| RyoheiHagimoto | 0:0e0631af0305 | 625 | For all types except 64-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 626 | OPENCV_HAL_IMPL_CMP_OP(==) |
| RyoheiHagimoto | 0:0e0631af0305 | 627 | |
| RyoheiHagimoto | 0:0e0631af0305 | 628 | /** @brief Not equal comparison |
| RyoheiHagimoto | 0:0e0631af0305 | 629 | |
| RyoheiHagimoto | 0:0e0631af0305 | 630 | For all types except 64-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 631 | OPENCV_HAL_IMPL_CMP_OP(!=) |
| RyoheiHagimoto | 0:0e0631af0305 | 632 | |
| RyoheiHagimoto | 0:0e0631af0305 | 633 | //! @brief Helper macro |
| RyoheiHagimoto | 0:0e0631af0305 | 634 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 635 | #define OPENCV_HAL_IMPL_ADD_SUB_OP(func, bin_op, cast_op, _Tp2) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 636 | template<typename _Tp, int n> \ |
| RyoheiHagimoto | 0:0e0631af0305 | 637 | inline v_reg<_Tp2, n> func(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 638 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 639 | typedef _Tp2 rtype; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 640 | v_reg<rtype, n> c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 641 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 642 | c.s[i] = cast_op(a.s[i] bin_op b.s[i]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 643 | return c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 644 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 645 | |
| RyoheiHagimoto | 0:0e0631af0305 | 646 | /** @brief Add values without saturation |
| RyoheiHagimoto | 0:0e0631af0305 | 647 | |
| RyoheiHagimoto | 0:0e0631af0305 | 648 | For 8- and 16-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 649 | OPENCV_HAL_IMPL_ADD_SUB_OP(v_add_wrap, +, (_Tp), _Tp) |
| RyoheiHagimoto | 0:0e0631af0305 | 650 | |
| RyoheiHagimoto | 0:0e0631af0305 | 651 | /** @brief Subtract values without saturation |
| RyoheiHagimoto | 0:0e0631af0305 | 652 | |
| RyoheiHagimoto | 0:0e0631af0305 | 653 | For 8- and 16-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 654 | OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap, -, (_Tp), _Tp) |
| RyoheiHagimoto | 0:0e0631af0305 | 655 | |
| RyoheiHagimoto | 0:0e0631af0305 | 656 | //! @cond IGNORED |
| RyoheiHagimoto | 0:0e0631af0305 | 657 | template<typename T> inline T _absdiff(T a, T b) |
| RyoheiHagimoto | 0:0e0631af0305 | 658 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 659 | return a > b ? a - b : b - a; |
| RyoheiHagimoto | 0:0e0631af0305 | 660 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 661 | //! @endcond |
| RyoheiHagimoto | 0:0e0631af0305 | 662 | |
| RyoheiHagimoto | 0:0e0631af0305 | 663 | /** @brief Absolute difference |
| RyoheiHagimoto | 0:0e0631af0305 | 664 | |
| RyoheiHagimoto | 0:0e0631af0305 | 665 | Returns \f$ |a - b| \f$ converted to the corresponding unsigned type. |
| RyoheiHagimoto | 0:0e0631af0305 | 666 | Example: |
| RyoheiHagimoto | 0:0e0631af0305 | 667 | @code{.cpp} |
| RyoheiHagimoto | 0:0e0631af0305 | 668 | v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1} |
| RyoheiHagimoto | 0:0e0631af0305 | 669 | v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3} |
| RyoheiHagimoto | 0:0e0631af0305 | 670 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 671 | For 8-, 16-, 32-bit integer source types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 672 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 673 | inline v_reg<typename V_TypeTraits<_Tp>::abs_type, n> v_absdiff(const v_reg<_Tp, n>& a, const v_reg<_Tp, n> & b) |
| RyoheiHagimoto | 0:0e0631af0305 | 674 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 675 | typedef typename V_TypeTraits<_Tp>::abs_type rtype; |
| RyoheiHagimoto | 0:0e0631af0305 | 676 | v_reg<rtype, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 677 | const rtype mask = std::numeric_limits<_Tp>::is_signed ? (1 << (sizeof(rtype)*8 - 1)) : 0; |
| RyoheiHagimoto | 0:0e0631af0305 | 678 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 679 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 680 | rtype ua = a.s[i] ^ mask; |
| RyoheiHagimoto | 0:0e0631af0305 | 681 | rtype ub = b.s[i] ^ mask; |
| RyoheiHagimoto | 0:0e0631af0305 | 682 | c.s[i] = _absdiff(ua, ub); |
| RyoheiHagimoto | 0:0e0631af0305 | 683 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 684 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 685 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 686 | |
| RyoheiHagimoto | 0:0e0631af0305 | 687 | /** @overload |
| RyoheiHagimoto | 0:0e0631af0305 | 688 | |
| RyoheiHagimoto | 0:0e0631af0305 | 689 | For 32-bit floating point values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 690 | inline v_float32x4 v_absdiff(const v_float32x4& a, const v_float32x4& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 691 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 692 | v_float32x4 c; |
| RyoheiHagimoto | 0:0e0631af0305 | 693 | for( int i = 0; i < c.nlanes; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 694 | c.s[i] = _absdiff(a.s[i], b.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 695 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 696 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 697 | |
| RyoheiHagimoto | 0:0e0631af0305 | 698 | /** @overload |
| RyoheiHagimoto | 0:0e0631af0305 | 699 | |
| RyoheiHagimoto | 0:0e0631af0305 | 700 | For 64-bit floating point values */ |
| RyoheiHagimoto | 0:0e0631af0305 | 701 | inline v_float64x2 v_absdiff(const v_float64x2& a, const v_float64x2& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 702 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 703 | v_float64x2 c; |
| RyoheiHagimoto | 0:0e0631af0305 | 704 | for( int i = 0; i < c.nlanes; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 705 | c.s[i] = _absdiff(a.s[i], b.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 706 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 707 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 708 | |
| RyoheiHagimoto | 0:0e0631af0305 | 709 | /** @brief Inverse square root |
| RyoheiHagimoto | 0:0e0631af0305 | 710 | |
| RyoheiHagimoto | 0:0e0631af0305 | 711 | Returns \f$ 1/\sqrt{a} \f$ |
| RyoheiHagimoto | 0:0e0631af0305 | 712 | For floating point types only. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 713 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 714 | inline v_reg<_Tp, n> v_invsqrt(const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 715 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 716 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 717 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 718 | c.s[i] = 1.f/std::sqrt(a.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 719 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 720 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 721 | |
| RyoheiHagimoto | 0:0e0631af0305 | 722 | /** @brief Magnitude |
| RyoheiHagimoto | 0:0e0631af0305 | 723 | |
| RyoheiHagimoto | 0:0e0631af0305 | 724 | Returns \f$ \sqrt{a^2 + b^2} \f$ |
| RyoheiHagimoto | 0:0e0631af0305 | 725 | For floating point types only. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 726 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 727 | inline v_reg<_Tp, n> v_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 728 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 729 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 730 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 731 | c.s[i] = std::sqrt(a.s[i]*a.s[i] + b.s[i]*b.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 732 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 733 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 734 | |
| RyoheiHagimoto | 0:0e0631af0305 | 735 | /** @brief Square of the magnitude |
| RyoheiHagimoto | 0:0e0631af0305 | 736 | |
| RyoheiHagimoto | 0:0e0631af0305 | 737 | Returns \f$ a^2 + b^2 \f$ |
| RyoheiHagimoto | 0:0e0631af0305 | 738 | For floating point types only. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 739 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 740 | inline v_reg<_Tp, n> v_sqr_magnitude(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 741 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 742 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 743 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 744 | c.s[i] = a.s[i]*a.s[i] + b.s[i]*b.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 745 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 746 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 747 | |
| RyoheiHagimoto | 0:0e0631af0305 | 748 | /** @brief Multiply and add |
| RyoheiHagimoto | 0:0e0631af0305 | 749 | |
| RyoheiHagimoto | 0:0e0631af0305 | 750 | Returns \f$ a*b + c \f$ |
| RyoheiHagimoto | 0:0e0631af0305 | 751 | For floating point types only. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 752 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 753 | inline v_reg<_Tp, n> v_muladd(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, |
| RyoheiHagimoto | 0:0e0631af0305 | 754 | const v_reg<_Tp, n>& c) |
| RyoheiHagimoto | 0:0e0631af0305 | 755 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 756 | v_reg<_Tp, n> d; |
| RyoheiHagimoto | 0:0e0631af0305 | 757 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 758 | d.s[i] = a.s[i]*b.s[i] + c.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 759 | return d; |
| RyoheiHagimoto | 0:0e0631af0305 | 760 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 761 | |
| RyoheiHagimoto | 0:0e0631af0305 | 762 | /** @brief Dot product of elements |
| RyoheiHagimoto | 0:0e0631af0305 | 763 | |
| RyoheiHagimoto | 0:0e0631af0305 | 764 | Multiply values in two registers and sum adjacent result pairs. |
| RyoheiHagimoto | 0:0e0631af0305 | 765 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 766 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 767 | {A1 A2 ...} // 16-bit |
| RyoheiHagimoto | 0:0e0631af0305 | 768 | x {B1 B2 ...} // 16-bit |
| RyoheiHagimoto | 0:0e0631af0305 | 769 | ------------- |
| RyoheiHagimoto | 0:0e0631af0305 | 770 | {A1B1+A2B2 ...} // 32-bit |
| RyoheiHagimoto | 0:0e0631af0305 | 771 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 772 | Implemented only for 16-bit signed source type (v_int16x8). |
| RyoheiHagimoto | 0:0e0631af0305 | 773 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 774 | template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> |
| RyoheiHagimoto | 0:0e0631af0305 | 775 | v_dotprod(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 776 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 777 | typedef typename V_TypeTraits<_Tp>::w_type w_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 778 | v_reg<w_type, n/2> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 779 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 780 | c.s[i] = (w_type)a.s[i*2]*b.s[i*2] + (w_type)a.s[i*2+1]*b.s[i*2+1]; |
| RyoheiHagimoto | 0:0e0631af0305 | 781 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 782 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 783 | |
| RyoheiHagimoto | 0:0e0631af0305 | 784 | /** @brief Multiply and expand |
| RyoheiHagimoto | 0:0e0631af0305 | 785 | |
| RyoheiHagimoto | 0:0e0631af0305 | 786 | Multiply values in two registers and store the results in two registers with a wider pack type. |
| RyoheiHagimoto | 0:0e0631af0305 | 787 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 788 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 789 | {A B C D} // 32-bit |
| RyoheiHagimoto | 0:0e0631af0305 | 790 | x {E F G H} // 32-bit |
| RyoheiHagimoto | 0:0e0631af0305 | 791 | --------------- |
| RyoheiHagimoto | 0:0e0631af0305 | 792 | {AE BF} // 64-bit |
| RyoheiHagimoto | 0:0e0631af0305 | 793 | {CG DH} // 64-bit |
| RyoheiHagimoto | 0:0e0631af0305 | 794 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 795 | Example: |
| RyoheiHagimoto | 0:0e0631af0305 | 796 | @code{.cpp} |
| RyoheiHagimoto | 0:0e0631af0305 | 797 | v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2} |
| RyoheiHagimoto | 0:0e0631af0305 | 798 | v_uint64x2 c, d; // results |
| RyoheiHagimoto | 0:0e0631af0305 | 799 | v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8} |
| RyoheiHagimoto | 0:0e0631af0305 | 800 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 801 | Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4). |
| RyoheiHagimoto | 0:0e0631af0305 | 802 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 803 | template<typename _Tp, int n> inline void v_mul_expand(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, |
| RyoheiHagimoto | 0:0e0631af0305 | 804 | v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c, |
| RyoheiHagimoto | 0:0e0631af0305 | 805 | v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& d) |
| RyoheiHagimoto | 0:0e0631af0305 | 806 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 807 | typedef typename V_TypeTraits<_Tp>::w_type w_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 808 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 809 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 810 | c.s[i] = (w_type)a.s[i]*b.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 811 | d.s[i] = (w_type)a.s[i+(n/2)]*b.s[i+(n/2)]; |
| RyoheiHagimoto | 0:0e0631af0305 | 812 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 813 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 814 | |
| RyoheiHagimoto | 0:0e0631af0305 | 815 | //! @cond IGNORED |
| RyoheiHagimoto | 0:0e0631af0305 | 816 | template<typename _Tp, int n> inline void v_hsum(const v_reg<_Tp, n>& a, |
| RyoheiHagimoto | 0:0e0631af0305 | 817 | v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& c) |
| RyoheiHagimoto | 0:0e0631af0305 | 818 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 819 | typedef typename V_TypeTraits<_Tp>::w_type w_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 820 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 821 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 822 | c.s[i] = (w_type)a.s[i*2] + a.s[i*2+1]; |
| RyoheiHagimoto | 0:0e0631af0305 | 823 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 824 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 825 | //! @endcond |
| RyoheiHagimoto | 0:0e0631af0305 | 826 | |
| RyoheiHagimoto | 0:0e0631af0305 | 827 | //! @brief Helper macro |
| RyoheiHagimoto | 0:0e0631af0305 | 828 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 829 | #define OPENCV_HAL_IMPL_SHIFT_OP(shift_op) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 830 | template<typename _Tp, int n> inline v_reg<_Tp, n> operator shift_op(const v_reg<_Tp, n>& a, int imm) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 831 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 832 | v_reg<_Tp, n> c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 833 | for( int i = 0; i < n; i++ ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 834 | c.s[i] = (_Tp)(a.s[i] shift_op imm); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 835 | return c; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 836 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 837 | |
| RyoheiHagimoto | 0:0e0631af0305 | 838 | /** @brief Bitwise shift left |
| RyoheiHagimoto | 0:0e0631af0305 | 839 | |
| RyoheiHagimoto | 0:0e0631af0305 | 840 | For 16-, 32- and 64-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 841 | OPENCV_HAL_IMPL_SHIFT_OP(<<) |
| RyoheiHagimoto | 0:0e0631af0305 | 842 | |
| RyoheiHagimoto | 0:0e0631af0305 | 843 | /** @brief Bitwise shift right |
| RyoheiHagimoto | 0:0e0631af0305 | 844 | |
| RyoheiHagimoto | 0:0e0631af0305 | 845 | For 16-, 32- and 64-bit integer values. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 846 | OPENCV_HAL_IMPL_SHIFT_OP(>>) |
| RyoheiHagimoto | 0:0e0631af0305 | 847 | |
| RyoheiHagimoto | 0:0e0631af0305 | 848 | /** @brief Sum packed values |
| RyoheiHagimoto | 0:0e0631af0305 | 849 | |
| RyoheiHagimoto | 0:0e0631af0305 | 850 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 851 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 852 | {A1 A2 A3 ...} => sum{A1,A2,A3,...} |
| RyoheiHagimoto | 0:0e0631af0305 | 853 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 854 | For 32-bit integer and 32-bit floating point types.*/ |
| RyoheiHagimoto | 0:0e0631af0305 | 855 | template<typename _Tp, int n> inline typename V_TypeTraits<_Tp>::sum_type v_reduce_sum(const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 856 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 857 | typename V_TypeTraits<_Tp>::sum_type c = a.s[0]; |
| RyoheiHagimoto | 0:0e0631af0305 | 858 | for( int i = 1; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 859 | c += a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 860 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 861 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 862 | |
| RyoheiHagimoto | 0:0e0631af0305 | 863 | /** @brief Get negative values mask |
| RyoheiHagimoto | 0:0e0631af0305 | 864 | |
| RyoheiHagimoto | 0:0e0631af0305 | 865 | The returned value is a bit mask in which the i-th bit is set to 1 if the i-th packed value is negative. |
| RyoheiHagimoto | 0:0e0631af0305 | 866 | Example: |
| RyoheiHagimoto | 0:0e0631af0305 | 867 | @code{.cpp} |
| RyoheiHagimoto | 0:0e0631af0305 | 868 | v_int32x4 r; // set to {-1, -1, 1, 1} |
| RyoheiHagimoto | 0:0e0631af0305 | 869 | int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011 |
| RyoheiHagimoto | 0:0e0631af0305 | 870 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 871 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 872 | template<typename _Tp, int n> inline int v_signmask(const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 873 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 874 | int mask = 0; |
| RyoheiHagimoto | 0:0e0631af0305 | 875 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 876 | mask |= (V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0) << i; |
| RyoheiHagimoto | 0:0e0631af0305 | 877 | return mask; |
| RyoheiHagimoto | 0:0e0631af0305 | 878 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 879 | |
| RyoheiHagimoto | 0:0e0631af0305 | 880 | /** @brief Check if all packed values are less than zero |
| RyoheiHagimoto | 0:0e0631af0305 | 881 | |
| RyoheiHagimoto | 0:0e0631af0305 | 882 | Unsigned values will be cast to signed: `uchar 254 => char -2`. |
| RyoheiHagimoto | 0:0e0631af0305 | 883 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 884 | template<typename _Tp, int n> inline bool v_check_all(const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 885 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 886 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 887 | if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) >= 0 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 888 | return false; |
| RyoheiHagimoto | 0:0e0631af0305 | 889 | return true; |
| RyoheiHagimoto | 0:0e0631af0305 | 890 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 891 | |
| RyoheiHagimoto | 0:0e0631af0305 | 892 | /** @brief Check if any of packed values is less than zero |
| RyoheiHagimoto | 0:0e0631af0305 | 893 | |
| RyoheiHagimoto | 0:0e0631af0305 | 894 | Unsigned values will be cast to signed: `uchar 254 => char -2`. |
| RyoheiHagimoto | 0:0e0631af0305 | 895 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 896 | template<typename _Tp, int n> inline bool v_check_any(const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 897 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 898 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 899 | if( V_TypeTraits<_Tp>::reinterpret_int(a.s[i]) < 0 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 900 | return true; |
| RyoheiHagimoto | 0:0e0631af0305 | 901 | return false; |
| RyoheiHagimoto | 0:0e0631af0305 | 902 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 903 | |
| RyoheiHagimoto | 0:0e0631af0305 | 904 | /** @brief Bitwise select |
| RyoheiHagimoto | 0:0e0631af0305 | 905 | |
| RyoheiHagimoto | 0:0e0631af0305 | 906 | Return value will be built by combining values a and b using the following scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 907 | If the i-th bit in _mask_ is 1 |
| RyoheiHagimoto | 0:0e0631af0305 | 908 | select i-th bit from _a_ |
| RyoheiHagimoto | 0:0e0631af0305 | 909 | else |
| RyoheiHagimoto | 0:0e0631af0305 | 910 | select i-th bit from _b_ */ |
| RyoheiHagimoto | 0:0e0631af0305 | 911 | template<typename _Tp, int n> inline v_reg<_Tp, n> v_select(const v_reg<_Tp, n>& mask, |
| RyoheiHagimoto | 0:0e0631af0305 | 912 | const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 913 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 914 | typedef V_TypeTraits<_Tp> Traits; |
| RyoheiHagimoto | 0:0e0631af0305 | 915 | typedef typename Traits::int_type int_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 916 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 917 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 918 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 919 | int_type m = Traits::reinterpret_int(mask.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 920 | c.s[i] = Traits::reinterpret_from_int((Traits::reinterpret_int(a.s[i]) & m) |
| RyoheiHagimoto | 0:0e0631af0305 | 921 | | (Traits::reinterpret_int(b.s[i]) & ~m)); |
| RyoheiHagimoto | 0:0e0631af0305 | 922 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 923 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 924 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 925 | |
| RyoheiHagimoto | 0:0e0631af0305 | 926 | /** @brief Expand values to the wider pack type |
| RyoheiHagimoto | 0:0e0631af0305 | 927 | |
| RyoheiHagimoto | 0:0e0631af0305 | 928 | Copy contents of register to two registers with 2x wider pack type. |
| RyoheiHagimoto | 0:0e0631af0305 | 929 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 930 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 931 | int32x4 int64x2 int64x2 |
| RyoheiHagimoto | 0:0e0631af0305 | 932 | {A B C D} ==> {A B} , {C D} |
| RyoheiHagimoto | 0:0e0631af0305 | 933 | @endcode */ |
| RyoheiHagimoto | 0:0e0631af0305 | 934 | template<typename _Tp, int n> inline void v_expand(const v_reg<_Tp, n>& a, |
| RyoheiHagimoto | 0:0e0631af0305 | 935 | v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b0, |
| RyoheiHagimoto | 0:0e0631af0305 | 936 | v_reg<typename V_TypeTraits<_Tp>::w_type, n/2>& b1) |
| RyoheiHagimoto | 0:0e0631af0305 | 937 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 938 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 939 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 940 | b0.s[i] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 941 | b1.s[i] = a.s[i+(n/2)]; |
| RyoheiHagimoto | 0:0e0631af0305 | 942 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 943 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 944 | |
| RyoheiHagimoto | 0:0e0631af0305 | 945 | //! @cond IGNORED |
| RyoheiHagimoto | 0:0e0631af0305 | 946 | template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::int_type, n> |
| RyoheiHagimoto | 0:0e0631af0305 | 947 | v_reinterpret_as_int(const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 948 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 949 | v_reg<typename V_TypeTraits<_Tp>::int_type, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 950 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 951 | c.s[i] = V_TypeTraits<_Tp>::reinterpret_int(a.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 952 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 953 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 954 | |
| RyoheiHagimoto | 0:0e0631af0305 | 955 | template<typename _Tp, int n> inline v_reg<typename V_TypeTraits<_Tp>::uint_type, n> |
| RyoheiHagimoto | 0:0e0631af0305 | 956 | v_reinterpret_as_uint(const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 957 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 958 | v_reg<typename V_TypeTraits<_Tp>::uint_type, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 959 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 960 | c.s[i] = V_TypeTraits<_Tp>::reinterpret_uint(a.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 961 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 962 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 963 | //! @endcond |
| RyoheiHagimoto | 0:0e0631af0305 | 964 | |
| RyoheiHagimoto | 0:0e0631af0305 | 965 | /** @brief Interleave two vectors |
| RyoheiHagimoto | 0:0e0631af0305 | 966 | |
| RyoheiHagimoto | 0:0e0631af0305 | 967 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 968 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 969 | {A1 A2 A3 A4} |
| RyoheiHagimoto | 0:0e0631af0305 | 970 | {B1 B2 B3 B4} |
| RyoheiHagimoto | 0:0e0631af0305 | 971 | --------------- |
| RyoheiHagimoto | 0:0e0631af0305 | 972 | {A1 B1 A2 B2} and {A3 B3 A4 B4} |
| RyoheiHagimoto | 0:0e0631af0305 | 973 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 974 | For all types except 64-bit. |
| RyoheiHagimoto | 0:0e0631af0305 | 975 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 976 | template<typename _Tp, int n> inline void v_zip( const v_reg<_Tp, n>& a0, const v_reg<_Tp, n>& a1, |
| RyoheiHagimoto | 0:0e0631af0305 | 977 | v_reg<_Tp, n>& b0, v_reg<_Tp, n>& b1 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 978 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 979 | int i; |
| RyoheiHagimoto | 0:0e0631af0305 | 980 | for( i = 0; i < n/2; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 981 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 982 | b0.s[i*2] = a0.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 983 | b0.s[i*2+1] = a1.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 984 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 985 | for( ; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 986 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 987 | b1.s[i*2-n] = a0.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 988 | b1.s[i*2-n+1] = a1.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 989 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 990 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 991 | |
| RyoheiHagimoto | 0:0e0631af0305 | 992 | /** @brief Load register contents from memory |
| RyoheiHagimoto | 0:0e0631af0305 | 993 | |
| RyoheiHagimoto | 0:0e0631af0305 | 994 | @param ptr pointer to memory block with data |
| RyoheiHagimoto | 0:0e0631af0305 | 995 | @return register object |
| RyoheiHagimoto | 0:0e0631af0305 | 996 | |
| RyoheiHagimoto | 0:0e0631af0305 | 997 | @note Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc. |
| RyoheiHagimoto | 0:0e0631af0305 | 998 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 999 | template<typename _Tp> |
| RyoheiHagimoto | 0:0e0631af0305 | 1000 | inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load(const _Tp* ptr) |
| RyoheiHagimoto | 0:0e0631af0305 | 1001 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1002 | return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr); |
| RyoheiHagimoto | 0:0e0631af0305 | 1003 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1004 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1005 | /** @brief Load register contents from memory (aligned) |
| RyoheiHagimoto | 0:0e0631af0305 | 1006 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1007 | similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary) |
| RyoheiHagimoto | 0:0e0631af0305 | 1008 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1009 | template<typename _Tp> |
| RyoheiHagimoto | 0:0e0631af0305 | 1010 | inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_aligned(const _Tp* ptr) |
| RyoheiHagimoto | 0:0e0631af0305 | 1011 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1012 | return v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes>(ptr); |
| RyoheiHagimoto | 0:0e0631af0305 | 1013 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1014 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1015 | /** @brief Load register contents from two memory blocks |
| RyoheiHagimoto | 0:0e0631af0305 | 1016 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1017 | @param loptr memory block containing data for first half (0..n/2) |
| RyoheiHagimoto | 0:0e0631af0305 | 1018 | @param hiptr memory block containing data for second half (n/2..n) |
| RyoheiHagimoto | 0:0e0631af0305 | 1019 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1020 | @code{.cpp} |
| RyoheiHagimoto | 0:0e0631af0305 | 1021 | int lo[2] = { 1, 2 }, hi[2] = { 3, 4 }; |
| RyoheiHagimoto | 0:0e0631af0305 | 1022 | v_int32x4 r = v_load_halves(lo, hi); |
| RyoheiHagimoto | 0:0e0631af0305 | 1023 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1024 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1025 | template<typename _Tp> |
| RyoheiHagimoto | 0:0e0631af0305 | 1026 | inline v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> v_load_halves(const _Tp* loptr, const _Tp* hiptr) |
| RyoheiHagimoto | 0:0e0631af0305 | 1027 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1028 | v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1029 | for( int i = 0; i < c.nlanes/2; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1030 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1031 | c.s[i] = loptr[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1032 | c.s[i+c.nlanes/2] = hiptr[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1033 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1034 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1035 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1036 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1037 | /** @brief Load register contents from memory with double expand |
| RyoheiHagimoto | 0:0e0631af0305 | 1038 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1039 | Same as cv::v_load, but result pack type will be 2x wider than memory type. |
| RyoheiHagimoto | 0:0e0631af0305 | 1040 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1041 | @code{.cpp} |
| RyoheiHagimoto | 0:0e0631af0305 | 1042 | short buf[4] = {1, 2, 3, 4}; // type is int16 |
| RyoheiHagimoto | 0:0e0631af0305 | 1043 | v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32 |
| RyoheiHagimoto | 0:0e0631af0305 | 1044 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1045 | For 8-, 16-, 32-bit integer source types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1046 | template<typename _Tp> |
| RyoheiHagimoto | 0:0e0631af0305 | 1047 | inline v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2> |
| RyoheiHagimoto | 0:0e0631af0305 | 1048 | v_load_expand(const _Tp* ptr) |
| RyoheiHagimoto | 0:0e0631af0305 | 1049 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1050 | typedef typename V_TypeTraits<_Tp>::w_type w_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 1051 | v_reg<w_type, V_SIMD128Traits<w_type>::nlanes> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1052 | for( int i = 0; i < c.nlanes; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1053 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1054 | c.s[i] = ptr[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1055 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1056 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1057 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1058 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1059 | /** @brief Load register contents from memory with quad expand |
| RyoheiHagimoto | 0:0e0631af0305 | 1060 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1061 | Same as cv::v_load_expand, but result type is 4 times wider than source. |
| RyoheiHagimoto | 0:0e0631af0305 | 1062 | @code{.cpp} |
| RyoheiHagimoto | 0:0e0631af0305 | 1063 | char buf[4] = {1, 2, 3, 4}; // type is int8 |
| RyoheiHagimoto | 0:0e0631af0305 | 1064 | v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32 |
| RyoheiHagimoto | 0:0e0631af0305 | 1065 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1066 | For 8-bit integer source types. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1067 | template<typename _Tp> |
| RyoheiHagimoto | 0:0e0631af0305 | 1068 | inline v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4> |
| RyoheiHagimoto | 0:0e0631af0305 | 1069 | v_load_expand_q(const _Tp* ptr) |
| RyoheiHagimoto | 0:0e0631af0305 | 1070 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1071 | typedef typename V_TypeTraits<_Tp>::q_type q_type; |
| RyoheiHagimoto | 0:0e0631af0305 | 1072 | v_reg<q_type, V_SIMD128Traits<q_type>::nlanes> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1073 | for( int i = 0; i < c.nlanes; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1074 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1075 | c.s[i] = ptr[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1076 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1077 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1078 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1079 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1080 | /** @brief Load and deinterleave (2 channels) |
| RyoheiHagimoto | 0:0e0631af0305 | 1081 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1082 | Load data from memory, deinterleave it and store the result to 2 registers. |
| RyoheiHagimoto | 0:0e0631af0305 | 1083 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1084 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1085 | {A1 B1 A2 B2 ...} ==> {A1 A2 ...}, {B1 B2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 1086 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1087 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1088 | template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, |
| RyoheiHagimoto | 0:0e0631af0305 | 1089 | v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 1090 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1091 | int i, i2; |
| RyoheiHagimoto | 0:0e0631af0305 | 1092 | for( i = i2 = 0; i < n; i++, i2 += 2 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1093 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1094 | a.s[i] = ptr[i2]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1095 | b.s[i] = ptr[i2+1]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1096 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1097 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1098 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1099 | /** @brief Load and deinterleave (3 channels) |
| RyoheiHagimoto | 0:0e0631af0305 | 1100 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1101 | Load data from memory, deinterleave it and store the result to 3 registers. |
| RyoheiHagimoto | 0:0e0631af0305 | 1102 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1103 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1104 | {A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 1105 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1106 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1107 | template<typename _Tp, int n> inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, |
| RyoheiHagimoto | 0:0e0631af0305 | 1108 | v_reg<_Tp, n>& b, v_reg<_Tp, n>& c) |
| RyoheiHagimoto | 0:0e0631af0305 | 1109 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1110 | int i, i3; |
| RyoheiHagimoto | 0:0e0631af0305 | 1111 | for( i = i3 = 0; i < n; i++, i3 += 3 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1112 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1113 | a.s[i] = ptr[i3]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1114 | b.s[i] = ptr[i3+1]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1115 | c.s[i] = ptr[i3+2]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1116 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1117 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1118 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1119 | /** @brief Load and deinterleave (4 channels) |
| RyoheiHagimoto | 0:0e0631af0305 | 1120 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1121 | Load data from memory, deinterleave it and store the result to 4 registers. |
| RyoheiHagimoto | 0:0e0631af0305 | 1122 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1123 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1124 | {A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 1125 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1126 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1127 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1128 | inline void v_load_deinterleave(const _Tp* ptr, v_reg<_Tp, n>& a, |
| RyoheiHagimoto | 0:0e0631af0305 | 1129 | v_reg<_Tp, n>& b, v_reg<_Tp, n>& c, |
| RyoheiHagimoto | 0:0e0631af0305 | 1130 | v_reg<_Tp, n>& d) |
| RyoheiHagimoto | 0:0e0631af0305 | 1131 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1132 | int i, i4; |
| RyoheiHagimoto | 0:0e0631af0305 | 1133 | for( i = i4 = 0; i < n; i++, i4 += 4 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1134 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1135 | a.s[i] = ptr[i4]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1136 | b.s[i] = ptr[i4+1]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1137 | c.s[i] = ptr[i4+2]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1138 | d.s[i] = ptr[i4+3]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1139 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1140 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1141 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1142 | /** @brief Interleave and store (2 channels) |
| RyoheiHagimoto | 0:0e0631af0305 | 1143 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1144 | Interleave and store data from 2 registers to memory. |
| RyoheiHagimoto | 0:0e0631af0305 | 1145 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1146 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1147 | {A1 A2 ...}, {B1 B2 ...} ==> {A1 B1 A2 B2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 1148 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1149 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1150 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1151 | inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, |
| RyoheiHagimoto | 0:0e0631af0305 | 1152 | const v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 1153 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1154 | int i, i2; |
| RyoheiHagimoto | 0:0e0631af0305 | 1155 | for( i = i2 = 0; i < n; i++, i2 += 2 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1156 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1157 | ptr[i2] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1158 | ptr[i2+1] = b.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1159 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1160 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1161 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1162 | /** @brief Interleave and store (3 channels) |
| RyoheiHagimoto | 0:0e0631af0305 | 1163 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1164 | Interleave and store data from 3 registers to memory. |
| RyoheiHagimoto | 0:0e0631af0305 | 1165 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1166 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1167 | {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 1168 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1169 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1170 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1171 | inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, |
| RyoheiHagimoto | 0:0e0631af0305 | 1172 | const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c) |
| RyoheiHagimoto | 0:0e0631af0305 | 1173 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1174 | int i, i3; |
| RyoheiHagimoto | 0:0e0631af0305 | 1175 | for( i = i3 = 0; i < n; i++, i3 += 3 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1176 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1177 | ptr[i3] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1178 | ptr[i3+1] = b.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1179 | ptr[i3+2] = c.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1180 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1181 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1182 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1183 | /** @brief Interleave and store (4 channels) |
| RyoheiHagimoto | 0:0e0631af0305 | 1184 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1185 | Interleave and store data from 4 registers to memory. |
| RyoheiHagimoto | 0:0e0631af0305 | 1186 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1187 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1188 | {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...} |
| RyoheiHagimoto | 0:0e0631af0305 | 1189 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1190 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1191 | template<typename _Tp, int n> inline void v_store_interleave( _Tp* ptr, const v_reg<_Tp, n>& a, |
| RyoheiHagimoto | 0:0e0631af0305 | 1192 | const v_reg<_Tp, n>& b, const v_reg<_Tp, n>& c, |
| RyoheiHagimoto | 0:0e0631af0305 | 1193 | const v_reg<_Tp, n>& d) |
| RyoheiHagimoto | 0:0e0631af0305 | 1194 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1195 | int i, i4; |
| RyoheiHagimoto | 0:0e0631af0305 | 1196 | for( i = i4 = 0; i < n; i++, i4 += 4 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1197 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1198 | ptr[i4] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1199 | ptr[i4+1] = b.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1200 | ptr[i4+2] = c.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1201 | ptr[i4+3] = d.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1202 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1203 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1204 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1205 | /** @brief Store data to memory |
| RyoheiHagimoto | 0:0e0631af0305 | 1206 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1207 | Store register contents to memory. |
| RyoheiHagimoto | 0:0e0631af0305 | 1208 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1209 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1210 | REG {A B C D} ==> MEM {A B C D} |
| RyoheiHagimoto | 0:0e0631af0305 | 1211 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1212 | Pointer can be unaligned. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1213 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1214 | inline void v_store(_Tp* ptr, const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1215 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1216 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1217 | ptr[i] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1218 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1219 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1220 | /** @brief Store data to memory (lower half) |
| RyoheiHagimoto | 0:0e0631af0305 | 1221 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1222 | Store lower half of register contents to memory. |
| RyoheiHagimoto | 0:0e0631af0305 | 1223 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1224 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1225 | REG {A B C D} ==> MEM {A B} |
| RyoheiHagimoto | 0:0e0631af0305 | 1226 | @endcode */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1227 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1228 | inline void v_store_low(_Tp* ptr, const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1229 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1230 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1231 | ptr[i] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1232 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1233 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1234 | /** @brief Store data to memory (higher half) |
| RyoheiHagimoto | 0:0e0631af0305 | 1235 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1236 | Store higher half of register contents to memory. |
| RyoheiHagimoto | 0:0e0631af0305 | 1237 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1238 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1239 | REG {A B C D} ==> MEM {C D} |
| RyoheiHagimoto | 0:0e0631af0305 | 1240 | @endcode */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1241 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1242 | inline void v_store_high(_Tp* ptr, const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1243 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1244 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1245 | ptr[i] = a.s[i+(n/2)]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1246 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1247 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1248 | /** @brief Store data to memory (aligned) |
| RyoheiHagimoto | 0:0e0631af0305 | 1249 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1250 | Store register contents to memory. |
| RyoheiHagimoto | 0:0e0631af0305 | 1251 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1252 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1253 | REG {A B C D} ==> MEM {A B C D} |
| RyoheiHagimoto | 0:0e0631af0305 | 1254 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1255 | Pointer __should__ be aligned by 16-byte boundary. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1256 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1257 | inline void v_store_aligned(_Tp* ptr, const v_reg<_Tp, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1258 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1259 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1260 | ptr[i] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1261 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1262 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1263 | /** @brief Combine vector from first elements of two vectors |
| RyoheiHagimoto | 0:0e0631af0305 | 1264 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1265 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1266 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1267 | {A1 A2 A3 A4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1268 | {B1 B2 B3 B4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1269 | --------------- |
| RyoheiHagimoto | 0:0e0631af0305 | 1270 | {A1 A2 B1 B2} |
| RyoheiHagimoto | 0:0e0631af0305 | 1271 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1272 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1273 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1274 | inline v_reg<_Tp, n> v_combine_low(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 1275 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1276 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1277 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1278 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1279 | c.s[i] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1280 | c.s[i+(n/2)] = b.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1281 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1282 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1283 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1284 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1285 | /** @brief Combine vector from last elements of two vectors |
| RyoheiHagimoto | 0:0e0631af0305 | 1286 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1287 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1288 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1289 | {A1 A2 A3 A4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1290 | {B1 B2 B3 B4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1291 | --------------- |
| RyoheiHagimoto | 0:0e0631af0305 | 1292 | {A3 A4 B3 B4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1293 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1294 | For all types except 64-bit. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1295 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1296 | inline v_reg<_Tp, n> v_combine_high(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 1297 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1298 | v_reg<_Tp, n> c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1299 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1300 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1301 | c.s[i] = a.s[i+(n/2)]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1302 | c.s[i+(n/2)] = b.s[i+(n/2)]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1303 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1304 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1305 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1306 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1307 | /** @brief Combine two vectors from lower and higher parts of two other vectors |
| RyoheiHagimoto | 0:0e0631af0305 | 1308 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1309 | @code{.cpp} |
| RyoheiHagimoto | 0:0e0631af0305 | 1310 | low = cv::v_combine_low(a, b); |
| RyoheiHagimoto | 0:0e0631af0305 | 1311 | high = cv::v_combine_high(a, b); |
| RyoheiHagimoto | 0:0e0631af0305 | 1312 | @endcode */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1313 | template<typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1314 | inline void v_recombine(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b, |
| RyoheiHagimoto | 0:0e0631af0305 | 1315 | v_reg<_Tp, n>& low, v_reg<_Tp, n>& high) |
| RyoheiHagimoto | 0:0e0631af0305 | 1316 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1317 | for( int i = 0; i < (n/2); i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1318 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1319 | low.s[i] = a.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1320 | low.s[i+(n/2)] = b.s[i]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1321 | high.s[i] = a.s[i+(n/2)]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1322 | high.s[i+(n/2)] = b.s[i+(n/2)]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1323 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1324 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1325 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1326 | /** @brief Vector extract |
| RyoheiHagimoto | 0:0e0631af0305 | 1327 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1328 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1329 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1330 | {A1 A2 A3 A4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1331 | {B1 B2 B3 B4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1332 | ======================== |
| RyoheiHagimoto | 0:0e0631af0305 | 1333 | shift = 1 {A2 A3 A4 B1} |
| RyoheiHagimoto | 0:0e0631af0305 | 1334 | shift = 2 {A3 A4 B1 B2} |
| RyoheiHagimoto | 0:0e0631af0305 | 1335 | shift = 3 {A4 B1 B2 B3} |
| RyoheiHagimoto | 0:0e0631af0305 | 1336 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1337 | Restriction: 0 <= shift < nlanes |
| RyoheiHagimoto | 0:0e0631af0305 | 1338 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1339 | Usage: |
| RyoheiHagimoto | 0:0e0631af0305 | 1340 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1341 | v_int32x4 a, b, c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1342 | c = v_extract<2>(a, b); |
| RyoheiHagimoto | 0:0e0631af0305 | 1343 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1344 | For integer types only. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1345 | template<int s, typename _Tp, int n> |
| RyoheiHagimoto | 0:0e0631af0305 | 1346 | inline v_reg<_Tp, n> v_extract(const v_reg<_Tp, n>& a, const v_reg<_Tp, n>& b) |
| RyoheiHagimoto | 0:0e0631af0305 | 1347 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1348 | v_reg<_Tp, n> r; |
| RyoheiHagimoto | 0:0e0631af0305 | 1349 | const int shift = n - s; |
| RyoheiHagimoto | 0:0e0631af0305 | 1350 | int i = 0; |
| RyoheiHagimoto | 0:0e0631af0305 | 1351 | for (; i < shift; ++i) |
| RyoheiHagimoto | 0:0e0631af0305 | 1352 | r.s[i] = a.s[i+s]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1353 | for (; i < n; ++i) |
| RyoheiHagimoto | 0:0e0631af0305 | 1354 | r.s[i] = b.s[i-shift]; |
| RyoheiHagimoto | 0:0e0631af0305 | 1355 | return r; |
| RyoheiHagimoto | 0:0e0631af0305 | 1356 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1357 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1358 | /** @brief Round |
| RyoheiHagimoto | 0:0e0631af0305 | 1359 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1360 | Applies cvRound to every float lane; the result is an int vector with the same number of lanes. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1361 | template<int n> inline v_reg<int, n> v_round(const v_reg<float, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1362 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1363 | v_reg<int, n> rounded; |
| RyoheiHagimoto | 0:0e0631af0305 | 1364 | for (int lane = 0; lane < n; ++lane) |
| RyoheiHagimoto | 0:0e0631af0305 | 1365 | rounded.s[lane] = cvRound(a.s[lane]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1366 | return rounded; |
| RyoheiHagimoto | 0:0e0631af0305 | 1367 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1368 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1369 | /** @brief Floor |
| RyoheiHagimoto | 0:0e0631af0305 | 1370 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1371 | Applies cvFloor to every float lane; the result is an int vector with the same number of lanes. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1372 | template<int n> inline v_reg<int, n> v_floor(const v_reg<float, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1373 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1374 | v_reg<int, n> floored; |
| RyoheiHagimoto | 0:0e0631af0305 | 1375 | for (int lane = 0; lane < n; ++lane) |
| RyoheiHagimoto | 0:0e0631af0305 | 1376 | floored.s[lane] = cvFloor(a.s[lane]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1377 | return floored; |
| RyoheiHagimoto | 0:0e0631af0305 | 1378 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1379 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1380 | /** @brief Ceil |
| RyoheiHagimoto | 0:0e0631af0305 | 1381 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1382 | Applies cvCeil to every float lane; the result is an int vector with the same number of lanes. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1383 | template<int n> inline v_reg<int, n> v_ceil(const v_reg<float, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1384 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1385 | v_reg<int, n> raised; |
| RyoheiHagimoto | 0:0e0631af0305 | 1386 | for (int lane = 0; lane < n; ++lane) |
| RyoheiHagimoto | 0:0e0631af0305 | 1387 | raised.s[lane] = cvCeil(a.s[lane]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1388 | return raised; |
| RyoheiHagimoto | 0:0e0631af0305 | 1389 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1390 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1391 | /** @brief Trunc |
| RyoheiHagimoto | 0:0e0631af0305 | 1392 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1393 | Converts every float lane to int with a plain cast; the result is an int vector with the same number of lanes. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1394 | template<int n> inline v_reg<int, n> v_trunc(const v_reg<float, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1395 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1396 | v_reg<int, n> truncated; |
| RyoheiHagimoto | 0:0e0631af0305 | 1397 | for (int lane = 0; lane < n; ++lane) |
| RyoheiHagimoto | 0:0e0631af0305 | 1398 | truncated.s[lane] = static_cast<int>(a.s[lane]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1399 | return truncated; |
| RyoheiHagimoto | 0:0e0631af0305 | 1400 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1401 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1402 | /** @overload Rounds the n double lanes with cvRound into lanes [0, n) of a 2n-lane int vector; lanes [n, 2n) are set to 0. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1403 | template<int n> inline v_reg<int, n*2> v_round(const v_reg<double, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1404 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1405 | v_reg<int, n*2> packed; |
| RyoheiHagimoto | 0:0e0631af0305 | 1406 | for (int lane = 0; lane < n; ++lane) |
| RyoheiHagimoto | 0:0e0631af0305 | 1407 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1408 | packed.s[lane + n] = 0; |
| RyoheiHagimoto | 0:0e0631af0305 | 1409 | packed.s[lane] = cvRound(a.s[lane]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1410 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1411 | return packed; |
| RyoheiHagimoto | 0:0e0631af0305 | 1412 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1413 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1414 | /** @overload Floors the n double lanes with cvFloor into lanes [0, n) of a 2n-lane int vector; lanes [n, 2n) are set to 0. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1415 | template<int n> inline v_reg<int, n*2> v_floor(const v_reg<double, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1416 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1417 | v_reg<int, n*2> c; // needs 2n lanes: it is the return value and c.s[i+n] is written below |
| RyoheiHagimoto | 0:0e0631af0305 | 1418 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1419 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1420 | c.s[i] = cvFloor(a.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1421 | c.s[i+n] = 0; |
| RyoheiHagimoto | 0:0e0631af0305 | 1422 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1423 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1424 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1425 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1426 | /** @overload Applies cvCeil to the n double lanes and stores the results in lanes [0, n) of a 2n-lane int vector; lanes [n, 2n) are set to 0. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1427 | template<int n> inline v_reg<int, n*2> v_ceil(const v_reg<double, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1428 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1429 | v_reg<int, n*2> c; // needs 2n lanes: it is the return value and c.s[i+n] is written below |
| RyoheiHagimoto | 0:0e0631af0305 | 1430 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1431 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1432 | c.s[i] = cvCeil(a.s[i]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1433 | c.s[i+n] = 0; |
| RyoheiHagimoto | 0:0e0631af0305 | 1434 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1435 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1436 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1437 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1438 | /** @overload Truncates the n double lanes toward zero into lanes [0, n) of a 2n-lane int vector; lanes [n, 2n) are set to 0. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1439 | template<int n> inline v_reg<int, n*2> v_trunc(const v_reg<double, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1440 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1441 | v_reg<int, n*2> c; // needs 2n lanes: it is the return value and c.s[i+n] is written below |
| RyoheiHagimoto | 0:0e0631af0305 | 1442 | for( int i = 0; i < n; i++ ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1443 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1444 | c.s[i] = (int)(a.s[i]); // plain cast truncates, matching the float v_trunc; cvCeil would round up |
| RyoheiHagimoto | 0:0e0631af0305 | 1445 | c.s[i+n] = 0; |
| RyoheiHagimoto | 0:0e0631af0305 | 1446 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1447 | return c; |
| RyoheiHagimoto | 0:0e0631af0305 | 1448 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1449 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1450 | /** @brief Convert to float |
| RyoheiHagimoto | 0:0e0631af0305 | 1451 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1452 | Casts every int lane to float. Supported input type is cv::v_int32x4. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1453 | template<int n> inline v_reg<float, n> v_cvt_f32(const v_reg<int, n>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1454 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1455 | v_reg<float, n> asFloat; |
| RyoheiHagimoto | 0:0e0631af0305 | 1456 | for (int lane = 0; lane < n; ++lane) |
| RyoheiHagimoto | 0:0e0631af0305 | 1457 | asFloat.s[lane] = static_cast<float>(a.s[lane]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1458 | return asFloat; |
| RyoheiHagimoto | 0:0e0631af0305 | 1459 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1460 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1461 | /** @brief Convert to double |
| RyoheiHagimoto | 0:0e0631af0305 | 1462 | Casts the first n of the 2n int lanes to double. n appears only as n*2 in the parameter type, so it cannot be deduced and has to be given explicitly. |
| RyoheiHagimoto | 0:0e0631af0305 | 1463 | Supported input type is cv::v_int32x4. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1464 | template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<int, n*2>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1465 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1466 | v_reg<double, n> asDouble; |
| RyoheiHagimoto | 0:0e0631af0305 | 1467 | for (int lane = 0; lane < n; ++lane) |
| RyoheiHagimoto | 0:0e0631af0305 | 1468 | asDouble.s[lane] = static_cast<double>(a.s[lane]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1469 | return asDouble; |
| RyoheiHagimoto | 0:0e0631af0305 | 1470 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1471 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1472 | /** @brief Convert to double |
| RyoheiHagimoto | 0:0e0631af0305 | 1473 | Casts the first n of the 2n float lanes to double. n appears only as n*2 in the parameter type, so it cannot be deduced and has to be given explicitly. |
| RyoheiHagimoto | 0:0e0631af0305 | 1474 | Supported input type is cv::v_float32x4. */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1475 | template<int n> inline v_reg<double, n> v_cvt_f64(const v_reg<float, n*2>& a) |
| RyoheiHagimoto | 0:0e0631af0305 | 1476 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1477 | v_reg<double, n> widened; |
| RyoheiHagimoto | 0:0e0631af0305 | 1478 | for (int lane = 0; lane < n; ++lane) |
| RyoheiHagimoto | 0:0e0631af0305 | 1479 | widened.s[lane] = static_cast<double>(a.s[lane]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1480 | return widened; |
| RyoheiHagimoto | 0:0e0631af0305 | 1481 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1482 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1483 | /** @brief Transpose 4x4 matrix |
| RyoheiHagimoto | 0:0e0631af0305 | 1484 | Reads the rows a0..a3 and writes the columns to b0..b3. The outputs are assigned one after another while the inputs are still being read, so do not pass the same object as both an input and an output. |
| RyoheiHagimoto | 0:0e0631af0305 | 1485 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1486 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1487 | a0 {A1 A2 A3 A4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1488 | a1 {B1 B2 B3 B4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1489 | a2 {C1 C2 C3 C4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1490 | a3 {D1 D2 D3 D4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1491 | =============== |
| RyoheiHagimoto | 0:0e0631af0305 | 1492 | b0 {A1 B1 C1 D1} |
| RyoheiHagimoto | 0:0e0631af0305 | 1493 | b1 {A2 B2 C2 D2} |
| RyoheiHagimoto | 0:0e0631af0305 | 1494 | b2 {A3 B3 C3 D3} |
| RyoheiHagimoto | 0:0e0631af0305 | 1495 | b3 {A4 B4 C4 D4} |
| RyoheiHagimoto | 0:0e0631af0305 | 1496 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1497 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1498 | template<typename _Tp> |
| RyoheiHagimoto | 0:0e0631af0305 | 1499 | inline void v_transpose4x4( const v_reg<_Tp, 4>& a0, const v_reg<_Tp, 4>& a1, |
| RyoheiHagimoto | 0:0e0631af0305 | 1500 | const v_reg<_Tp, 4>& a2, const v_reg<_Tp, 4>& a3, |
| RyoheiHagimoto | 0:0e0631af0305 | 1501 | v_reg<_Tp, 4>& b0, v_reg<_Tp, 4>& b1, |
| RyoheiHagimoto | 0:0e0631af0305 | 1502 | v_reg<_Tp, 4>& b2, v_reg<_Tp, 4>& b3 ) |
| RyoheiHagimoto | 0:0e0631af0305 | 1503 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1504 | b0 = v_reg<_Tp, 4>(a0.s[0], a1.s[0], a2.s[0], a3.s[0]); // column 0 of the input rows |
| RyoheiHagimoto | 0:0e0631af0305 | 1505 | b1 = v_reg<_Tp, 4>(a0.s[1], a1.s[1], a2.s[1], a3.s[1]); // column 1 |
| RyoheiHagimoto | 0:0e0631af0305 | 1506 | b2 = v_reg<_Tp, 4>(a0.s[2], a1.s[2], a2.s[2], a3.s[2]); // column 2 |
| RyoheiHagimoto | 0:0e0631af0305 | 1507 | b3 = v_reg<_Tp, 4>(a0.s[3], a1.s[3], a2.s[3], a3.s[3]); // column 3 |
| RyoheiHagimoto | 0:0e0631af0305 | 1508 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1509 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1510 | //! @brief Helper macro: defines v_setzero_<sfx>(), which returns VecT::zero(). LaneT is not used. |
| RyoheiHagimoto | 0:0e0631af0305 | 1511 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1512 | #define OPENCV_HAL_IMPL_C_INIT_ZERO(VecT, LaneT, sfx) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1513 | inline VecT v_setzero_##sfx() { return VecT::zero(); } |
| RyoheiHagimoto | 0:0e0631af0305 | 1514 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1515 | //! @name Init with zero |
| RyoheiHagimoto | 0:0e0631af0305 | 1516 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1517 | //! @brief Create a new vector with all elements set to zero |
| RyoheiHagimoto | 0:0e0631af0305 | 1518 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint8x16, uchar, u8) |
| RyoheiHagimoto | 0:0e0631af0305 | 1519 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16, schar, s8) |
| RyoheiHagimoto | 0:0e0631af0305 | 1520 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint16x8, ushort, u16) |
| RyoheiHagimoto | 0:0e0631af0305 | 1521 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8, short, s16) |
| RyoheiHagimoto | 0:0e0631af0305 | 1522 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint32x4, unsigned, u32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1523 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4, int, s32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1524 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_float32x4, float, f32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1525 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2, double, f64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1526 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_uint64x2, uint64, u64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1527 | OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2, int64, s64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1528 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1529 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1530 | //! @brief Helper macro: defines v_setall_<sfx>(LaneT val), which returns VecT::all(val). |
| RyoheiHagimoto | 0:0e0631af0305 | 1531 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1532 | #define OPENCV_HAL_IMPL_C_INIT_VAL(VecT, LaneT, sfx) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1533 | inline VecT v_setall_##sfx(LaneT val) { return VecT::all(val); } |
| RyoheiHagimoto | 0:0e0631af0305 | 1534 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1535 | //! @name Init with value |
| RyoheiHagimoto | 0:0e0631af0305 | 1536 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1537 | //! @brief Create a new vector with every element set to the given value |
| RyoheiHagimoto | 0:0e0631af0305 | 1538 | OPENCV_HAL_IMPL_C_INIT_VAL(v_uint8x16, uchar, u8) |
| RyoheiHagimoto | 0:0e0631af0305 | 1539 | OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16, schar, s8) |
| RyoheiHagimoto | 0:0e0631af0305 | 1540 | OPENCV_HAL_IMPL_C_INIT_VAL(v_uint16x8, ushort, u16) |
| RyoheiHagimoto | 0:0e0631af0305 | 1541 | OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8, short, s16) |
| RyoheiHagimoto | 0:0e0631af0305 | 1542 | OPENCV_HAL_IMPL_C_INIT_VAL(v_uint32x4, unsigned, u32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1543 | OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4, int, s32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1544 | OPENCV_HAL_IMPL_C_INIT_VAL(v_float32x4, float, f32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1545 | OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2, double, f64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1546 | OPENCV_HAL_IMPL_C_INIT_VAL(v_uint64x2, uint64, u64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1547 | OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2, int64, s64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1548 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1549 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1550 | //! @brief Helper macro: defines v_reinterpret_as_<sfx>(a), which forwards to a.reinterpret_as<LaneT, VecT::nlanes>(). |
| RyoheiHagimoto | 0:0e0631af0305 | 1551 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1552 | #define OPENCV_HAL_IMPL_C_REINTERPRET(VecT, LaneT, sfx) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1553 | template<typename _Tp0, int n0> inline VecT \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1554 | v_reinterpret_as_##sfx(const v_reg<_Tp0, n0>& a) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1555 | { return a.template reinterpret_as<LaneT, VecT::nlanes>(); } |
| RyoheiHagimoto | 0:0e0631af0305 | 1556 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1557 | //! @name Reinterpret |
| RyoheiHagimoto | 0:0e0631af0305 | 1558 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1559 | //! @brief Convert a vector to a different type without modifying the underlying data. |
| RyoheiHagimoto | 0:0e0631af0305 | 1560 | OPENCV_HAL_IMPL_C_REINTERPRET(v_uint8x16, uchar, u8) |
| RyoheiHagimoto | 0:0e0631af0305 | 1561 | OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16, schar, s8) |
| RyoheiHagimoto | 0:0e0631af0305 | 1562 | OPENCV_HAL_IMPL_C_REINTERPRET(v_uint16x8, ushort, u16) |
| RyoheiHagimoto | 0:0e0631af0305 | 1563 | OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8, short, s16) |
| RyoheiHagimoto | 0:0e0631af0305 | 1564 | OPENCV_HAL_IMPL_C_REINTERPRET(v_uint32x4, unsigned, u32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1565 | OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4, int, s32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1566 | OPENCV_HAL_IMPL_C_REINTERPRET(v_float32x4, float, f32) |
| RyoheiHagimoto | 0:0e0631af0305 | 1567 | OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2, double, f64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1568 | OPENCV_HAL_IMPL_C_REINTERPRET(v_uint64x2, uint64, u64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1569 | OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2, int64, s64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1570 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1571 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1572 | //! @brief Helper macro: defines v_shl<n>(a) for VecT as a << n. LaneT is not used. |
| RyoheiHagimoto | 0:0e0631af0305 | 1573 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1574 | #define OPENCV_HAL_IMPL_C_SHIFTL(VecT, LaneT) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1575 | template<int n> inline VecT v_shl(const VecT& a) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1576 | { return (a << n); } |
| RyoheiHagimoto | 0:0e0631af0305 | 1577 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1578 | //! @name Left shift |
| RyoheiHagimoto | 0:0e0631af0305 | 1579 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1580 | //! @brief Shift left by the compile-time amount n |
| RyoheiHagimoto | 0:0e0631af0305 | 1581 | OPENCV_HAL_IMPL_C_SHIFTL(v_uint16x8, ushort) |
| RyoheiHagimoto | 0:0e0631af0305 | 1582 | OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8, short) |
| RyoheiHagimoto | 0:0e0631af0305 | 1583 | OPENCV_HAL_IMPL_C_SHIFTL(v_uint32x4, unsigned) |
| RyoheiHagimoto | 0:0e0631af0305 | 1584 | OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4, int) |
| RyoheiHagimoto | 0:0e0631af0305 | 1585 | OPENCV_HAL_IMPL_C_SHIFTL(v_uint64x2, uint64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1586 | OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2, int64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1587 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1588 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1589 | //! @brief Helper macro: defines v_shr<n>(a) for VecT as a >> n. LaneT is not used. |
| RyoheiHagimoto | 0:0e0631af0305 | 1590 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1591 | #define OPENCV_HAL_IMPL_C_SHIFTR(VecT, LaneT) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1592 | template<int n> inline VecT v_shr(const VecT& a) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1593 | { return (a >> n); } |
| RyoheiHagimoto | 0:0e0631af0305 | 1594 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1595 | //! @name Right shift |
| RyoheiHagimoto | 0:0e0631af0305 | 1596 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1597 | //! @brief Shift right by the compile-time amount n |
| RyoheiHagimoto | 0:0e0631af0305 | 1598 | OPENCV_HAL_IMPL_C_SHIFTR(v_uint16x8, ushort) |
| RyoheiHagimoto | 0:0e0631af0305 | 1599 | OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8, short) |
| RyoheiHagimoto | 0:0e0631af0305 | 1600 | OPENCV_HAL_IMPL_C_SHIFTR(v_uint32x4, unsigned) |
| RyoheiHagimoto | 0:0e0631af0305 | 1601 | OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4, int) |
| RyoheiHagimoto | 0:0e0631af0305 | 1602 | OPENCV_HAL_IMPL_C_SHIFTR(v_uint64x2, uint64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1603 | OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2, int64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1604 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1605 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1606 | //! @brief Helper macro: defines v_rshr<n>(a) for VecT; each lane becomes (lane + (1 << (n - 1))) >> n, cast back to LaneT. |
| RyoheiHagimoto | 0:0e0631af0305 | 1607 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1608 | #define OPENCV_HAL_IMPL_C_RSHIFTR(VecT, LaneT) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1609 | template<int n> inline VecT v_rshr(const VecT& a) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1610 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1611 | VecT shifted; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1612 | for( int lane = 0; lane < VecT::nlanes; ++lane ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1613 | shifted.s[lane] = (LaneT)((a.s[lane] + ((LaneT)1 << (n - 1))) >> n); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1614 | return shifted; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1615 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1616 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1617 | //! @name Rounding shift |
| RyoheiHagimoto | 0:0e0631af0305 | 1618 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1619 | //! @brief Shift right by n bits with rounding |
| RyoheiHagimoto | 0:0e0631af0305 | 1620 | OPENCV_HAL_IMPL_C_RSHIFTR(v_uint16x8, ushort) |
| RyoheiHagimoto | 0:0e0631af0305 | 1621 | OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8, short) |
| RyoheiHagimoto | 0:0e0631af0305 | 1622 | OPENCV_HAL_IMPL_C_RSHIFTR(v_uint32x4, unsigned) |
| RyoheiHagimoto | 0:0e0631af0305 | 1623 | OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4, int) |
| RyoheiHagimoto | 0:0e0631af0305 | 1624 | OPENCV_HAL_IMPL_C_RSHIFTR(v_uint64x2, uint64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1625 | OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2, int64) |
| RyoheiHagimoto | 0:0e0631af0305 | 1626 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1627 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1628 | //! @brief Helper macro: defines v_<pack_name>(a, b), which narrows the lanes of a and then of b with saturate_cast<NarrowT>. |
| RyoheiHagimoto | 0:0e0631af0305 | 1629 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1630 | #define OPENCV_HAL_IMPL_C_PACK(WideVec, NarrowVec, NarrowT, pack_name) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1631 | inline NarrowVec v_##pack_name(const WideVec& a, const WideVec& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1632 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1633 | NarrowVec packed; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1634 | for( int lane = 0; lane < WideVec::nlanes; ++lane ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1635 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1636 | packed.s[lane+WideVec::nlanes] = saturate_cast<NarrowT>(b.s[lane]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1637 | packed.s[lane] = saturate_cast<NarrowT>(a.s[lane]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1638 | } \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1639 | return packed; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1640 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1641 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1642 | //! @name Pack |
| RyoheiHagimoto | 0:0e0631af0305 | 1643 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1644 | //! @brief Pack values from two vectors to one |
| RyoheiHagimoto | 0:0e0631af0305 | 1645 | //! |
| RyoheiHagimoto | 0:0e0631af0305 | 1646 | //! The returned vector type has twice as many elements as each input vector. The variant with the |
| RyoheiHagimoto | 0:0e0631af0305 | 1647 | //! _u_ suffix also converts to the corresponding unsigned type. |
| RyoheiHagimoto | 0:0e0631af0305 | 1648 | //! |
| RyoheiHagimoto | 0:0e0631af0305 | 1649 | //! - pack: for 16-, 32- and 64-bit integer input types |
| RyoheiHagimoto | 0:0e0631af0305 | 1650 | //! - pack_u: for 16- and 32-bit signed integer input types |
| RyoheiHagimoto | 0:0e0631af0305 | 1651 | OPENCV_HAL_IMPL_C_PACK(v_uint16x8, v_uint8x16, uchar, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1652 | OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_int8x16, schar, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1653 | OPENCV_HAL_IMPL_C_PACK(v_uint32x4, v_uint16x8, ushort, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1654 | OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_int16x8, short, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1655 | OPENCV_HAL_IMPL_C_PACK(v_uint64x2, v_uint32x4, unsigned, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1656 | OPENCV_HAL_IMPL_C_PACK(v_int64x2, v_int32x4, int, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1657 | OPENCV_HAL_IMPL_C_PACK(v_int16x8, v_uint8x16, uchar, pack_u) |
| RyoheiHagimoto | 0:0e0631af0305 | 1658 | OPENCV_HAL_IMPL_C_PACK(v_int32x4, v_uint16x8, ushort, pack_u) |
| RyoheiHagimoto | 0:0e0631af0305 | 1659 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1660 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1661 | //! @brief Helper macro: defines v_rshr_<pack_name><n>(a, b); each lane is (lane + (1 << (n - 1))) >> n, then narrowed with saturate_cast<NarrowT>. |
| RyoheiHagimoto | 0:0e0631af0305 | 1662 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1663 | #define OPENCV_HAL_IMPL_C_RSHR_PACK(WideVec, WideT, NarrowVec, NarrowT, pack_name) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1664 | template<int n> inline NarrowVec v_rshr_##pack_name(const WideVec& a, const WideVec& b) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1665 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1666 | NarrowVec packed; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1667 | for( int lane = 0; lane < WideVec::nlanes; ++lane ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1668 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1669 | packed.s[lane+WideVec::nlanes] = saturate_cast<NarrowT>((b.s[lane] + ((WideT)1 << (n - 1))) >> n); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1670 | packed.s[lane] = saturate_cast<NarrowT>((a.s[lane] + ((WideT)1 << (n - 1))) >> n); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1671 | } \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1672 | return packed; \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1673 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1674 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1675 | //! @name Pack with rounding shift |
| RyoheiHagimoto | 0:0e0631af0305 | 1676 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1677 | //! @brief Pack values from two vectors to one with rounding shift |
| RyoheiHagimoto | 0:0e0631af0305 | 1678 | //! |
| RyoheiHagimoto | 0:0e0631af0305 | 1679 | //! Values from the input vectors are shifted right by _n_ bits with rounding, converted to the narrower |
| RyoheiHagimoto | 0:0e0631af0305 | 1680 | //! type and returned in the result vector. The variant with the _u_ suffix converts to an unsigned type. |
| RyoheiHagimoto | 0:0e0631af0305 | 1681 | //! |
| RyoheiHagimoto | 0:0e0631af0305 | 1682 | //! - pack: for 16-, 32- and 64-bit integer input types |
| RyoheiHagimoto | 0:0e0631af0305 | 1683 | //! - pack_u: for 16- and 32-bit signed integer input types |
| RyoheiHagimoto | 0:0e0631af0305 | 1684 | OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint16x8, ushort, v_uint8x16, uchar, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1685 | OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_int8x16, schar, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1686 | OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint32x4, unsigned, v_uint16x8, ushort, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1687 | OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_int16x8, short, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1688 | OPENCV_HAL_IMPL_C_RSHR_PACK(v_uint64x2, uint64, v_uint32x4, unsigned, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1689 | OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2, int64, v_int32x4, int, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1690 | OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8, short, v_uint8x16, uchar, pack_u) |
| RyoheiHagimoto | 0:0e0631af0305 | 1691 | OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4, int, v_uint16x8, ushort, pack_u) |
| RyoheiHagimoto | 0:0e0631af0305 | 1692 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1693 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1694 | //! @brief Helper macro: defines v_<pack_name>_store(ptr, a), which writes WideVec::nlanes values narrowed with saturate_cast<NarrowT> to ptr. WideT and NarrowVec are not used. |
| RyoheiHagimoto | 0:0e0631af0305 | 1695 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1696 | #define OPENCV_HAL_IMPL_C_PACK_STORE(WideVec, WideT, NarrowVec, NarrowT, pack_name) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1697 | inline void v_##pack_name##_store(NarrowT* ptr, const WideVec& a) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1698 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1699 | for( int lane = 0; lane < WideVec::nlanes; ++lane ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1700 | ptr[lane] = saturate_cast<NarrowT>(a.s[lane]); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1701 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1702 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1703 | //! @name Pack and store |
| RyoheiHagimoto | 0:0e0631af0305 | 1704 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1705 | //! @brief Store values from the input vector into memory with pack |
| RyoheiHagimoto | 0:0e0631af0305 | 1706 | //! |
| RyoheiHagimoto | 0:0e0631af0305 | 1707 | //! Values are stored into memory with a saturating conversion to the narrower type. |
| RyoheiHagimoto | 0:0e0631af0305 | 1708 | //! The variant with the _u_ suffix converts to the corresponding unsigned type. |
| RyoheiHagimoto | 0:0e0631af0305 | 1709 | //! |
| RyoheiHagimoto | 0:0e0631af0305 | 1710 | //! - pack: for 16-, 32- and 64-bit integer input types |
| RyoheiHagimoto | 0:0e0631af0305 | 1711 | //! - pack_u: for 16- and 32-bit signed integer input types |
| RyoheiHagimoto | 0:0e0631af0305 | 1712 | OPENCV_HAL_IMPL_C_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1713 | OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1714 | OPENCV_HAL_IMPL_C_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1715 | OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_int16x8, short, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1716 | OPENCV_HAL_IMPL_C_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1717 | OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1718 | OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u) |
| RyoheiHagimoto | 0:0e0631af0305 | 1719 | OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u) |
| RyoheiHagimoto | 0:0e0631af0305 | 1720 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1721 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1722 | //! @brief Helper macro: defines v_rshr_<pack_name>_store<n>(ptr, a); each lane is (lane + (1 << (n - 1))) >> n, narrowed with saturate_cast<NarrowT> and written to ptr. NarrowVec is not used. |
| RyoheiHagimoto | 0:0e0631af0305 | 1723 | //! @ingroup core_hal_intrin_impl |
| RyoheiHagimoto | 0:0e0631af0305 | 1724 | #define OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(WideVec, WideT, NarrowVec, NarrowT, pack_name) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1725 | template<int n> inline void v_rshr_##pack_name##_store(NarrowT* ptr, const WideVec& a) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1726 | { \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1727 | for( int lane = 0; lane < WideVec::nlanes; ++lane ) \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1728 | ptr[lane] = saturate_cast<NarrowT>((a.s[lane] + ((WideT)1 << (n - 1))) >> n); \ |
| RyoheiHagimoto | 0:0e0631af0305 | 1729 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1730 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1731 | //! @name Pack and store with rounding shift |
| RyoheiHagimoto | 0:0e0631af0305 | 1732 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1733 | //! @brief Store values from the input vector into memory with pack |
| RyoheiHagimoto | 0:0e0631af0305 | 1734 | //! |
| RyoheiHagimoto | 0:0e0631af0305 | 1735 | //! Values are shifted _n_ bits right with rounding, converted to the narrower type and stored into |
| RyoheiHagimoto | 0:0e0631af0305 | 1736 | //! memory. The variant with the _u_ suffix converts to an unsigned type. |
| RyoheiHagimoto | 0:0e0631af0305 | 1737 | //! |
| RyoheiHagimoto | 0:0e0631af0305 | 1738 | //! - pack: for 16-, 32- and 64-bit integer input types |
| RyoheiHagimoto | 0:0e0631af0305 | 1739 | //! - pack_u: for 16- and 32-bit signed integer input types |
| RyoheiHagimoto | 0:0e0631af0305 | 1740 | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint16x8, ushort, v_uint8x16, uchar, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1741 | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_int8x16, schar, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1742 | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint32x4, unsigned, v_uint16x8, ushort, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1743 | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_int16x8, short, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1744 | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_uint64x2, uint64, v_uint32x4, unsigned, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1745 | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2, int64, v_int32x4, int, pack) |
| RyoheiHagimoto | 0:0e0631af0305 | 1746 | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8, short, v_uint8x16, uchar, pack_u) |
| RyoheiHagimoto | 0:0e0631af0305 | 1747 | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4, int, v_uint16x8, ushort, pack_u) |
| RyoheiHagimoto | 0:0e0631af0305 | 1748 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1749 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1750 | /** @brief Matrix multiplication |
| RyoheiHagimoto | 0:0e0631af0305 | 1751 | Returns v[0]*m0 + v[1]*m1 + v[2]*m2 + v[3]*m3, computed lane by lane. In the scheme {A..} is m0, {B..} is m1, {C..} is m2, {D..} is m3 and V is v. |
| RyoheiHagimoto | 0:0e0631af0305 | 1752 | Scheme: |
| RyoheiHagimoto | 0:0e0631af0305 | 1753 | @code |
| RyoheiHagimoto | 0:0e0631af0305 | 1754 | {A0 A1 A2 A3} |V0| |
| RyoheiHagimoto | 0:0e0631af0305 | 1755 | {B0 B1 B2 B3} |V1| |
| RyoheiHagimoto | 0:0e0631af0305 | 1756 | {C0 C1 C2 C3} |V2| |
| RyoheiHagimoto | 0:0e0631af0305 | 1757 | {D0 D1 D2 D3} x |V3| |
| RyoheiHagimoto | 0:0e0631af0305 | 1758 | ==================== |
| RyoheiHagimoto | 0:0e0631af0305 | 1759 | {R0 R1 R2 R3}, where: |
| RyoheiHagimoto | 0:0e0631af0305 | 1760 | R0 = A0V0 + B0V1 + C0V2 + D0V3, |
| RyoheiHagimoto | 0:0e0631af0305 | 1761 | R1 = A1V0 + B1V1 + C1V2 + D1V3 |
| RyoheiHagimoto | 0:0e0631af0305 | 1762 | ... |
| RyoheiHagimoto | 0:0e0631af0305 | 1763 | @endcode |
| RyoheiHagimoto | 0:0e0631af0305 | 1764 | */ |
| RyoheiHagimoto | 0:0e0631af0305 | 1765 | inline v_float32x4 v_matmul(const v_float32x4& v, const v_float32x4& m0, |
| RyoheiHagimoto | 0:0e0631af0305 | 1766 | const v_float32x4& m1, const v_float32x4& m2, |
| RyoheiHagimoto | 0:0e0631af0305 | 1767 | const v_float32x4& m3) |
| RyoheiHagimoto | 0:0e0631af0305 | 1768 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1769 | return v_float32x4(v.s[0]*m0.s[0] + v.s[1]*m1.s[0] + v.s[2]*m2.s[0] + v.s[3]*m3.s[0], |
| RyoheiHagimoto | 0:0e0631af0305 | 1770 | v.s[0]*m0.s[1] + v.s[1]*m1.s[1] + v.s[2]*m2.s[1] + v.s[3]*m3.s[1], |
| RyoheiHagimoto | 0:0e0631af0305 | 1771 | v.s[0]*m0.s[2] + v.s[1]*m1.s[2] + v.s[2]*m2.s[2] + v.s[3]*m3.s[2], |
| RyoheiHagimoto | 0:0e0631af0305 | 1772 | v.s[0]*m0.s[3] + v.s[1]*m1.s[3] + v.s[2]*m2.s[3] + v.s[3]*m3.s[3]); |
| RyoheiHagimoto | 0:0e0631af0305 | 1773 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1774 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1775 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1776 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1777 | //! @name Check SIMD support |
| RyoheiHagimoto | 0:0e0631af0305 | 1778 | //! @{ |
| RyoheiHagimoto | 0:0e0631af0305 | 1779 | //! @brief Check CPU capability of SIMD operation. This definition unconditionally returns false. |
| RyoheiHagimoto | 0:0e0631af0305 | 1780 | static inline bool hasSIMD128() |
| RyoheiHagimoto | 0:0e0631af0305 | 1781 | { |
| RyoheiHagimoto | 0:0e0631af0305 | 1782 | return false; |
| RyoheiHagimoto | 0:0e0631af0305 | 1783 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1784 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1785 | //! @} |
| RyoheiHagimoto | 0:0e0631af0305 | 1786 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1787 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1788 | } |
| RyoheiHagimoto | 0:0e0631af0305 | 1789 | |
| RyoheiHagimoto | 0:0e0631af0305 | 1790 | #endif |