Opencv 3.1 project on GR-PEACH board
Fork of gr-peach-opencv-project by
Universal intrinsics
[Hardware Acceleration Layer]
"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on different platforms. More...
Modules | |
Private implementation helpers | |
Typedefs | |
typedef v_reg< uchar, 16 > | v_uint8x16 |
Sixteen 8-bit unsigned integer values. | |
typedef v_reg< unsigned, 4 > | v_uint32x4 |
Four 32-bit unsigned integer values. | |
typedef v_reg< float, 4 > | v_float32x4 |
Four 32-bit floating point values (single precision) | |
typedef v_reg< double, 2 > | v_float64x2 |
Two 64-bit floating point values (double precision) | |
typedef v_reg< uint64, 2 > | v_uint64x2 |
Two 64-bit unsigned integer values. | |
typedef v_reg< int64, 2 > | v_int64x2 |
Two 64-bit signed integer values. | |
Functions | |
OPENCV_HAL_IMPL_MATH_FUNC (v_sqrt, std::sqrt, _Tp) OPENCV_HAL_IMPL_MATH_FUNC(v_abs | |
Square root of elements. | |
V_TypeTraits< _Tp >::abs_type V_TypeTraits< _Tp >::abs_type | OPENCV_HAL_IMPL_MATH_FUNC (v_round, cvRound, int) OPENCV_HAL_IMPL_MATH_FUNC(v_floor |
Round elements. | |
V_TypeTraits< _Tp >::abs_type V_TypeTraits< _Tp >::abs_type int | OPENCV_HAL_IMPL_MATH_FUNC (v_ceil, cvCeil, int) OPENCV_HAL_IMPL_MATH_FUNC(v_trunc |
Ceil elements. | |
OPENCV_HAL_IMPL_MINMAX_FUNC (v_min, std::min) OPENCV_HAL_IMPL_MINMAX_FUNC(v_max | |
Choose min values for each pair. | |
std::max | OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC (v_reduce_min, std::min) OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max |
Find one min value. | |
OPENCV_HAL_IMPL_CMP_OP (<) OPENCV_HAL_IMPL_CMP_OP(>) OPENCV_HAL_IMPL_CMP_OP(< | |
Less-than comparison. | |
OPENCV_HAL_IMPL_ADD_SUB_OP (v_add_wrap,+,(_Tp), _Tp) OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap | |
Add values without saturation. | |
template<typename _Tp , int n> | |
_Tp v_reg< typename V_TypeTraits< _Tp >::abs_type, n > | v_absdiff (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Absolute difference. | |
v_float32x4 | v_absdiff (const v_float32x4 &a, const v_float32x4 &b) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 32-bit floating point values. | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | v_invsqrt (const v_reg< _Tp, n > &a) |
Inverse square root. | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | v_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Magnitude. | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | v_sqr_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Square of the magnitude. | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | v_muladd (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c) |
Multiply and add. | |
template<typename _Tp , int n> | |
v_reg< typename V_TypeTraits < _Tp >::w_type, n/2 > | v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Dot product of elements. | |
template<typename _Tp , int n> | |
void | v_mul_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d) |
Multiply and expand. | |
OPENCV_HAL_IMPL_SHIFT_OP (<<) OPENCV_HAL_IMPL_SHIFT_OP(>>) template< typename _Tp | |
Bitwise shift left. | |
template<typename _Tp , int n> | |
int | v_signmask (const v_reg< _Tp, n > &a) |
Get negative values mask. | |
template<typename _Tp , int n> | |
bool | v_check_all (const v_reg< _Tp, n > &a) |
Check if all packed values are less than zero. | |
template<typename _Tp , int n> | |
bool | v_check_any (const v_reg< _Tp, n > &a) |
Check if any of packed values is less than zero. | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | v_select (const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Bitwise select. | |
template<typename _Tp , int n> | |
void | v_expand (const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1) |
Expand values to the wider pack type. | |
template<typename _Tp , int n> | |
void | v_zip (const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1) |
Interleave two vectors. | |
template<typename _Tp > | |
v_reg< _Tp, V_SIMD128Traits < _Tp >::nlanes > | v_load (const _Tp *ptr) |
Load register contents from memory. | |
template<typename _Tp > | |
v_reg< _Tp, V_SIMD128Traits < _Tp >::nlanes > | v_load_aligned (const _Tp *ptr) |
Load register contents from memory (aligned) | |
template<typename _Tp > | |
v_reg< _Tp, V_SIMD128Traits < _Tp >::nlanes > | v_load_halves (const _Tp *loptr, const _Tp *hiptr) |
Load register contents from two memory blocks. | |
template<typename _Tp > | |
v_reg< typename V_TypeTraits < _Tp >::w_type, V_SIMD128Traits< _Tp >::nlanes/2 > | v_load_expand (const _Tp *ptr) |
Load register contents from memory with double expand. | |
template<typename _Tp > | |
v_reg< typename V_TypeTraits < _Tp >::q_type, V_SIMD128Traits< _Tp >::nlanes/4 > | v_load_expand_q (const _Tp *ptr) |
Load register contents from memory with quad expand. | |
template<typename _Tp , int n> | |
void | v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c) |
Load and deinterleave (3 channels) | |
template<typename _Tp , int n> | |
void | v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c, v_reg< _Tp, n > &d) |
Load and deinterleave (4 channels) | |
template<typename _Tp , int n> | |
void | v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c) |
Interleave and store (3 channels) | |
template<typename _Tp , int n> | |
void | v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, const v_reg< _Tp, n > &d) |
Interleave and store (4 channels) | |
template<typename _Tp , int n> | |
void | v_store (_Tp *ptr, const v_reg< _Tp, n > &a) |
Store data to memory. | |
template<typename _Tp , int n> | |
void | v_store_low (_Tp *ptr, const v_reg< _Tp, n > &a) |
Store data to memory (lower half) | |
template<typename _Tp , int n> | |
void | v_store_high (_Tp *ptr, const v_reg< _Tp, n > &a) |
Store data to memory (higher half) | |
template<typename _Tp , int n> | |
void | v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a) |
Store data to memory (aligned) | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | v_combine_low (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Combine vector from first elements of two vectors. | |
template<typename _Tp , int n> | |
v_reg< _Tp, n > | v_combine_high (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Combine vector from last elements of two vectors. | |
template<typename _Tp , int n> | |
void | v_recombine (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high) |
Combine two vectors from lower and higher parts of two other vectors. | |
template<int s, typename _Tp , int n> | |
v_reg< _Tp, n > | v_extract (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b) |
Vector extract. | |
template<int n> | |
v_reg< int, n > | v_round (const v_reg< float, n > &a) |
Round. | |
template<int n> | |
v_reg< int, n > | v_floor (const v_reg< float, n > &a) |
Floor. | |
template<int n> | |
v_reg< int, n > | v_ceil (const v_reg< float, n > &a) |
Ceil. | |
template<int n> | |
v_reg< int, n > | v_trunc (const v_reg< float, n > &a) |
Trunc. | |
template<int n> | |
v_reg< int, n *2 > | v_round (const v_reg< double, n > &a) |
template<int n> | |
v_reg< int, n *2 > | v_floor (const v_reg< double, n > &a) |
template<int n> | |
v_reg< int, n *2 > | v_ceil (const v_reg< double, n > &a) |
template<int n> | |
v_reg< int, n *2 > | v_trunc (const v_reg< double, n > &a) |
template<int n> | |
v_reg< float, n > | v_cvt_f32 (const v_reg< int, n > &a) |
Convert to float. | |
template<int n> | |
v_reg< double, n > | v_cvt_f64 (const v_reg< int, n *2 > &a) |
Convert to double. | |
template<int n> | |
v_reg< double, n > | v_cvt_f64 (const v_reg< float, n *2 > &a) |
Convert to double. | |
template<typename _Tp > | |
void | v_transpose4x4 (v_reg< _Tp, 4 > &a0, const v_reg< _Tp, 4 > &a1, const v_reg< _Tp, 4 > &a2, const v_reg< _Tp, 4 > &a3, v_reg< _Tp, 4 > &b0, v_reg< _Tp, 4 > &b1, v_reg< _Tp, 4 > &b2, v_reg< _Tp, 4 > &b3) |
Transpose 4x4 matrix. | |
pack pack pack pack_u v_float32x4 | v_matmul (const v_float32x4 &v, const v_float32x4 &m0, const v_float32x4 &m1, const v_float32x4 &m2, const v_float32x4 &m3) |
Matrix multiplication. | |
Pack | |
Pack values from two vectors into one. The returned vector type has twice as many elements as the input vector types. The variant with the _u_ suffix also converts to the corresponding unsigned type.
| |
typedef v_reg< schar, 16 > | v_int8x16 |
Sixteen 8-bit signed integer values. | |
typedef v_reg< ushort, 8 > | v_uint16x8 |
Eight 16-bit unsigned integer values. | |
typedef v_reg< short, 8 > | v_int16x8 |
Eight 16-bit signed integer values. | |
typedef v_reg< int, 4 > | v_int32x4 |
Four 32-bit signed integer values. | |
pack pack pack | ushort |
OPENCV_HAL_IMPL_C_PACK (v_uint16x8, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_PACK(v_int16x8 | |
pack | OPENCV_HAL_IMPL_C_PACK (v_uint32x4, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_PACK(v_int32x4 |
pack pack | OPENCV_HAL_IMPL_C_PACK (v_uint64x2, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_PACK(v_int64x2 |
pack pack pack | OPENCV_HAL_IMPL_C_PACK (v_int16x8, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_PACK(v_int32x4 |
Init with zero | |
Create new vector with zero elements | |
V_TypeTraits< _Tp >::abs_type V_TypeTraits< _Tp >::abs_type int | int |
schar | |
s8 | short |
s8 s16 s32 | double |
s8 s16 s32 f64 | int64 |
OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16 | |
s8 | OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8 |
s8 s16 | OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4 |
s8 s16 s32 | OPENCV_HAL_IMPL_C_INIT_ZERO (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2 |
s8 s16 s32 f64 | OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2 |
Init with value | |
Create new vector with elements set to a specific value | |
OPENCV_HAL_IMPL_C_INIT_VAL (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16 | |
s8 | OPENCV_HAL_IMPL_C_INIT_VAL (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8 |
s8 s16 | OPENCV_HAL_IMPL_C_INIT_VAL (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4 |
s8 s16 s32 | OPENCV_HAL_IMPL_C_INIT_VAL (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2 |
s8 s16 s32 f64 | OPENCV_HAL_IMPL_C_INIT_VAL (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2 |
Reinterpret | |
Convert vector to different type without modifying underlying data. | |
OPENCV_HAL_IMPL_C_REINTERPRET (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16 | |
s8 | OPENCV_HAL_IMPL_C_REINTERPRET (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8 |
s8 s16 | OPENCV_HAL_IMPL_C_REINTERPRET (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4 |
s8 s16 s32 | OPENCV_HAL_IMPL_C_REINTERPRET (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2 |
s8 s16 s32 f64 | OPENCV_HAL_IMPL_C_REINTERPRET (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2 |
Left shift | |
| |
OPENCV_HAL_IMPL_C_SHIFTL (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8 | |
short | OPENCV_HAL_IMPL_C_SHIFTL (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4 |
short int | OPENCV_HAL_IMPL_C_SHIFTL (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2 |
Right shift | |
| |
OPENCV_HAL_IMPL_C_SHIFTR (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8 | |
short | OPENCV_HAL_IMPL_C_SHIFTR (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4 |
short int | OPENCV_HAL_IMPL_C_SHIFTR (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2 |
Rounding shift | |
| |
OPENCV_HAL_IMPL_C_RSHIFTR (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8 | |
short | OPENCV_HAL_IMPL_C_RSHIFTR (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4 |
short int | OPENCV_HAL_IMPL_C_RSHIFTR (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2 |
Pack with rounding shift | |
Pack values from two vectors into one with a rounding shift. Values from the input vectors will be shifted right by _n_ bits with rounding, converted to the narrower type and returned in the result vector. The variant with the _u_ suffix converts to the unsigned type.
| |
OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8 | |
pack | OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4 |
pack pack | OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2 |
pack pack pack | OPENCV_HAL_IMPL_C_RSHR_PACK (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4 |
Pack and store | |
Store values from the input vector into memory with packing. Values will be stored into memory with saturating conversion to the narrower type. The variant with the _u_ suffix converts to the corresponding unsigned type.
| |
OPENCV_HAL_IMPL_C_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8 | |
pack | OPENCV_HAL_IMPL_C_PACK_STORE (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4 |
pack pack | OPENCV_HAL_IMPL_C_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2 |
pack pack pack | OPENCV_HAL_IMPL_C_PACK_STORE (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4 |
Pack and store with rounding shift | |
Store values from the input vector into memory with packing. Values will be shifted _n_ bits right with rounding, converted to the narrower type and stored into memory. The variant with the _u_ suffix converts to the unsigned type.
| |
OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8 | |
pack | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4 |
pack pack | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2 |
pack pack pack | OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4 |
Detailed Description
"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on different platforms.
Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86 architectures and __NEON__ on ARM architectures; both allow working with 128-bit registers containing packed values of different types. If no SIMD extension is available during compilation, a fallback C++ implementation of the intrinsics will be chosen and the code will work as expected, although it could be slower.
### Types
There are several types representing a 128-bit register as a vector of packed values; each type is implemented as a structure based on one SIMD register.
- cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
- cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
- cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
- cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
- cv::v_float32x4: four 32-bit floating point values (signed) - float
- cv::v_float64x2: two 64-bit floating point values (signed) - double
- Note:
- cv::v_float64x2 is not implemented in NEON variant, if you want to use this type, don't forget to check the CV_SIMD128_64F preprocessor definition:
#if CV_SIMD128_64F //... #endif
### Load and store operations
These operations allow setting the contents of a register explicitly or by loading it from a memory block, and saving the contents of a register to a memory block.
- Constructors: from memory, from two values, ...
- Other create methods: v_setall_s8, v_setall_u8, ..., v_setzero_u8, v_setzero_s8, ...
- Memory operations: v_load, v_load_aligned, v_load_halves, v_store, v_store_aligned, v_store_high, v_store_low
### Value reordering
These operations allow reordering or recombining elements in one or multiple vectors.
- Interleave, deinterleave (3 and 4 channels): v_load_deinterleave, v_store_interleave
- Expand: v_load_expand, v_load_expand_q, v_expand
- Pack: v_pack, v_pack_u, v_rshr_pack, v_rshr_pack_u, v_pack_store, v_pack_u_store, v_rshr_pack_store, v_rshr_pack_u_store
- Recombine: v_zip, v_recombine, v_combine_low, v_combine_high
- Extract: v_extract
### Arithmetic, bitwise and comparison operations
Element-wise binary and unary operations.
- Arithmetics: operator+(const v_reg &a, const v_reg &b) "+", operator-(const v_reg &a, const v_reg &b) "-", operator*(const v_reg &a, const v_reg &b) "*", operator/(const v_reg &a, const v_reg &b) "/", v_mul_expand
- Non-saturating arithmetics: v_add_wrap, v_sub_wrap
- Bitwise shifts: operator<<(const v_reg &a, int s) "<<", operator>>(const v_reg &a, int s) ">>", v_shl, v_shr
- Bitwise logic: operator&(const v_reg &a, const v_reg &b) "&", operator|(const v_reg &a, const v_reg &b) "|", operator^(const v_reg &a, const v_reg &b) "^", operator~(const v_reg &a) "~"
- Comparison: operator>(const v_reg &a, const v_reg &b) ">", operator>=(const v_reg &a, const v_reg &b) ">=", operator<(const v_reg &a, const v_reg &b) "<", operator<=(const v_reg &a, const v_reg &b) "<=", operator==(const v_reg &a, const v_reg &b) "==", operator!=(const v_reg &a, const v_reg &b) "!="
- min/max: v_min, v_max
### Reduce and mask
Most of these operations return only one value.
- Reduce: v_reduce_min, v_reduce_max, v_reduce_sum
- Mask: v_signmask, v_check_all, v_check_any, v_select
### Other math
- Some frequent operations: v_sqrt, v_invsqrt, v_magnitude, v_sqr_magnitude
- Absolute values: v_abs, v_absdiff
### Conversions
Different type conversions and casts:
- Rounding: v_round, v_floor, v_ceil, v_trunc,
- To float: v_cvt_f32, v_cvt_f64
- Reinterpret: v_reinterpret_as_u8, v_reinterpret_as_s8, ...
### Matrix operations
In these operations vectors represent matrix rows/columns: v_dotprod, v_matmul, v_transpose4x4
### Usability
Most operations are implemented only for some subset of the available types; the following matrices show the applicability of different operations to the types.
Regular integers:
| Operations\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 | |-------------------|:-:|:-:|:-:|:-:|:-:|:-:| |load, store | x | x | x | x | x | x | |interleave | x | x | x | x | x | x | |expand | x | x | x | x | x | x | |expand_q | x | x | | | | | |add, sub | x | x | x | x | x | x | |add_wrap, sub_wrap | x | x | x | x | | | |mul | | | x | x | x | x | |mul_expand | | | x | x | x | | |compare | x | x | x | x | x | x | |shift | | | x | x | x | x | |dotprod | | | | x | | | |logical | x | x | x | x | x | x | |min, max | x | x | x | x | x | x | |absdiff | x | x | x | x | x | x | |reduce | | | | | x | x | |mask | x | x | x | x | x | x | |pack | x | x | x | x | x | x | |pack_u | x | | x | | | | |unpack | x | x | x | x | x | x | |extract | x | x | x | x | x | x | |cvt_flt32 | | | | | | x | |cvt_flt64 | | | | | | x | |transpose4x4 | | | | | x | x |
Big integers:
| Operations\Types | uint 64x2 | int 64x2 | |-------------------|:-:|:-:| |load, store | x | x | |add, sub | x | x | |shift | x | x | |logical | x | x | |extract | x | x |
Floating point:
| Operations\Types | float 32x4 | float 64x2 | |-------------------|:-:|:-:| |load, store | x | x | |interleave | x | | |add, sub | x | x | |mul | x | x | |div | x | x | |compare | x | x | |min, max | x | x | |absdiff | x | x | |reduce | x | | |mask | x | x | |unpack | x | x | |cvt_flt32 | | x | |cvt_flt64 | x | | |sqrt, abs | x | x | |float math | x | x | |transpose4x4 | x | |
Typedef Documentation
typedef v_reg<float, 4> v_float32x4 |
Four 32-bit floating point values (single precision)
Definition at line 366 of file intrin_cpp.hpp.
typedef v_reg<double, 2> v_float64x2 |
Two 64-bit floating point values (double precision)
Definition at line 368 of file intrin_cpp.hpp.
pack v_int16x8 |
Eight 16-bit signed integer values.
Definition at line 360 of file intrin_cpp.hpp.
pack pack v_int32x4 |
Four 32-bit signed integer values.
Definition at line 364 of file intrin_cpp.hpp.
typedef v_reg<int64, 2> v_int64x2 |
Two 64-bit signed integer values.
Definition at line 372 of file intrin_cpp.hpp.
v_int8x16 |
Sixteen 8-bit signed integer values.
Definition at line 356 of file intrin_cpp.hpp.
pack pack pack v_uint16x8 |
Eight 16-bit unsigned integer values.
Definition at line 358 of file intrin_cpp.hpp.
typedef v_reg<unsigned, 4> v_uint32x4 |
Four 32-bit unsigned integer values.
Definition at line 362 of file intrin_cpp.hpp.
typedef v_reg<uint64, 2> v_uint64x2 |
Two 64-bit unsigned integer values.
Definition at line 370 of file intrin_cpp.hpp.
typedef v_reg<uchar, 16> v_uint8x16 |
Sixteen 8-bit unsigned integer values.
Definition at line 354 of file intrin_cpp.hpp.
Function Documentation
cv::OPENCV_HAL_IMPL_ADD_SUB_OP | ( | v_add_wrap | , |
+ | , | ||
(_Tp) | , | ||
_Tp | |||
) |
Add values without saturation.
For 8- and 16-bit integer values. Subtract values without saturation
For 8- and 16-bit integer values.
cv::OPENCV_HAL_IMPL_CMP_OP | ( | ) |
Less-than comparison.
For all types except 64-bit integer values. Greater-than comparison
For all types except 64-bit integer values. Less-than or equal comparison
For all types except 64-bit integer values.
V_TypeTraits<_Tp>::abs_type V_TypeTraits<_Tp>::abs_type int cv::OPENCV_HAL_IMPL_MATH_FUNC | ( | v_ceil | , |
cvCeil | , | ||
int | |||
) |
Ceil elements.
Only for floating point types. Truncate elements
Only for floating point types.
cv::OPENCV_HAL_IMPL_MATH_FUNC | ( | v_sqrt | , |
std::sqrt | , | ||
_Tp | |||
) |
Square root of elements.
Only for floating point types. Absolute value of elements
Only for floating point types.
V_TypeTraits<_Tp>::abs_type V_TypeTraits<_Tp>::abs_type cv::OPENCV_HAL_IMPL_MATH_FUNC | ( | v_round | , |
cvRound | , | ||
int | |||
) |
Round elements.
Only for floating point types. Floor elements
Only for floating point types.
cv::OPENCV_HAL_IMPL_MINMAX_FUNC | ( | v_min | , |
std::min | |||
) |
Choose min values for each pair.
Scheme:
{A1 A2 ...} {B1 B2 ...} -------------- {min(A1,B1) min(A2,B2) ...}
For all types except 64-bit integer. Choose max values for each pair
Scheme:
{A1 A2 ...} {B1 B2 ...} -------------- {max(A1,B1) max(A2,B2) ...}
For all types except 64-bit integer.
std::max cv::OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC | ( | v_reduce_min | , |
std::min | |||
) |
Find one min value.
Scheme:
{A1 A2 A3 ...} => min(A1,A2,A3,...)
For 32-bit integer and 32-bit floating point types. Find one max value
Scheme:
{A1 A2 A3 ...} => max(A1,A2,A3,...)
For 32-bit integer and 32-bit floating point types.
cv::OPENCV_HAL_IMPL_SHIFT_OP | ( | << | ) |
Bitwise shift left.
For 16-, 32- and 64-bit integer values. Bitwise shift right
For 16-, 32- and 64-bit integer values. Sum packed values
Scheme:
{A1 A2 A3 ...} => sum{A1,A2,A3,...}
For 32-bit integer and 32-bit floating point types.
_Tp v_reg<typename V_TypeTraits<_Tp>::abs_type, n> cv::v_absdiff | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b | ||
) |
Absolute difference.
Returns $|a - b|$ converted to the corresponding unsigned type. Example:
{.cpp} v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1} v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}
For 8-, 16-, 32-bit integer source types.
Definition at line 671 of file intrin_cpp.hpp.
v_float64x2 v_absdiff | ( | const v_float32x4 & | a, |
const v_float32x4 & | b | ||
) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 32-bit floating point values.
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 64-bit floating point values.
Definition at line 688 of file intrin_cpp.hpp.
v_reg<int, n*2> cv::v_ceil | ( | const v_reg< double, n > & | a ) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition at line 1386 of file intrin_cpp.hpp.
v_reg<int, n> cv::v_ceil | ( | const v_reg< float, n > & | a ) |
Ceil.
Ceil each value. Input type is float vector ==> output type is int vector.
Definition at line 1342 of file intrin_cpp.hpp.
bool cv::v_check_all | ( | const v_reg< _Tp, n > & | a ) |
Check if all packed values are less than zero.
Unsigned values will be cast to signed: `uchar 254 => char -2`. For all types except 64-bit.
Definition at line 882 of file intrin_cpp.hpp.
bool cv::v_check_any | ( | const v_reg< _Tp, n > & | a ) |
Check if any of packed values is less than zero.
Unsigned values will be cast to signed: `uchar 254 => char -2`. For all types except 64-bit.
Definition at line 894 of file intrin_cpp.hpp.
v_reg<_Tp, n> cv::v_combine_high | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b | ||
) |
Combine vector from last elements of two vectors.
Scheme:
{A1 A2 A3 A4} {B1 B2 B3 B4} --------------- {A3 A4 B3 B4}
For all types except 64-bit.
Definition at line 1255 of file intrin_cpp.hpp.
v_reg<_Tp, n> cv::v_combine_low | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b | ||
) |
Combine vector from first elements of two vectors.
Scheme:
{A1 A2 A3 A4} {B1 B2 B3 B4} --------------- {A1 A2 B1 B2}
For all types except 64-bit.
Definition at line 1233 of file intrin_cpp.hpp.
v_reg<float, n> cv::v_cvt_f32 | ( | const v_reg< int, n > & | a ) |
Convert to float.
Supported input type is cv::v_int32x4.
Definition at line 1412 of file intrin_cpp.hpp.
v_reg<double, n> cv::v_cvt_f64 | ( | const v_reg< float, n *2 > & | a ) |
Convert to double.
Supported input type is cv::v_float32x4.
Definition at line 1434 of file intrin_cpp.hpp.
v_reg<double, n> cv::v_cvt_f64 | ( | const v_reg< int, n *2 > & | a ) |
Convert to double.
Supported input type is cv::v_int32x4.
Definition at line 1423 of file intrin_cpp.hpp.
v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_dotprod | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b | ||
) |
Dot product of elements.
Multiply values in two registers and sum adjacent result pairs. Scheme:
{A1 A2 ...} // 16-bit x {B1 B2 ...} // 16-bit ------------- {A1B1+A2B2 ...} // 32-bit
Implemented only for 16-bit signed source type (v_int16x8).
Definition at line 773 of file intrin_cpp.hpp.
void cv::v_expand | ( | const v_reg< _Tp, n > & | a, |
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > & | b0, | ||
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > & | b1 | ||
) |
Expand values to the wider pack type.
Copy contents of register to two registers with 2x wider pack type. Scheme:
int32x4 int64x2 int64x2 {A B C D} ==> {A B} , {C D}
Definition at line 932 of file intrin_cpp.hpp.
v_reg<_Tp, n> cv::v_extract | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b | ||
) |
Vector extract.
Scheme:
{A1 A2 A3 A4} {B1 B2 B3 B4} ======================== shift = 1 {A2 A3 A4 B1} shift = 2 {A3 A4 B1 B2} shift = 3 {A4 B1 B2 B3}
Restriction: 0 <= shift < nlanes
Usage:
v_int32x4 a, b, c; c = v_extract<2>(a, b);
For integer types only.
Definition at line 1305 of file intrin_cpp.hpp.
v_reg<int, n*2> cv::v_floor | ( | const v_reg< double, n > & | a ) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition at line 1374 of file intrin_cpp.hpp.
v_reg<int, n> cv::v_floor | ( | const v_reg< float, n > & | a ) |
Floor.
Floor each value. Input type is float vector ==> output type is int vector.
Definition at line 1331 of file intrin_cpp.hpp.
v_reg<_Tp, n> cv::v_invsqrt | ( | const v_reg< _Tp, n > & | a ) |
Inverse square root.
Returns $1/\sqrt{a}$. For floating point types only.
Definition at line 712 of file intrin_cpp.hpp.
v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load | ( | const _Tp * | ptr ) |
Load register contents from memory.
- Parameters:
-
ptr pointer to memory block with data
- Returns:
- register object
- Note:
- Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.
Definition at line 998 of file intrin_cpp.hpp.
v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load_aligned | ( | const _Tp * | ptr ) |
Load register contents from memory (aligned)
similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)
Definition at line 1008 of file intrin_cpp.hpp.
void cv::v_load_deinterleave | ( | const _Tp * | ptr, |
v_reg< _Tp, n > & | a, | ||
v_reg< _Tp, n > & | b, | ||
v_reg< _Tp, n > & | c, | ||
v_reg< _Tp, n > & | d | ||
) |
Load and deinterleave (4 channels)
Load data from memory, deinterleave it and store it to 4 registers. Scheme:
{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}
For all types except 64-bit.
Definition at line 1107 of file intrin_cpp.hpp.
void cv::v_load_deinterleave | ( | const _Tp * | ptr, |
v_reg< _Tp, n > & | a, | ||
v_reg< _Tp, n > & | b, | ||
v_reg< _Tp, n > & | c | ||
) |
Load and deinterleave (3 channels)
Load data from memory, deinterleave it and store it to 3 registers. Scheme:
{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}
For all types except 64-bit.
Definition at line 1086 of file intrin_cpp.hpp.
v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2> cv::v_load_expand | ( | const _Tp * | ptr ) |
Load register contents from memory with double expand.
Same as cv::v_load, but result pack type will be 2x wider than memory type.
{.cpp} short buf[4] = {1, 2, 3, 4}; // type is int16 v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
For 8-, 16-, 32-bit integer source types.
Definition at line 1046 of file intrin_cpp.hpp.
v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4> cv::v_load_expand_q | ( | const _Tp * | ptr ) |
Load register contents from memory with quad expand.
Same as cv::v_load_expand, but result type is 4 times wider than source.
{.cpp} char buf[4] = {1, 2, 3, 4}; // type is int8 v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32
For 8-bit integer source types.
Definition at line 1067 of file intrin_cpp.hpp.
v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load_halves | ( | const _Tp * | loptr, |
const _Tp * | hiptr | ||
) |
Load register contents from two memory blocks.
- Parameters:
-
loptr memory block containing data for first half (0..n/2) hiptr memory block containing data for second half (n/2..n)
{.cpp} int lo[2] = { 1, 2 }, hi[2] = { 3, 4 }; v_int32x4 r = v_load_halves(lo, hi);
Definition at line 1024 of file intrin_cpp.hpp.
v_reg<_Tp, n> cv::v_magnitude | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b | ||
) |
pack pack pack pack_u v_float32x4 cv::v_matmul | ( | const v_float32x4 & | v, |
const v_float32x4 & | m0, | ||
const v_float32x4 & | m1, | ||
const v_float32x4 & | m2, | ||
const v_float32x4 & | m3 | ||
) |
Matrix multiplication.
Scheme:
{A0 A1 A2 A3} |V0| {B0 B1 B2 B3} |V1| {C0 C1 C2 C3} |V2| {D0 D1 D2 D3} x |V3| ==================== {R0 R1 R2 R3}, where: R0 = A0V0 + A1V1 + A2V2 + A3V3, R1 = B0V0 + B1V1 + B2V2 + B3V3 ...
Definition at line 1724 of file intrin_cpp.hpp.
void cv::v_mul_expand | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b, | ||
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > & | c, | ||
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > & | d | ||
) |
Multiply and expand.
Multiply the values of two registers and store the results in two registers with a wider pack type. Scheme:
```
  {A B C D} // 32-bit
x {E F G H} // 32-bit
---------------
{AE BF}         // 64-bit
        {CG DH} // 64-bit
```
Example:
```cpp
v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
v_uint64x2 c, d; // results
v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}
```
Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).
Definition at line 801 of file intrin_cpp.hpp.
v_reg<_Tp, n> cv::v_muladd | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b, | ||
const v_reg< _Tp, n > & | c | ||
) |
Multiply and add.
Returns $a \cdot b + c$. For floating point types only.
Definition at line 751 of file intrin_cpp.hpp.
void cv::v_recombine | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b, | ||
v_reg< _Tp, n > & | low, | ||
v_reg< _Tp, n > & | high | ||
) |
Combine two vectors from lower and higher parts of two other vectors.
```cpp
low = cv::v_combine_low(a, b);
high = cv::v_combine_high(a, b);
```
Definition at line 1273 of file intrin_cpp.hpp.
v_reg<int, n> cv::v_round | ( | const v_reg< float, n > & | a ) |
Round.
Rounds each value. Input type is float vector ==> output type is int vector.
Definition at line 1320 of file intrin_cpp.hpp.
v_reg<int, n*2> cv::v_round | ( | const v_reg< double, n > & | a ) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition at line 1362 of file intrin_cpp.hpp.
v_reg<_Tp, n> cv::v_select | ( | const v_reg< _Tp, n > & | mask, |
const v_reg< _Tp, n > & | a, | ||
const v_reg< _Tp, n > & | b | ||
) |
Bitwise select.
The return value is built by combining the values of a and b bit by bit: if the i-th bit in mask is 1, the i-th bit is taken from a; otherwise the i-th bit is taken from b.
Definition at line 909 of file intrin_cpp.hpp.
int cv::v_signmask | ( | const v_reg< _Tp, n > & | a ) |
Get negative values mask.
The returned value is a bit mask in which bit i is set to 1 if the packed value at index i is negative. Example:
```cpp
v_int32x4 r; // set to {-1, -1, 1, 1}
int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011
```
For all types except 64-bit.
Definition at line 870 of file intrin_cpp.hpp.
v_reg<_Tp, n> cv::v_sqr_magnitude | ( | const v_reg< _Tp, n > & | a, |
const v_reg< _Tp, n > & | b | ||
) |
Square of the magnitude.
Returns $a^2 + b^2$. For floating point types only.
Definition at line 738 of file intrin_cpp.hpp.
void cv::v_store | ( | _Tp * | ptr, |
const v_reg< _Tp, n > & | a | ||
) |
Store data to memory.
Store register contents to memory. Scheme:
REG {A B C D} ==> MEM {A B C D}
Pointer can be unaligned.
Definition at line 1173 of file intrin_cpp.hpp.
void cv::v_store_aligned | ( | _Tp * | ptr, |
const v_reg< _Tp, n > & | a | ||
) |
Store data to memory (aligned)
Store register contents to memory. Scheme:
REG {A B C D} ==> MEM {A B C D}
Pointer should be aligned on a 16-byte boundary.
Definition at line 1216 of file intrin_cpp.hpp.
void cv::v_store_high | ( | _Tp * | ptr, |
const v_reg< _Tp, n > & | a | ||
) |
Store data to memory (higher half)
Store higher half of register contents to memory. Scheme:
REG {A B C D} ==> MEM {C D}
Definition at line 1201 of file intrin_cpp.hpp.
void cv::v_store_interleave | ( | _Tp * | ptr, |
const v_reg< _Tp, n > & | a, | ||
const v_reg< _Tp, n > & | b, | ||
const v_reg< _Tp, n > & | c, | ||
const v_reg< _Tp, n > & | d | ||
) |
Interleave and store (4 channels)
Interleave and store data from 4 registers to memory. Scheme:
{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}
For all types except 64-bit.
Definition at line 1150 of file intrin_cpp.hpp.
void cv::v_store_interleave | ( | _Tp * | ptr, |
const v_reg< _Tp, n > & | a, | ||
const v_reg< _Tp, n > & | b, | ||
const v_reg< _Tp, n > & | c | ||
) |
Interleave and store (3 channels)
Interleave and store data from 3 registers to memory. Scheme:
{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}
For all types except 64-bit.
Definition at line 1130 of file intrin_cpp.hpp.
void cv::v_store_low | ( | _Tp * | ptr, |
const v_reg< _Tp, n > & | a | ||
) |
Store data to memory (lower half)
Store lower half of register contents to memory. Scheme:
REG {A B C D} ==> MEM {A B}
Definition at line 1187 of file intrin_cpp.hpp.
void cv::v_transpose4x4 | ( | v_reg< _Tp, 4 > & | a0, |
const v_reg< _Tp, 4 > & | a1, | ||
const v_reg< _Tp, 4 > & | a2, | ||
const v_reg< _Tp, 4 > & | a3, | ||
v_reg< _Tp, 4 > & | b0, | ||
v_reg< _Tp, 4 > & | b1, | ||
v_reg< _Tp, 4 > & | b2, | ||
v_reg< _Tp, 4 > & | b3 | ||
) |
Transpose 4x4 matrix.
Scheme:
```
a0  {A1 A2 A3 A4}
a1  {B1 B2 B3 B4}
a2  {C1 C2 C3 C4}
a3  {D1 D2 D3 D4}
===============
b0  {A1 B1 C1 D1}
b1  {A2 B2 C2 D2}
b2  {A3 B3 C3 D3}
b3  {A4 B4 C4 D4}
```
Definition at line 1458 of file intrin_cpp.hpp.
v_reg<int, n*2> cv::v_trunc | ( | const v_reg< double, n > & | a ) |
This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.
Definition at line 1398 of file intrin_cpp.hpp.
v_reg<int, n> cv::v_trunc | ( | const v_reg< float, n > & | a ) |
Trunc.
Truncate each value. Input type is float vector ==> output type is int vector.
Definition at line 1353 of file intrin_cpp.hpp.
void cv::v_zip | ( | const v_reg< _Tp, n > & | a0, |
const v_reg< _Tp, n > & | a1, | ||
v_reg< _Tp, n > & | b0, | ||
v_reg< _Tp, n > & | b1 | ||
) |
Interleave two vectors.
Scheme:
```
  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 B1 A2 B2} and {A3 B3 A4 B4}
```
For all types except 64-bit.
Definition at line 974 of file intrin_cpp.hpp.
Generated on Tue Jul 12 2022 15:17:34 by 1.7.2