Opencv 3.1 project on GR-PEACH board

Fork of gr-peach-opencv-project by the do

Embed: (wiki syntax)

« Back to documentation index

Universal intrinsics

"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on different platforms. More...

Modules

 Private implementation helpers

Typedefs

typedef v_reg< uchar, 16 > v_uint8x16
 Sixteen 8-bit unsigned integer values.
typedef v_reg< unsigned, 4 > v_uint32x4
 Four 32-bit unsigned integer values.
typedef v_reg< float, 4 > v_float32x4
 Four 32-bit floating point values (single precision)
typedef v_reg< double, 2 > v_float64x2
 Two 64-bit floating point values (double precision)
typedef v_reg< uint64, 2 > v_uint64x2
 Two 64-bit unsigned integer values.
typedef v_reg< int64, 2 > v_int64x2
 Two 64-bit signed integer values.

Functions

 OPENCV_HAL_IMPL_MATH_FUNC (v_sqrt, std::sqrt, _Tp) OPENCV_HAL_IMPL_MATH_FUNC(v_abs
 Square root of elements.
V_TypeTraits< _Tp >::abs_type
V_TypeTraits< _Tp >::abs_type 
OPENCV_HAL_IMPL_MATH_FUNC (v_round, cvRound, int) OPENCV_HAL_IMPL_MATH_FUNC(v_floor
 Round elements.
V_TypeTraits< _Tp >::abs_type
V_TypeTraits< _Tp >::abs_type
int 
OPENCV_HAL_IMPL_MATH_FUNC (v_ceil, cvCeil, int) OPENCV_HAL_IMPL_MATH_FUNC(v_trunc
 Ceil elements.
 OPENCV_HAL_IMPL_MINMAX_FUNC (v_min, std::min) OPENCV_HAL_IMPL_MINMAX_FUNC(v_max
 Choose min values for each pair.
std::max OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC (v_reduce_min, std::min) OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max
 Find one min value.
 OPENCV_HAL_IMPL_CMP_OP (<) OPENCV_HAL_IMPL_CMP_OP(>) OPENCV_HAL_IMPL_CMP_OP(<
 Less-than comparison.
 OPENCV_HAL_IMPL_ADD_SUB_OP (v_add_wrap,+,(_Tp), _Tp) OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap
 Add values without saturation.
template<typename _Tp , int n>
_Tp v_reg< typename
V_TypeTraits< _Tp >::abs_type,
n > 
v_absdiff (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
 Absolute difference.
v_float32x4 v_absdiff (const v_float32x4 &a, const v_float32x4 &b)
 This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 32-bit floating point values.
template<typename _Tp , int n>
v_reg< _Tp, n > v_invsqrt (const v_reg< _Tp, n > &a)
 Inverse square root.
template<typename _Tp , int n>
v_reg< _Tp, n > v_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
 Magnitude.
template<typename _Tp , int n>
v_reg< _Tp, n > v_sqr_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
 Square of the magnitude.
template<typename _Tp , int n>
v_reg< _Tp, n > v_muladd (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
 Multiply and add.
template<typename _Tp , int n>
v_reg< typename V_TypeTraits
< _Tp >::w_type, n/2 > 
v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
 Dot product of elements.
template<typename _Tp , int n>
void v_mul_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
 Multiply and expand.
 OPENCV_HAL_IMPL_SHIFT_OP (<<) OPENCV_HAL_IMPL_SHIFT_OP(>>) template< typename _Tp
 Bitwise shift left.
template<typename _Tp , int n>
int v_signmask (const v_reg< _Tp, n > &a)
 Get negative values mask.
template<typename _Tp , int n>
bool v_check_all (const v_reg< _Tp, n > &a)
 Check if all packed values are less than zero.
template<typename _Tp , int n>
bool v_check_any (const v_reg< _Tp, n > &a)
 Check if any of packed values is less than zero.
template<typename _Tp , int n>
v_reg< _Tp, n > v_select (const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
 Bitwise select.
template<typename _Tp , int n>
void v_expand (const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
 Expand values to the wider pack type.
template<typename _Tp , int n>
void v_zip (const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
 Interleave two vectors.
template<typename _Tp >
v_reg< _Tp, V_SIMD128Traits
< _Tp >::nlanes > 
v_load (const _Tp *ptr)
 Load register contents from memory.
template<typename _Tp >
v_reg< _Tp, V_SIMD128Traits
< _Tp >::nlanes > 
v_load_aligned (const _Tp *ptr)
 Load register contents from memory (aligned)
template<typename _Tp >
v_reg< _Tp, V_SIMD128Traits
< _Tp >::nlanes > 
v_load_halves (const _Tp *loptr, const _Tp *hiptr)
 Load register contents from two memory blocks.
template<typename _Tp >
v_reg< typename V_TypeTraits
< _Tp >::w_type,
V_SIMD128Traits< _Tp >::nlanes/2 > 
v_load_expand (const _Tp *ptr)
 Load register contents from memory with double expand.
template<typename _Tp >
v_reg< typename V_TypeTraits
< _Tp >::q_type,
V_SIMD128Traits< _Tp >::nlanes/4 > 
v_load_expand_q (const _Tp *ptr)
 Load register contents from memory with quad expand.
template<typename _Tp , int n>
void v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c)
 Load and deinterleave (3 channels)
template<typename _Tp , int n>
void v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c, v_reg< _Tp, n > &d)
 Load and deinterleave (4 channels)
template<typename _Tp , int n>
void v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
 Interleave and store (3 channels)
template<typename _Tp , int n>
void v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, const v_reg< _Tp, n > &d)
 Interleave and store (4 channels)
template<typename _Tp , int n>
void v_store (_Tp *ptr, const v_reg< _Tp, n > &a)
 Store data to memory.
template<typename _Tp , int n>
void v_store_low (_Tp *ptr, const v_reg< _Tp, n > &a)
 Store data to memory (lower half)
template<typename _Tp , int n>
void v_store_high (_Tp *ptr, const v_reg< _Tp, n > &a)
 Store data to memory (higher half)
template<typename _Tp , int n>
void v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a)
 Store data to memory (aligned)
template<typename _Tp , int n>
v_reg< _Tp, n > v_combine_low (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
 Combine vector from first elements of two vectors.
template<typename _Tp , int n>
v_reg< _Tp, n > v_combine_high (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
 Combine vector from last elements of two vectors.
template<typename _Tp , int n>
void v_recombine (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
 Combine two vectors from lower and higher parts of two other vectors.
template<int s, typename _Tp , int n>
v_reg< _Tp, n > v_extract (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
 Vector extract.
template<int n>
v_reg< int, n > v_round (const v_reg< float, n > &a)
 Round.
template<int n>
v_reg< int, n > v_floor (const v_reg< float, n > &a)
 Floor.
template<int n>
v_reg< int, n > v_ceil (const v_reg< float, n > &a)
 Ceil.
template<int n>
v_reg< int, n > v_trunc (const v_reg< float, n > &a)
 Trunc.
template<int n>
v_reg< int, n *2 > v_round (const v_reg< double, n > &a)
template<int n>
v_reg< int, n *2 > v_floor (const v_reg< double, n > &a)
template<int n>
v_reg< int, n *2 > v_ceil (const v_reg< double, n > &a)
template<int n>
v_reg< int, n *2 > v_trunc (const v_reg< double, n > &a)
template<int n>
v_reg< float, n > v_cvt_f32 (const v_reg< int, n > &a)
 Convert to float.
template<int n>
v_reg< double, n > v_cvt_f64 (const v_reg< int, n *2 > &a)
 Convert to double.
template<int n>
v_reg< double, n > v_cvt_f64 (const v_reg< float, n *2 > &a)
 Convert to double.
template<typename _Tp >
void v_transpose4x4 (v_reg< _Tp, 4 > &a0, const v_reg< _Tp, 4 > &a1, const v_reg< _Tp, 4 > &a2, const v_reg< _Tp, 4 > &a3, v_reg< _Tp, 4 > &b0, v_reg< _Tp, 4 > &b1, v_reg< _Tp, 4 > &b2, v_reg< _Tp, 4 > &b3)
 Transpose 4x4 matrix.
v_float32x4 v_matmul (const v_float32x4 &v, const v_float32x4 &m0, const v_float32x4 &m1, const v_float32x4 &m2, const v_float32x4 &m3)
 Matrix multiplication.

Pack

Pack values from two vectors to one

The returned vector type has twice as many elements as the input vector type. The variant with the _u_ suffix also converts to the corresponding unsigned type.

  • pack: for 16-, 32- and 64-bit integer input types
  • pack_u: for 16- and 32-bit signed integer input types

typedef v_reg< schar, 16 > v_int8x16
 Sixteen 8-bit signed integer values.
typedef v_reg< ushort, 8 > v_uint16x8
 Eight 16-bit unsigned integer values.
typedef v_reg< short, 8 > v_int16x8
 Eight 16-bit signed integer values.
typedef v_reg< int, 4 > v_int32x4
 Four 32-bit signed integer values.
pack pack pack ushort
 OPENCV_HAL_IMPL_C_PACK (v_uint16x8, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_PACK(v_int16x8
pack OPENCV_HAL_IMPL_C_PACK (v_uint32x4, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_PACK(v_int32x4
pack pack OPENCV_HAL_IMPL_C_PACK (v_uint64x2, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_PACK(v_int64x2
pack pack pack OPENCV_HAL_IMPL_C_PACK (v_int16x8, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_PACK(v_int32x4

Init with zero

Create new vector with zero elements


V_TypeTraits< _Tp >::abs_type
V_TypeTraits< _Tp >::abs_type
int 
int
 schar
s8 short
s8 s16 s32 double
s8 s16 s32 f64 int64
 OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16
s8 OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8
s8 s16 OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4
s8 s16 s32 OPENCV_HAL_IMPL_C_INIT_ZERO (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2
s8 s16 s32 f64 OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2

Init with value

Create new vector with elements set to a specific value


 OPENCV_HAL_IMPL_C_INIT_VAL (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16
s8 OPENCV_HAL_IMPL_C_INIT_VAL (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8
s8 s16 OPENCV_HAL_IMPL_C_INIT_VAL (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4
s8 s16 s32 OPENCV_HAL_IMPL_C_INIT_VAL (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2
s8 s16 s32 f64 OPENCV_HAL_IMPL_C_INIT_VAL (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2

Reinterpret

Convert vector to different type without modifying underlying data.


 OPENCV_HAL_IMPL_C_REINTERPRET (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16
s8 OPENCV_HAL_IMPL_C_REINTERPRET (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8
s8 s16 OPENCV_HAL_IMPL_C_REINTERPRET (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4
s8 s16 s32 OPENCV_HAL_IMPL_C_REINTERPRET (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2
s8 s16 s32 f64 OPENCV_HAL_IMPL_C_REINTERPRET (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2

Left shift

Shift left


 OPENCV_HAL_IMPL_C_SHIFTL (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8
short OPENCV_HAL_IMPL_C_SHIFTL (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4
short int OPENCV_HAL_IMPL_C_SHIFTL (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2

Right shift

Shift right


 OPENCV_HAL_IMPL_C_SHIFTR (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8
short OPENCV_HAL_IMPL_C_SHIFTR (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4
short int OPENCV_HAL_IMPL_C_SHIFTR (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2

Rounding shift

Rounding shift right


 OPENCV_HAL_IMPL_C_RSHIFTR (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8
short OPENCV_HAL_IMPL_C_RSHIFTR (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4
short int OPENCV_HAL_IMPL_C_RSHIFTR (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2

Pack with rounding shift

Pack values from two vectors to one with rounding shift

Values from the input vectors will be shifted right by _n_ bits with rounding, converted to narrower type and returned in the result vector. Variant with _u_ suffix converts to unsigned type.

  • pack: for 16-, 32- and 64-bit integer input types
  • pack_u: for 16- and 32-bit signed integer input types

 OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8
pack OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4
pack pack OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2
pack pack pack OPENCV_HAL_IMPL_C_RSHR_PACK (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4

Pack and store

Store values from the input vector into memory with pack

Values will be stored into memory with saturating conversion to narrower type. Variant with _u_ suffix converts to corresponding unsigned type.

  • pack: for 16-, 32- and 64-bit integer input types
  • pack_u: for 16- and 32-bit signed integer input types

 OPENCV_HAL_IMPL_C_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8
pack OPENCV_HAL_IMPL_C_PACK_STORE (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4
pack pack OPENCV_HAL_IMPL_C_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2
pack pack pack OPENCV_HAL_IMPL_C_PACK_STORE (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4

Pack and store with rounding shift

Store values from the input vector into memory with pack

Values will be shifted _n_ bits right with rounding, converted to narrower type and stored into memory. Variant with _u_ suffix converts to unsigned type.

  • pack: for 16-, 32- and 64-bit integer input types
  • pack_u: for 16- and 32-bit signed integer input types

 OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8
pack OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4
pack pack OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2
pack pack pack OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4

Detailed Description

"Universal intrinsics" is a types and functions set intended to simplify vectorization of code on different platforms.

Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86 architectures and __NEON__ on ARM architectures; both allow working with 128-bit registers containing packed values of different types. When no SIMD extension is available during compilation, the fallback C++ implementation of the intrinsics is chosen and the code will work as expected, although it may be slower.

### Types

There are several types representing a 128-bit register as a vector of packed values; each type is implemented as a structure based on one SIMD register.

  • cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
  • cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
  • cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
  • cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
  • cv::v_float32x4: four 32-bit floating point values (signed) - float
  • cv::v_float64x2: two 64-bit floating point values (signed) - double
Note:
cv::v_float64x2 is not implemented in the NEON variant. If you want to use this type, don't forget to check the CV_SIMD128_64F preprocessor definition:
#if CV_SIMD128_64F
//...
#endif

### Load and store operations

These operations allow setting the contents of a register explicitly or by loading it from a memory block, and saving the contents of a register to a memory block.

### Value reordering

These operations allow reordering or recombining elements in one or multiple vectors.

### Arithmetic, bitwise and comparison operations

Element-wise binary and unary operations.

  • Arithmetics: operator+(const v_reg &a, const v_reg &b) "+", operator-(const v_reg &a, const v_reg &b) "-", operator*(const v_reg &a, const v_reg &b) "*", operator/(const v_reg &a, const v_reg &b) "/", v_mul_expand
  • Non-saturating arithmetics: v_add_wrap, v_sub_wrap
  • Bitwise shifts: operator<<(const v_reg &a, int s) "<<", operator>>(const v_reg &a, int s) ">>", v_shl, v_shr
  • Bitwise logic: operator&(const v_reg &a, const v_reg &b) "&", operator|(const v_reg &a, const v_reg &b) "|", operator^(const v_reg &a, const v_reg &b) "^", operator~(const v_reg &a) "~"
  • Comparison: operator>(const v_reg &a, const v_reg &b) ">", operator>=(const v_reg &a, const v_reg &b) ">=", operator<(const v_reg &a, const v_reg &b) "<", operator<=(const v_reg &a, const v_reg &b) "<=", operator==(const v_reg &a, const v_reg &b) "==", operator!=(const v_reg &a, const v_reg &b) "!="
  • min/max: v_min, v_max

### Reduce and mask

Most of these operations return only one value.

### Other math

### Conversions

Different type conversions and casts:

### Matrix operations

In these operations vectors represent matrix rows/columns: v_dotprod, v_matmul, v_transpose4x4

### Usability

Most operations are implemented only for some subset of the available types; the following matrices show the applicability of different operations to the types.

Regular integers:

| Operations\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 |
|-------------------|:-:|:-:|:-:|:-:|:-:|:-:|
|load, store | x | x | x | x | x | x |
|interleave | x | x | x | x | x | x |
|expand | x | x | x | x | x | x |
|expand_q | x | x | | | | |
|add, sub | x | x | x | x | x | x |
|add_wrap, sub_wrap | x | x | x | x | | |
|mul | | | x | x | x | x |
|mul_expand | | | x | x | x | |
|compare | x | x | x | x | x | x |
|shift | | | x | x | x | x |
|dotprod | | | | x | | |
|logical | x | x | x | x | x | x |
|min, max | x | x | x | x | x | x |
|absdiff | x | x | x | x | x | x |
|reduce | | | | | x | x |
|mask | x | x | x | x | x | x |
|pack | x | x | x | x | x | x |
|pack_u | x | | x | | | |
|unpack | x | x | x | x | x | x |
|extract | x | x | x | x | x | x |
|cvt_flt32 | | | | | | x |
|cvt_flt64 | | | | | | x |
|transpose4x4 | | | | | x | x |

Big integers:

| Operations\Types | uint 64x2 | int 64x2 |
|-------------------|:-:|:-:|
|load, store | x | x |
|add, sub | x | x |
|shift | x | x |
|logical | x | x |
|extract | x | x |

Floating point:

| Operations\Types | float 32x4 | float 64x2 |
|-------------------|:-:|:-:|
|load, store | x | x |
|interleave | x | |
|add, sub | x | x |
|mul | x | x |
|div | x | x |
|compare | x | x |
|min, max | x | x |
|absdiff | x | x |
|reduce | x | |
|mask | x | x |
|unpack | x | x |
|cvt_flt32 | | x |
|cvt_flt64 | x | |
|sqrt, abs | x | x |
|float math | x | x |
|transpose4x4 | x | |


Typedef Documentation

typedef v_reg<float, 4> v_float32x4

Four 32-bit floating point values (single precision)

Definition at line 366 of file intrin_cpp.hpp.

typedef v_reg<double, 2> v_float64x2

Two 64-bit floating point values (double precision)

Definition at line 368 of file intrin_cpp.hpp.

typedef v_reg<short, 8> v_int16x8

Eight 16-bit signed integer values.

Definition at line 360 of file intrin_cpp.hpp.

typedef v_reg<int, 4> v_int32x4

Four 32-bit signed integer values.

Definition at line 364 of file intrin_cpp.hpp.

typedef v_reg<int64, 2> v_int64x2

Two 64-bit signed integer values.

Definition at line 372 of file intrin_cpp.hpp.

typedef v_reg<schar, 16> v_int8x16

Sixteen 8-bit signed integer values.

Definition at line 356 of file intrin_cpp.hpp.

typedef v_reg<ushort, 8> v_uint16x8

Eight 16-bit unsigned integer values.

Definition at line 358 of file intrin_cpp.hpp.

typedef v_reg<unsigned, 4> v_uint32x4

Four 32-bit unsigned integer values.

Definition at line 362 of file intrin_cpp.hpp.

typedef v_reg<uint64, 2> v_uint64x2

Two 64-bit unsigned integer values.

Definition at line 370 of file intrin_cpp.hpp.

typedef v_reg<uchar, 16> v_uint8x16

Sixteen 8-bit unsigned integer values.

Definition at line 354 of file intrin_cpp.hpp.


Function Documentation

cv::OPENCV_HAL_IMPL_ADD_SUB_OP ( v_add_wrap  ,
+  ,
(_Tp)  ,
_Tp   
)

Add values without saturation.

For 8- and 16-bit integer values. Subtract values without saturation

For 8- and 16-bit integer values.

cv::OPENCV_HAL_IMPL_CMP_OP ( <   )

Less-than comparison.

For all types except 64-bit integer values. Greater-than comparison

For all types except 64-bit integer values. Less-than or equal comparison

For all types except 64-bit integer values.

V_TypeTraits<_Tp>::abs_type V_TypeTraits<_Tp>::abs_type int cv::OPENCV_HAL_IMPL_MATH_FUNC ( v_ceil  ,
cvCeil  ,
int   
)

Ceil elements.

Only for floating point types. Truncate elements

Only for floating point types.

cv::OPENCV_HAL_IMPL_MATH_FUNC ( v_sqrt  ,
std::sqrt  ,
_Tp   
)

Square root of elements.

Only for floating point types. Absolute value of elements

Only for floating point types.

V_TypeTraits<_Tp>::abs_type V_TypeTraits<_Tp>::abs_type cv::OPENCV_HAL_IMPL_MATH_FUNC ( v_round  ,
cvRound  ,
int   
)

Round elements.

Only for floating point types. Floor elements

Only for floating point types.

cv::OPENCV_HAL_IMPL_MINMAX_FUNC ( v_min  ,
std::min   
)

Choose min values for each pair.

Scheme:

{A1 A2 ...}
{B1 B2 ...}
--------------
{min(A1,B1) min(A2,B2) ...}

For all types except 64-bit integer. Choose max values for each pair

Scheme:

{A1 A2 ...}
{B1 B2 ...}
--------------
{max(A1,B1) max(A2,B2) ...}

For all types except 64-bit integer.

std::max cv::OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC ( v_reduce_min  ,
std::min   
)

Find one min value.

Scheme:

{A1 A2 A3 ...} => min(A1,A2,A3,...)

For 32-bit integer and 32-bit floating point types. Find one max value

Scheme:

{A1 A2 A3 ...} => max(A1,A2,A3,...)

For 32-bit integer and 32-bit floating point types.

cv::OPENCV_HAL_IMPL_SHIFT_OP ( <<   )

Bitwise shift left.

For 16-, 32- and 64-bit integer values. Bitwise shift right

For 16-, 32- and 64-bit integer values. Sum packed values

Scheme:

{A1 A2 A3 ...} => sum{A1,A2,A3,...}

For 32-bit integer and 32-bit floating point types.

_Tp v_reg<typename V_TypeTraits<_Tp>::abs_type, n> cv::v_absdiff ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b 
)

Absolute difference.

Returns $ |a - b| $ converted to corresponding unsigned type. Example:

 {.cpp}
v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}
v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}

For 8-, 16-, 32-bit integer source types.

Definition at line 671 of file intrin_cpp.hpp.

v_float64x2 v_absdiff ( const v_float32x4 &  a,
const v_float32x4 &  b 
)

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 32-bit floating point values.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 64-bit floating point values.

Definition at line 688 of file intrin_cpp.hpp.

v_reg<int, n*2> cv::v_ceil ( const v_reg< double, n > &  a )

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Definition at line 1386 of file intrin_cpp.hpp.

v_reg<int, n> cv::v_ceil ( const v_reg< float, n > &  a )

Ceil.

Ceil each value. Input type is float vector ==> output type is int vector.

Definition at line 1342 of file intrin_cpp.hpp.

bool cv::v_check_all ( const v_reg< _Tp, n > &  a )

Check if all packed values are less than zero.

Unsigned values will be cast to signed: `uchar 254 => char -2`. For all types except 64-bit.

Definition at line 882 of file intrin_cpp.hpp.

bool cv::v_check_any ( const v_reg< _Tp, n > &  a )

Check if any of packed values is less than zero.

Unsigned values will be cast to signed: `uchar 254 => char -2`. For all types except 64-bit.

Definition at line 894 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_combine_high ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b 
)

Combine vector from last elements of two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A3 A4 B3 B4}

For all types except 64-bit.

Definition at line 1255 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_combine_low ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b 
)

Combine vector from first elements of two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 A2 B1 B2}

For all types except 64-bit.

Definition at line 1233 of file intrin_cpp.hpp.

v_reg<float, n> cv::v_cvt_f32 ( const v_reg< int, n > &  a )

Convert to float.

Supported input type is cv::v_int32x4.

Definition at line 1412 of file intrin_cpp.hpp.

v_reg<double, n> cv::v_cvt_f64 ( const v_reg< float, n *2 > &  a )

Convert to double.

Supported input type is cv::v_float32x4.

Definition at line 1434 of file intrin_cpp.hpp.

v_reg<double, n> cv::v_cvt_f64 ( const v_reg< int, n *2 > &  a )

Convert to double.

Supported input type is cv::v_int32x4.

Definition at line 1423 of file intrin_cpp.hpp.

v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_dotprod ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b 
)

Dot product of elements.

Multiply values in two registers and sum adjacent result pairs. Scheme:

  {A1 A2 ...} // 16-bit
x {B1 B2 ...} // 16-bit
-------------
{A1B1+A2B2 ...} // 32-bit

Implemented only for 16-bit signed source type (v_int16x8).

Definition at line 773 of file intrin_cpp.hpp.

void cv::v_expand ( const v_reg< _Tp, n > &  a,
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &  b0,
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &  b1 
)

Expand values to the wider pack type.

Copy contents of register to two registers with 2x wider pack type. Scheme:

 int32x4     int64x2 int64x2
{A B C D} ==> {A B} , {C D}

Definition at line 932 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_extract ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b 
)

Vector extract.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
========================
shift = 1  {A2 A3 A4 B1}
shift = 2  {A3 A4 B1 B2}
shift = 3  {A4 B1 B2 B3}

Restriction: 0 <= shift < nlanes

Usage:

v_int32x4 a, b, c;
c = v_extract<2>(a, b);

For integer types only.

Definition at line 1305 of file intrin_cpp.hpp.

v_reg<int, n*2> cv::v_floor ( const v_reg< double, n > &  a )

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Definition at line 1374 of file intrin_cpp.hpp.

v_reg<int, n> cv::v_floor ( const v_reg< float, n > &  a )

Floor.

Floor each value. Input type is float vector ==> output type is int vector.

Definition at line 1331 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_invsqrt ( const v_reg< _Tp, n > &  a )

Inverse square root.

Returns $ 1/sqrt(a) $. For floating point types only.

Definition at line 712 of file intrin_cpp.hpp.

v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load ( const _Tp *  ptr )

Load register contents from memory.

Parameters:
ptr: pointer to memory block with data
Returns:
register object
Note:
Returned type will be detected from passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.

Definition at line 998 of file intrin_cpp.hpp.

v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load_aligned ( const _Tp *  ptr )

Load register contents from memory (aligned)

similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)

Definition at line 1008 of file intrin_cpp.hpp.

void cv::v_load_deinterleave ( const _Tp *  ptr,
v_reg< _Tp, n > &  a,
v_reg< _Tp, n > &  b,
v_reg< _Tp, n > &  c,
v_reg< _Tp, n > &  d 
)

Load and deinterleave (4 channels)

Load data from memory, deinterleave and store to 4 registers. Scheme:

{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}

For all types except 64-bit.

Definition at line 1107 of file intrin_cpp.hpp.

void cv::v_load_deinterleave ( const _Tp *  ptr,
v_reg< _Tp, n > &  a,
v_reg< _Tp, n > &  b,
v_reg< _Tp, n > &  c 
)

Load and deinterleave (3 channels)

Load data from memory, deinterleave and store to 3 registers. Scheme:

{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}

For all types except 64-bit.

Definition at line 1086 of file intrin_cpp.hpp.

v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2> cv::v_load_expand ( const _Tp *  ptr )

Load register contents from memory with double expand.

Same as cv::v_load, but result pack type will be 2x wider than memory type.

 {.cpp}
short buf[4] = {1, 2, 3, 4}; // type is int16
v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32

For 8-, 16-, 32-bit integer source types.

Definition at line 1046 of file intrin_cpp.hpp.

v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4> cv::v_load_expand_q ( const _Tp *  ptr )

Load register contents from memory with quad expand.

Same as cv::v_load_expand, but result type is 4 times wider than source.

 {.cpp}
char buf[4] = {1, 2, 3, 4}; // type is int8
v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32

For 8-bit integer source types.

Definition at line 1067 of file intrin_cpp.hpp.

v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load_halves ( const _Tp *  loptr,
const _Tp *  hiptr 
)

Load register contents from two memory blocks.

Parameters:
loptr: memory block containing data for first half (0..n/2)
hiptr: memory block containing data for second half (n/2..n)
 {.cpp}
int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };
v_int32x4 r = v_load_halves(lo, hi);

Definition at line 1024 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_magnitude ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b 
)

Magnitude.

Returns $ sqrt(a^2 + b^2) $ For floating point types only.

Definition at line 725 of file intrin_cpp.hpp.

v_float32x4 cv::v_matmul ( const v_float32x4 &  v,
const v_float32x4 &  m0,
const v_float32x4 &  m1,
const v_float32x4 &  m2,
const v_float32x4 &  m3 
)

Matrix multiplication.

Scheme:

{A0 A1 A2 A3}   |V0|
{B0 B1 B2 B3}   |V1|
{C0 C1 C2 C3}   |V2|
{D0 D1 D2 D3} x |V3|
====================
{R0 R1 R2 R3}, where:
R0 = A0V0 + A1V1 + A2V2 + A3V3,
R1 = B0V0 + B1V1 + B2V2 + B3V3
...

Definition at line 1724 of file intrin_cpp.hpp.

void cv::v_mul_expand ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b,
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &  c,
v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &  d 
)

Multiply and expand.

Multiply values in two registers and store the results in two registers with a wider pack type. Scheme:

  {A B C D} // 32-bit
x {E F G H} // 32-bit
---------------
{AE BF}         // 64-bit
        {CG DH} // 64-bit

Example:

 {.cpp}
v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
v_uint64x2 c, d; // results
v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}

Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).

Definition at line 801 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_muladd ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b,
const v_reg< _Tp, n > &  c 
)

Multiply and add.

Returns $ a*b + c $ For floating point types only.

Definition at line 751 of file intrin_cpp.hpp.

void cv::v_recombine ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b,
v_reg< _Tp, n > &  low,
v_reg< _Tp, n > &  high 
)

Combine two vectors from lower and higher parts of two other vectors.

 {.cpp}
low = cv::v_combine_low(a, b);
high = cv::v_combine_high(a, b);

Definition at line 1273 of file intrin_cpp.hpp.

v_reg<int, n> cv::v_round ( const v_reg< float, n > &  a )

Round.

Rounds each value. Input type is float vector ==> output type is int vector.

Definition at line 1320 of file intrin_cpp.hpp.

v_reg<int, n*2> cv::v_round ( const v_reg< double, n > &  a )

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Definition at line 1362 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_select ( const v_reg< _Tp, n > &  mask,
const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b 
)

Bitwise select.

The return value is built by combining the values a and b using the following scheme: if the i-th bit in _mask_ is 1, select the i-th bit from _a_; otherwise select the i-th bit from _b_.

Definition at line 909 of file intrin_cpp.hpp.

int cv::v_signmask ( const v_reg< _Tp, n > &  a )

Get negative values mask.

The returned value is a bit mask with bits set to 1 at the positions corresponding to the indexes of negative packed values. Example:

 {.cpp}
v_int32x4 r; // set to {-1, -1, 1, 1}
int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011

For all types except 64-bit.

Definition at line 870 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_sqr_magnitude ( const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b 
)

Square of the magnitude.

Returns $ a^2 + b^2 $. For floating point types only.

Definition at line 738 of file intrin_cpp.hpp.

void cv::v_store ( _Tp *  ptr,
const v_reg< _Tp, n > &  a 
)

Store data to memory.

Store register contents to memory. Scheme:

  REG {A B C D} ==> MEM {A B C D}

Pointer can be unaligned.

Definition at line 1173 of file intrin_cpp.hpp.

void cv::v_store_aligned ( _Tp *  ptr,
const v_reg< _Tp, n > &  a 
)

Store data to memory (aligned)

Store register contents to memory. Scheme:

  REG {A B C D} ==> MEM {A B C D}

Pointer __should__ be aligned to a 16-byte boundary.

Definition at line 1216 of file intrin_cpp.hpp.

void cv::v_store_high ( _Tp *  ptr,
const v_reg< _Tp, n > &  a 
)

Store data to memory (higher half)

Store higher half of register contents to memory. Scheme:

  REG {A B C D} ==> MEM {C D}

Definition at line 1201 of file intrin_cpp.hpp.

void cv::v_store_interleave ( _Tp *  ptr,
const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b,
const v_reg< _Tp, n > &  c,
const v_reg< _Tp, n > &  d 
)

Interleave and store (4 channels)

Interleave and store data from 4 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}

For all types except 64-bit.

Definition at line 1150 of file intrin_cpp.hpp.

void cv::v_store_interleave ( _Tp *  ptr,
const v_reg< _Tp, n > &  a,
const v_reg< _Tp, n > &  b,
const v_reg< _Tp, n > &  c 
)

Interleave and store (3 channels)

Interleave and store data from 3 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}

For all types except 64-bit.

Definition at line 1130 of file intrin_cpp.hpp.

void cv::v_store_low ( _Tp *  ptr,
const v_reg< _Tp, n > &  a 
)

Store data to memory (lower half)

Store lower half of register contents to memory. Scheme:

  REG {A B C D} ==> MEM {A B}

Definition at line 1187 of file intrin_cpp.hpp.

void cv::v_transpose4x4 ( v_reg< _Tp, 4 > &  a0,
const v_reg< _Tp, 4 > &  a1,
const v_reg< _Tp, 4 > &  a2,
const v_reg< _Tp, 4 > &  a3,
v_reg< _Tp, 4 > &  b0,
v_reg< _Tp, 4 > &  b1,
v_reg< _Tp, 4 > &  b2,
v_reg< _Tp, 4 > &  b3 
)

Transpose 4x4 matrix.

Scheme:

a0  {A1 A2 A3 A4}
a1  {B1 B2 B3 B4}
a2  {C1 C2 C3 C4}
a3  {D1 D2 D3 D4}
===============
b0  {A1 B1 C1 D1}
b1  {A2 B2 C2 D2}
b2  {A3 B3 C3 D3}
b3  {A4 B4 C4 D4}

Definition at line 1458 of file intrin_cpp.hpp.

v_reg<int, n*2> cv::v_trunc ( const v_reg< double, n > &  a )

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Definition at line 1398 of file intrin_cpp.hpp.

v_reg<int, n> cv::v_trunc ( const v_reg< float, n > &  a )

Trunc.

Truncate each value. Input type is float vector ==> output type is int vector.

Definition at line 1353 of file intrin_cpp.hpp.

void cv::v_zip ( const v_reg< _Tp, n > &  a0,
const v_reg< _Tp, n > &  a1,
v_reg< _Tp, n > &  b0,
v_reg< _Tp, n > &  b1 
)

Interleave two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 B1 A2 B2} and {A3 B3 A4 B4}

For all types except 64-bit.

Definition at line 974 of file intrin_cpp.hpp.