"Universal intrinsics" is a set of types and functions intended to simplify vectorization of code on different platforms. More...

Modules
	Private implementation helpers
Typedefs
typedef v_reg< uchar, 16 >	v_uint8x16
	Sixteen 8-bit unsigned integer values.
typedef v_reg< unsigned, 4 >	v_uint32x4
	Four 32-bit unsigned integer values.
typedef v_reg< float, 4 >	v_float32x4
	Four 32-bit floating point values (single precision)
typedef v_reg< double, 2 >	v_float64x2
	Two 64-bit floating point values (double precision)
typedef v_reg< uint64, 2 >	v_uint64x2
	Two 64-bit unsigned integer values.
typedef v_reg< int64, 2 >	v_int64x2
	Two 64-bit signed integer values.
Functions
	OPENCV_HAL_IMPL_MATH_FUNC (v_sqrt, std::sqrt, _Tp) OPENCV_HAL_IMPL_MATH_FUNC(v_abs
	Square root of elements.
V_TypeTraits< _Tp >::abs_type V_TypeTraits< _Tp >::abs_type	OPENCV_HAL_IMPL_MATH_FUNC (v_round, cvRound, int) OPENCV_HAL_IMPL_MATH_FUNC(v_floor
	Round elements.
V_TypeTraits< _Tp >::abs_type V_TypeTraits< _Tp >::abs_type int	OPENCV_HAL_IMPL_MATH_FUNC (v_ceil, cvCeil, int) OPENCV_HAL_IMPL_MATH_FUNC(v_trunc
	Ceil elements.
	OPENCV_HAL_IMPL_MINMAX_FUNC (v_min, std::min) OPENCV_HAL_IMPL_MINMAX_FUNC(v_max
	Choose min values for each pair.
std::max	OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC (v_reduce_min, std::min) OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC(v_reduce_max
	Find one min value.
	OPENCV_HAL_IMPL_CMP_OP (<) OPENCV_HAL_IMPL_CMP_OP(>) OPENCV_HAL_IMPL_CMP_OP(<
	Less-than comparison.
	OPENCV_HAL_IMPL_ADD_SUB_OP (v_add_wrap,+,(_Tp), _Tp) OPENCV_HAL_IMPL_ADD_SUB_OP(v_sub_wrap
	Add values without saturation.
template<typename _Tp , int n>
v_reg< typename V_TypeTraits< _Tp >::abs_type, n >	v_absdiff (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Absolute difference.
v_float32x4	v_absdiff (const v_float32x4 &a, const v_float32x4 &b)
	This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 32-bit floating point values.
template<typename _Tp , int n>
v_reg< _Tp, n >	v_invsqrt (const v_reg< _Tp, n > &a)
	Inverse square root.
template<typename _Tp , int n>
v_reg< _Tp, n >	v_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Magnitude.
template<typename _Tp , int n>
v_reg< _Tp, n >	v_sqr_magnitude (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Square of the magnitude.
template<typename _Tp , int n>
v_reg< _Tp, n >	v_muladd (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
	Multiply and add.
template<typename _Tp , int n>
v_reg< typename V_TypeTraits < _Tp >::w_type, n/2 >	v_dotprod (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Dot product of elements.
template<typename _Tp , int n>
void	v_mul_expand (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &c, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &d)
	Multiply and expand.
	OPENCV_HAL_IMPL_SHIFT_OP (<<) OPENCV_HAL_IMPL_SHIFT_OP(>>) template< typename _Tp
	Bitwise shift left.
template<typename _Tp , int n>
int	v_signmask (const v_reg< _Tp, n > &a)
	Get negative values mask.
template<typename _Tp , int n>
bool	v_check_all (const v_reg< _Tp, n > &a)
	Check if all packed values are less than zero.
template<typename _Tp , int n>
bool	v_check_any (const v_reg< _Tp, n > &a)
	Check if any of packed values is less than zero.
template<typename _Tp , int n>
v_reg< _Tp, n >	v_select (const v_reg< _Tp, n > &mask, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Bitwise select.
template<typename _Tp , int n>
void	v_expand (const v_reg< _Tp, n > &a, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b0, v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &b1)
	Expand values to the wider pack type.
template<typename _Tp , int n>
void	v_zip (const v_reg< _Tp, n > &a0, const v_reg< _Tp, n > &a1, v_reg< _Tp, n > &b0, v_reg< _Tp, n > &b1)
	Interleave two vectors.
template<typename _Tp >
v_reg< _Tp, V_SIMD128Traits < _Tp >::nlanes >	v_load (const _Tp *ptr)
	Load register contents from memory.
template<typename _Tp >
v_reg< _Tp, V_SIMD128Traits < _Tp >::nlanes >	v_load_aligned (const _Tp *ptr)
	Load register contents from memory (aligned)
template<typename _Tp >
v_reg< _Tp, V_SIMD128Traits < _Tp >::nlanes >	v_load_halves (const _Tp *loptr, const _Tp *hiptr)
	Load register contents from two memory blocks.
template<typename _Tp >
v_reg< typename V_TypeTraits < _Tp >::w_type, V_SIMD128Traits< _Tp >::nlanes/2 >	v_load_expand (const _Tp *ptr)
	Load register contents from memory with double expand.
template<typename _Tp >
v_reg< typename V_TypeTraits < _Tp >::q_type, V_SIMD128Traits< _Tp >::nlanes/4 >	v_load_expand_q (const _Tp *ptr)
	Load register contents from memory with quad expand.
template<typename _Tp , int n>
void	v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c)
	Load and deinterleave (3 channels)
template<typename _Tp , int n>
void	v_load_deinterleave (const _Tp *ptr, v_reg< _Tp, n > &a, v_reg< _Tp, n > &b, v_reg< _Tp, n > &c, v_reg< _Tp, n > &d)
	Load and deinterleave (4 channels)
template<typename _Tp , int n>
void	v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c)
	Interleave and store (3 channels)
template<typename _Tp , int n>
void	v_store_interleave (_Tp *ptr, const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, const v_reg< _Tp, n > &c, const v_reg< _Tp, n > &d)
	Interleave and store (4 channels)
template<typename _Tp , int n>
void	v_store (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory.
template<typename _Tp , int n>
void	v_store_low (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (lower half)
template<typename _Tp , int n>
void	v_store_high (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (higher half)
template<typename _Tp , int n>
void	v_store_aligned (_Tp *ptr, const v_reg< _Tp, n > &a)
	Store data to memory (aligned)
template<typename _Tp , int n>
v_reg< _Tp, n >	v_combine_low (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Combine vector from first elements of two vectors.
template<typename _Tp , int n>
v_reg< _Tp, n >	v_combine_high (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Combine vector from last elements of two vectors.
template<typename _Tp , int n>
void	v_recombine (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b, v_reg< _Tp, n > &low, v_reg< _Tp, n > &high)
	Combine two vectors from lower and higher parts of two other vectors.
template<int s, typename _Tp , int n>
v_reg< _Tp, n >	v_extract (const v_reg< _Tp, n > &a, const v_reg< _Tp, n > &b)
	Vector extract.
template<int n>
v_reg< int, n >	v_round (const v_reg< float, n > &a)
	Round.
template<int n>
v_reg< int, n >	v_floor (const v_reg< float, n > &a)
	Floor.
template<int n>
v_reg< int, n >	v_ceil (const v_reg< float, n > &a)
	Ceil.
template<int n>
v_reg< int, n >	v_trunc (const v_reg< float, n > &a)
	Trunc.
template<int n>
v_reg< int, n *2 >	v_round (const v_reg< double, n > &a)
template<int n>
v_reg< int, n *2 >	v_floor (const v_reg< double, n > &a)
template<int n>
v_reg< int, n *2 >	v_ceil (const v_reg< double, n > &a)
template<int n>
v_reg< int, n *2 >	v_trunc (const v_reg< double, n > &a)
template<int n>
v_reg< float, n >	v_cvt_f32 (const v_reg< int, n > &a)
	Convert to float.
template<int n>
v_reg< double, n >	v_cvt_f64 (const v_reg< int, n *2 > &a)
	Convert to double.
template<int n>
v_reg< double, n >	v_cvt_f64 (const v_reg< float, n *2 > &a)
	Convert to double.
template<typename _Tp >
void	v_transpose4x4 (v_reg< _Tp, 4 > &a0, const v_reg< _Tp, 4 > &a1, const v_reg< _Tp, 4 > &a2, const v_reg< _Tp, 4 > &a3, v_reg< _Tp, 4 > &b0, v_reg< _Tp, 4 > &b1, v_reg< _Tp, 4 > &b2, v_reg< _Tp, 4 > &b3)
	Transpose 4x4 matrix.
v_float32x4	v_matmul (const v_float32x4 &v, const v_float32x4 &m0, const v_float32x4 &m1, const v_float32x4 &m2, const v_float32x4 &m3)
	Matrix multiplication.
Pack
Pack values from two vectors into one. The returned vector type has twice as many elements as the input vector types. The variant with the _u_ suffix also converts to the corresponding unsigned type. pack: for 16-, 32- and 64-bit integer input types. pack_u: for 16- and 32-bit signed integer input types.
typedef v_reg< schar, 16 >	v_int8x16
	Sixteen 8-bit signed integer values.
typedef v_reg< ushort, 8 >	v_uint16x8
	Eight 16-bit unsigned integer values.
typedef v_reg< short, 8 >	v_int16x8
	Eight 16-bit signed integer values.
typedef v_reg< int, 4 >	v_int32x4
	Four 32-bit signed integer values.
pack pack pack	ushort
	OPENCV_HAL_IMPL_C_PACK (v_uint16x8, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_PACK(v_int16x8
pack	OPENCV_HAL_IMPL_C_PACK (v_uint32x4, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_PACK(v_int32x4
pack pack	OPENCV_HAL_IMPL_C_PACK (v_uint64x2, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_PACK(v_int64x2
pack pack pack	OPENCV_HAL_IMPL_C_PACK (v_int16x8, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_PACK(v_int32x4
Init with zero
Create new vector with zero elements
V_TypeTraits< _Tp >::abs_type V_TypeTraits< _Tp >::abs_type int	int
	schar
s8	short
s8 s16 s32	double
s8 s16 s32 f64	int64
	OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int8x16
s8	OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int16x8
s8 s16	OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int32x4
s8 s16 s32	OPENCV_HAL_IMPL_C_INIT_ZERO (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_INIT_ZERO(v_float64x2
s8 s16 s32 f64	OPENCV_HAL_IMPL_C_INIT_ZERO (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_INIT_ZERO(v_int64x2
Init with value
Create new vector with elements set to a specific value
	OPENCV_HAL_IMPL_C_INIT_VAL (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_INIT_VAL(v_int8x16
s8	OPENCV_HAL_IMPL_C_INIT_VAL (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_INIT_VAL(v_int16x8
s8 s16	OPENCV_HAL_IMPL_C_INIT_VAL (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_INIT_VAL(v_int32x4
s8 s16 s32	OPENCV_HAL_IMPL_C_INIT_VAL (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_INIT_VAL(v_float64x2
s8 s16 s32 f64	OPENCV_HAL_IMPL_C_INIT_VAL (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_INIT_VAL(v_int64x2
Reinterpret
Convert vector to different type without modifying underlying data.
	OPENCV_HAL_IMPL_C_REINTERPRET (v_uint8x16, uchar, u8) OPENCV_HAL_IMPL_C_REINTERPRET(v_int8x16
s8	OPENCV_HAL_IMPL_C_REINTERPRET (v_uint16x8, ushort, u16) OPENCV_HAL_IMPL_C_REINTERPRET(v_int16x8
s8 s16	OPENCV_HAL_IMPL_C_REINTERPRET (v_uint32x4, unsigned, u32) OPENCV_HAL_IMPL_C_REINTERPRET(v_int32x4
s8 s16 s32	OPENCV_HAL_IMPL_C_REINTERPRET (v_float32x4, float, f32) OPENCV_HAL_IMPL_C_REINTERPRET(v_float64x2
s8 s16 s32 f64	OPENCV_HAL_IMPL_C_REINTERPRET (v_uint64x2, uint64, u64) OPENCV_HAL_IMPL_C_REINTERPRET(v_int64x2
Left shift
Shift left
	OPENCV_HAL_IMPL_C_SHIFTL (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_SHIFTL(v_int16x8
short	OPENCV_HAL_IMPL_C_SHIFTL (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_SHIFTL(v_int32x4
short int	OPENCV_HAL_IMPL_C_SHIFTL (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_SHIFTL(v_int64x2
Right shift
Shift right
	OPENCV_HAL_IMPL_C_SHIFTR (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_SHIFTR(v_int16x8
short	OPENCV_HAL_IMPL_C_SHIFTR (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_SHIFTR(v_int32x4
short int	OPENCV_HAL_IMPL_C_SHIFTR (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_SHIFTR(v_int64x2
Rounding shift
Rounding shift right
	OPENCV_HAL_IMPL_C_RSHIFTR (v_uint16x8, ushort) OPENCV_HAL_IMPL_C_RSHIFTR(v_int16x8
short	OPENCV_HAL_IMPL_C_RSHIFTR (v_uint32x4, unsigned) OPENCV_HAL_IMPL_C_RSHIFTR(v_int32x4
short int	OPENCV_HAL_IMPL_C_RSHIFTR (v_uint64x2, uint64) OPENCV_HAL_IMPL_C_RSHIFTR(v_int64x2
Pack with rounding shift
Pack values from two vectors into one with rounding shift. Values from the input vectors will be shifted right by _n_ bits with rounding, converted to the narrower type and returned in the result vector. The variant with the _u_ suffix converts to the unsigned type. pack: for 16-, 32- and 64-bit integer input types. pack_u: for 16- and 32-bit signed integer input types.
	OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int16x8
pack	OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4
pack pack	OPENCV_HAL_IMPL_C_RSHR_PACK (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int64x2
pack pack pack	OPENCV_HAL_IMPL_C_RSHR_PACK (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK(v_int32x4
Pack and store
Store values from the input vector into memory with pack. Values will be stored into memory with saturating conversion to the narrower type. The variant with the _u_ suffix converts to the corresponding unsigned type. pack: for 16-, 32- and 64-bit integer input types. pack_u: for 16- and 32-bit signed integer input types.
	OPENCV_HAL_IMPL_C_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int16x8
pack	OPENCV_HAL_IMPL_C_PACK_STORE (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4
pack pack	OPENCV_HAL_IMPL_C_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_PACK_STORE(v_int64x2
pack pack pack	OPENCV_HAL_IMPL_C_PACK_STORE (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_PACK_STORE(v_int32x4
Pack and store with rounding shift
Store values from the input vector into memory with pack. Values will be shifted _n_ bits right with rounding, converted to the narrower type and stored into memory. The variant with the _u_ suffix converts to the unsigned type. pack: for 16-, 32- and 64-bit integer input types. pack_u: for 16- and 32-bit signed integer input types.
	OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint16x8, ushort, v_uint8x16, uchar, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int16x8
pack	OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint32x4, unsigned, v_uint16x8, ushort, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4
pack pack	OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_uint64x2, uint64, v_uint32x4, unsigned, pack) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int64x2
pack pack pack	OPENCV_HAL_IMPL_C_RSHR_PACK_STORE (v_int16x8, short, v_uint8x16, uchar, pack_u) OPENCV_HAL_IMPL_C_RSHR_PACK_STORE(v_int32x4

Detailed Description

"Universal intrinsics" is a set of types and functions intended to simplify vectorization of code on different platforms.

Currently there are two supported SIMD extensions: __SSE/SSE2__ on x86 architectures and __NEON__ on ARM architectures; both allow working with 128-bit registers containing packed values of different types. If no SIMD extension is available during compilation, the fallback C++ implementation of the intrinsics will be chosen and the code will work as expected, although it could be slower.

### Types

There are several types representing a 128-bit register as a vector of packed values; each type is implemented as a structure based on one SIMD register.

cv::v_uint8x16 and cv::v_int8x16: sixteen 8-bit integer values (unsigned/signed) - char
cv::v_uint16x8 and cv::v_int16x8: eight 16-bit integer values (unsigned/signed) - short
cv::v_uint32x4 and cv::v_int32x4: four 32-bit integer values (unsigned/signed) - int
cv::v_uint64x2 and cv::v_int64x2: two 64-bit integer values (unsigned/signed) - int64
cv::v_float32x4: four 32-bit floating point values (signed) - float
cv::v_float64x2: two 64-bit floating point values (signed) - double

Note:

cv::v_float64x2 is not implemented in the NEON variant; if you want to use this type, don't forget to check the CV_SIMD128_64F preprocessor definition:

#if CV_SIMD128_64F
//...
#endif

### Load and store operations

These operations allow setting the contents of a register explicitly or by loading it from a memory block, and saving the contents of a register to a memory block.

Constructors: from memory, from two values, ...
Other create methods: v_setall_s8, v_setall_u8, ..., v_setzero_u8, v_setzero_s8, ...
Memory operations: v_load, v_load_aligned, v_load_halves, v_store, v_store_aligned, v_store_high, v_store_low

### Value reordering

These operations allow reordering or recombining elements in one or multiple vectors.

Interleave, deinterleave (3 and 4 channels): v_load_deinterleave, v_store_interleave
Expand: v_load_expand, v_load_expand_q, v_expand
Pack: v_pack, v_pack_u, v_rshr_pack, v_rshr_pack_u, v_pack_store, v_pack_u_store, v_rshr_pack_store, v_rshr_pack_u_store
Recombine: v_zip, v_recombine, v_combine_low, v_combine_high
Extract: v_extract

### Arithmetic, bitwise and comparison operations

Element-wise binary and unary operations.

Arithmetics: operator+(const v_reg &a, const v_reg &b) "+", operator-(const v_reg &a, const v_reg &b) "-", operator*(const v_reg &a, const v_reg &b) "*", operator/(const v_reg &a, const v_reg &b) "/", v_mul_expand

Non-saturating arithmetics: v_add_wrap, v_sub_wrap

Bitwise shifts: operator<<(const v_reg &a, int s) "<<", operator>>(const v_reg &a, int s) ">>", v_shl, v_shr

Bitwise logic: operator&(const v_reg &a, const v_reg &b) "&", operator|(const v_reg &a, const v_reg &b) "|", operator^(const v_reg &a, const v_reg &b) "^", operator~(const v_reg &a) "~"

Comparison: operator>(const v_reg &a, const v_reg &b) ">", operator>=(const v_reg &a, const v_reg &b) ">=", operator<(const v_reg &a, const v_reg &b) "<", operator<=(const v_reg &a, const v_reg &b) "<=", operator==(const v_reg &a, const v_reg &b) "==", operator!=(const v_reg &a, const v_reg &b) "!="

min/max: v_min, v_max

### Reduce and mask

Most of these operations return only one value.

Reduce: v_reduce_min, v_reduce_max, v_reduce_sum
Mask: v_signmask, v_check_all, v_check_any, v_select

### Other math

Some frequent operations: v_sqrt, v_invsqrt, v_magnitude, v_sqr_magnitude
Absolute values: v_abs, v_absdiff

### Conversions

Different type conversions and casts:

Rounding: v_round, v_floor, v_ceil, v_trunc
To float: v_cvt_f32, v_cvt_f64
Reinterpret: v_reinterpret_as_u8, v_reinterpret_as_s8, ...

### Matrix operations

In these operations vectors represent matrix rows/columns: v_dotprod, v_matmul, v_transpose4x4

### Usability

Most operations are implemented only for some subset of the available types; the following matrices show the applicability of different operations to the types.

Regular integers:

| Operations\Types | uint 8x16 | int 8x16 | uint 16x8 | int 16x8 | uint 32x4 | int 32x4 |
|-------------------|:-:|:-:|:-:|:-:|:-:|:-:|
|load, store | x | x | x | x | x | x |
|interleave | x | x | x | x | x | x |
|expand | x | x | x | x | x | x |
|expand_q | x | x | | | | |
|add, sub | x | x | x | x | x | x |
|add_wrap, sub_wrap | x | x | x | x | | |
|mul | | | x | x | x | x |
|mul_expand | | | x | x | x | |
|compare | x | x | x | x | x | x |
|shift | | | x | x | x | x |
|dotprod | | | | x | | |
|logical | x | x | x | x | x | x |
|min, max | x | x | x | x | x | x |
|absdiff | x | x | x | x | x | x |
|reduce | | | | | x | x |
|mask | x | x | x | x | x | x |
|pack | x | x | x | x | x | x |
|pack_u | x | | x | | | |
|unpack | x | x | x | x | x | x |
|extract | x | x | x | x | x | x |
|cvt_flt32 | | | | | | x |
|cvt_flt64 | | | | | | x |
|transpose4x4 | | | | | x | x |

Big integers:

| Operations\Types | uint 64x2 | int 64x2 |
|-------------------|:-:|:-:|
|load, store | x | x |
|add, sub | x | x |
|shift | x | x |
|logical | x | x |
|extract | x | x |

Floating point:

| Operations\Types | float 32x4 | float 64x2 |
|-------------------|:-:|:-:|
|load, store | x | x |
|interleave | x | |
|add, sub | x | x |
|mul | x | x |
|div | x | x |
|compare | x | x |
|min, max | x | x |
|absdiff | x | x |
|reduce | x | |
|mask | x | x |
|unpack | x | x |
|cvt_flt32 | | x |
|cvt_flt64 | x | |
|sqrt, abs | x | x |
|float math | x | x |
|transpose4x4 | x | |

Typedef Documentation

typedef v_reg<float, 4> v_float32x4

Four 32-bit floating point values (single precision)

Definition at line 366 of file intrin_cpp.hpp.

typedef v_reg<double, 2> v_float64x2

Two 64-bit floating point values (double precision)

Definition at line 368 of file intrin_cpp.hpp.

typedef v_reg<short, 8> v_int16x8

Eight 16-bit signed integer values.

Definition at line 360 of file intrin_cpp.hpp.

typedef v_reg<int, 4> v_int32x4

Four 32-bit signed integer values.

Definition at line 364 of file intrin_cpp.hpp.

typedef v_reg<int64, 2> v_int64x2

Two 64-bit signed integer values.

Definition at line 372 of file intrin_cpp.hpp.

typedef v_reg<schar, 16> v_int8x16

Sixteen 8-bit signed integer values.

Definition at line 356 of file intrin_cpp.hpp.

typedef v_reg<ushort, 8> v_uint16x8

Eight 16-bit unsigned integer values.

Definition at line 358 of file intrin_cpp.hpp.

typedef v_reg<unsigned, 4> v_uint32x4

Four 32-bit unsigned integer values.

Definition at line 362 of file intrin_cpp.hpp.

typedef v_reg<uint64, 2> v_uint64x2

Two 64-bit unsigned integer values.

Definition at line 370 of file intrin_cpp.hpp.

typedef v_reg<uchar, 16> v_uint8x16

Sixteen 8-bit unsigned integer values.

Definition at line 354 of file intrin_cpp.hpp.

Function Documentation

cv::OPENCV_HAL_IMPL_ADD_SUB_OP	(	v_add_wrap	,
		+	,
		(_Tp)	,
		_Tp
	)

Add values without saturation.

For 8- and 16-bit integer values. Subtract values without saturation

For 8- and 16-bit integer values.

cv::OPENCV_HAL_IMPL_CMP_OP ( < )

Less-than comparison.

For all types except 64-bit integer values. Greater-than comparison

For all types except 64-bit integer values. Less-than or equal comparison

For all types except 64-bit integer values.

V_TypeTraits<_Tp>::abs_type V_TypeTraits<_Tp>::abs_type int cv::OPENCV_HAL_IMPL_MATH_FUNC	(	v_ceil	,
		cvCeil	,
		int
	)

Ceil elements.

Only for floating point types. Truncate elements

Only for floating point types.

cv::OPENCV_HAL_IMPL_MATH_FUNC	(	v_sqrt	,
		std::sqrt	,
		_Tp
	)

Square root of elements.

Only for floating point types. Absolute value of elements

Only for floating point types.

V_TypeTraits<_Tp>::abs_type V_TypeTraits<_Tp>::abs_type cv::OPENCV_HAL_IMPL_MATH_FUNC	(	v_round	,
		cvRound	,
		int
	)

Round elements.

Only for floating point types. Floor elements

Only for floating point types.

cv::OPENCV_HAL_IMPL_MINMAX_FUNC	(	v_min	,
		std::min
	)

Choose min values for each pair.

Scheme:

{A1 A2 ...}
{B1 B2 ...}
--------------
{min(A1,B1) min(A2,B2) ...}

For all types except 64-bit integer. Choose max values for each pair

Scheme:

{A1 A2 ...}
{B1 B2 ...}
--------------
{max(A1,B1) max(A2,B2) ...}

For all types except 64-bit integer.

std::max cv::OPENCV_HAL_IMPL_REDUCE_MINMAX_FUNC	(	v_reduce_min	,
		std::min
	)

Find one min value.

Scheme:

{A1 A2 A3 ...} => min(A1,A2,A3,...)

For 32-bit integer and 32-bit floating point types. Find one max value

Scheme:

{A1 A2 A3 ...} => max(A1,A2,A3,...)

For 32-bit integer and 32-bit floating point types.

cv::OPENCV_HAL_IMPL_SHIFT_OP ( << )

Bitwise shift left.

For 16-, 32- and 64-bit integer values. Bitwise shift right

For 16-, 32- and 64-bit integer values. Sum packed values

Scheme:

{A1 A2 A3 ...} => sum{A1,A2,A3,...}

For 32-bit integer and 32-bit floating point types.

v_reg<typename V_TypeTraits<_Tp>::abs_type, n> cv::v_absdiff	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Absolute difference.

Returns $ |a - b| $ converted to corresponding unsigned type. Example:

```cpp
v_int32x4 a, b; // {1, 2, 3, 4} and {4, 3, 2, 1}
v_uint32x4 c = v_absdiff(a, b); // result is {3, 1, 1, 3}
```

For 8-, 16-, 32-bit integer source types.

Definition at line 671 of file intrin_cpp.hpp.

v_float64x2 v_absdiff	(	const v_float32x4 &	a,
		const v_float32x4 &	b
	)

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 32-bit floating point values.

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts. For 64-bit floating point values.

Definition at line 688 of file intrin_cpp.hpp.

v_reg<int, n*2> cv::v_ceil ( const v_reg< double, n > & a )

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Definition at line 1386 of file intrin_cpp.hpp.

v_reg<int, n> cv::v_ceil ( const v_reg< float, n > & a )

Ceil.

Ceil each value. Input type is float vector ==> output type is int vector.

Definition at line 1342 of file intrin_cpp.hpp.

bool cv::v_check_all ( const v_reg< _Tp, n > & a )

Check if all packed values are less than zero.

Unsigned values will be cast to signed: `uchar 254 => char -2`. For all types except 64-bit.

Definition at line 882 of file intrin_cpp.hpp.

bool cv::v_check_any ( const v_reg< _Tp, n > & a )

Check if any of packed values is less than zero.

Unsigned values will be cast to signed: `uchar 254 => char -2`. For all types except 64-bit.

Definition at line 894 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_combine_high	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Combine vector from last elements of two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A3 A4 B3 B4}

For all types except 64-bit.

Definition at line 1255 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_combine_low	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Combine vector from first elements of two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 A2 B1 B2}

For all types except 64-bit.

Definition at line 1233 of file intrin_cpp.hpp.

v_reg<float, n> cv::v_cvt_f32 ( const v_reg< int, n > & a )

Convert to float.

Supported input type is cv::v_int32x4.

Definition at line 1412 of file intrin_cpp.hpp.

v_reg<double, n> cv::v_cvt_f64 ( const v_reg< float, n *2 > & a )

Convert to double.

Supported input type is cv::v_float32x4.

Definition at line 1434 of file intrin_cpp.hpp.

v_reg<double, n> cv::v_cvt_f64 ( const v_reg< int, n *2 > & a )

Convert to double.

Supported input type is cv::v_int32x4.

Definition at line 1423 of file intrin_cpp.hpp.

v_reg<typename V_TypeTraits<_Tp>::w_type, n/2> cv::v_dotprod	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Dot product of elements.

Multiply values in two registers and sum adjacent result pairs. Scheme:

  {A1 A2 ...} // 16-bit
x {B1 B2 ...} // 16-bit
-------------
{A1B1+A2B2 ...} // 32-bit

Implemented only for 16-bit signed source type (v_int16x8).

Definition at line 773 of file intrin_cpp.hpp.

void cv::v_expand	(	const v_reg< _Tp, n > &	a,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	b0,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	b1
	)

Expand values to the wider pack type.

Copy contents of register to two registers with 2x wider pack type. Scheme:

 int32x4     int64x2 int64x2
{A B C D} ==> {A B} , {C D}

Definition at line 932 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_extract	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Vector extract.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
========================
shift = 1  {A2 A3 A4 B1}
shift = 2  {A3 A4 B1 B2}
shift = 3  {A4 B1 B2 B3}

Restriction: 0 <= shift < nlanes

Usage:

v_int32x4 a, b, c;
c = v_extract<2>(a, b);

For integer types only.

Definition at line 1305 of file intrin_cpp.hpp.

v_reg<int, n*2> cv::v_floor ( const v_reg< double, n > & a )

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Definition at line 1374 of file intrin_cpp.hpp.

v_reg<int, n> cv::v_floor ( const v_reg< float, n > & a )

Floor.

Floor each value. Input type is float vector ==> output type is int vector.

Definition at line 1331 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_invsqrt ( const v_reg< _Tp, n > & a )

Inverse square root.

Returns $ 1/\sqrt{a} $. For floating point types only.

Definition at line 712 of file intrin_cpp.hpp.

v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load ( const _Tp * ptr )

Load register contents from memory.

Parameters:

ptr	pointer to memory block with data

Returns: register object

Note: Returned type will be detected from the passed pointer type, for example uchar ==> cv::v_uint8x16, int ==> cv::v_int32x4, etc.

Definition at line 998 of file intrin_cpp.hpp.

v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load_aligned ( const _Tp * ptr )

Load register contents from memory (aligned)

similar to cv::v_load, but source memory block should be aligned (to 16-byte boundary)

Definition at line 1008 of file intrin_cpp.hpp.

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	c,
		v_reg< _Tp, n > &	d
	)

Load and deinterleave (4 channels)

Load data from memory, deinterleave and store to 4 registers. Scheme:

{A1 B1 C1 D1 A2 B2 C2 D2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...}

For all types except 64-bit.

Definition at line 1107 of file intrin_cpp.hpp.

void cv::v_load_deinterleave	(	const _Tp *	ptr,
		v_reg< _Tp, n > &	a,
		v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	c
	)

Load and deinterleave (3 channels)

Load data from memory, deinterleave and store to 3 registers. Scheme:

{A1 B1 C1 A2 B2 C2 ...} ==> {A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}

For all types except 64-bit.

Definition at line 1086 of file intrin_cpp.hpp.

v_reg<typename V_TypeTraits<_Tp>::w_type, V_SIMD128Traits<_Tp>::nlanes / 2> cv::v_load_expand ( const _Tp * ptr )

Load register contents from memory with double expand.

Same as cv::v_load, but result pack type will be 2x wider than memory type.

```cpp
short buf[4] = {1, 2, 3, 4}; // type is int16
v_int32x4 r = v_load_expand(buf); // r = {1, 2, 3, 4} - type is int32
```

For 8-, 16-, 32-bit integer source types.

Definition at line 1046 of file intrin_cpp.hpp.

v_reg<typename V_TypeTraits<_Tp>::q_type, V_SIMD128Traits<_Tp>::nlanes / 4> cv::v_load_expand_q ( const _Tp * ptr )

Load register contents from memory with quad expand.

Same as cv::v_load_expand, but result type is 4 times wider than source.

```cpp
char buf[4] = {1, 2, 3, 4}; // type is int8
v_int32x4 r = v_load_expand_q(buf); // r = {1, 2, 3, 4} - type is int32
```

For 8-bit integer source types.

Definition at line 1067 of file intrin_cpp.hpp.

v_reg<_Tp, V_SIMD128Traits<_Tp>::nlanes> cv::v_load_halves	(	const _Tp *	loptr,
		const _Tp *	hiptr
	)

Load register contents from two memory blocks.

Parameters:

loptr	memory block containing data for first half (0..n/2)
hiptr	memory block containing data for second half (n/2..n)

```cpp
int lo[2] = { 1, 2 }, hi[2] = { 3, 4 };
v_int32x4 r = v_load_halves(lo, hi);
```

Definition at line 1024 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_magnitude	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Magnitude.

Returns $ \sqrt{a^2 + b^2} $. For floating point types only.

Definition at line 725 of file intrin_cpp.hpp.

v_float32x4 cv::v_matmul	(	const v_float32x4 &	v,
		const v_float32x4 &	m0,
		const v_float32x4 &	m1,
		const v_float32x4 &	m2,
		const v_float32x4 &	m3
	)

Matrix multiplication.

Scheme:

{A0 A1 A2 A3}   |V0|
{B0 B1 B2 B3}   |V1|
{C0 C1 C2 C3}   |V2|
{D0 D1 D2 D3} x |V3|
====================
{R0 R1 R2 R3}, where:
R0 = A0V0 + A1V1 + A2V2 + A3V3,
R1 = B0V0 + B1V1 + B2V2 + B3V3
...

Definition at line 1724 of file intrin_cpp.hpp.

void cv::v_mul_expand	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	c,
		v_reg< typename V_TypeTraits< _Tp >::w_type, n/2 > &	d
	)

Multiply and expand.

Multiply values in two registers and store the results in two registers with wider pack type. Scheme:

  {A B C D} // 32-bit
x {E F G H} // 32-bit
---------------
{AE BF}         // 64-bit
        {CG DH} // 64-bit

Example:

```cpp
v_uint32x4 a, b; // {1,2,3,4} and {2,2,2,2}
v_uint64x2 c, d; // results
v_mul_expand(a, b, c, d); // c, d = {2,4}, {6, 8}
```

Implemented only for 16- and unsigned 32-bit source types (v_int16x8, v_uint16x8, v_uint32x4).

Definition at line 801 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_muladd	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c
	)

Multiply and add.

Returns $ a*b + c $. For floating point types only.

Definition at line 751 of file intrin_cpp.hpp.

void cv::v_recombine	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		v_reg< _Tp, n > &	low,
		v_reg< _Tp, n > &	high
	)

Combine two vectors from lower and higher parts of two other vectors.

 {.cpp}
low = cv::v_combine_low(a, b);
high = cv::v_combine_high(a, b);

Definition at line 1273 of file intrin_cpp.hpp.

v_reg<int, n> cv::v_round ( const v_reg< float, n > & a )

Round.

Rounds each value. Input type is float vector ==> output type is int vector.

Definition at line 1320 of file intrin_cpp.hpp.

v_reg<int, n*2> cv::v_round ( const v_reg< double, n > & a )

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Definition at line 1362 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_select	(	const v_reg< _Tp, n > &	mask,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Bitwise select.

The return value is built by combining the values of a and b using the following scheme: if the i-th bit in _mask_ is 1, select the i-th bit from _a_; otherwise select the i-th bit from _b_.

Definition at line 909 of file intrin_cpp.hpp.

int cv::v_signmask ( const v_reg< _Tp, n > & a )

Get negative values mask.

The returned value is a bit mask in which bit i is set to 1 if the i-th packed value is negative. Example:

 {.cpp}
v_int32x4 r; // set to {-1, -1, 1, 1}
int mask = v_signmask(r); // mask = 3 <== 00000000 00000000 00000000 00000011

For all types except 64-bit.

Definition at line 870 of file intrin_cpp.hpp.

v_reg<_Tp, n> cv::v_sqr_magnitude	(	const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b
	)

Square of the magnitude.

Returns $ a^2 + b^2 $. For floating point types only.

Definition at line 738 of file intrin_cpp.hpp.

void cv::v_store	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

Store data to memory.

Store register contents to memory. Scheme:

  REG {A B C D} ==> MEM {A B C D}

Pointer can be unaligned.

Definition at line 1173 of file intrin_cpp.hpp.

void cv::v_store_aligned	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

Store data to memory (aligned)

Store register contents to memory. Scheme:

  REG {A B C D} ==> MEM {A B C D}

Pointer **should** be aligned to a 16-byte boundary.

Definition at line 1216 of file intrin_cpp.hpp.

void cv::v_store_high	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

Store data to memory (higher half)

Store higher half of register contents to memory. Scheme:

  REG {A B C D} ==> MEM {C D}

Definition at line 1201 of file intrin_cpp.hpp.

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c,
		const v_reg< _Tp, n > &	d
	)

Interleave and store (4 channels)

Interleave and store data from 4 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...}, {D1 D2 ...} ==> {A1 B1 C1 D1 A2 B2 C2 D2 ...}

For all types except 64-bit.

Definition at line 1150 of file intrin_cpp.hpp.

void cv::v_store_interleave	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a,
		const v_reg< _Tp, n > &	b,
		const v_reg< _Tp, n > &	c
	)

Interleave and store (3 channels)

Interleave and store data from 3 registers to memory. Scheme:

{A1 A2 ...}, {B1 B2 ...}, {C1 C2 ...} ==> {A1 B1 C1 A2 B2 C2 ...}

For all types except 64-bit.

Definition at line 1130 of file intrin_cpp.hpp.

void cv::v_store_low	(	_Tp *	ptr,
		const v_reg< _Tp, n > &	a
	)

Store data to memory (lower half)

Store lower half of register contents to memory. Scheme:

  REG {A B C D} ==> MEM {A B}

Definition at line 1187 of file intrin_cpp.hpp.

void cv::v_transpose4x4	(	v_reg< _Tp, 4 > &	a0,
		const v_reg< _Tp, 4 > &	a1,
		const v_reg< _Tp, 4 > &	a2,
		const v_reg< _Tp, 4 > &	a3,
		v_reg< _Tp, 4 > &	b0,
		v_reg< _Tp, 4 > &	b1,
		v_reg< _Tp, 4 > &	b2,
		v_reg< _Tp, 4 > &	b3
	)

Transpose 4x4 matrix.

Scheme:

a0  {A1 A2 A3 A4}
a1  {B1 B2 B3 B4}
a2  {C1 C2 C3 C4}
a3  {D1 D2 D3 D4}
===============
b0  {A1 B1 C1 D1}
b1  {A2 B2 C2 D2}
b2  {A3 B3 C3 D3}
b3  {A4 B4 C4 D4}

Definition at line 1458 of file intrin_cpp.hpp.

v_reg<int, n*2> cv::v_trunc ( const v_reg< double, n > & a )

This is an overloaded member function, provided for convenience. It differs from the above function only in what argument(s) it accepts.

Definition at line 1398 of file intrin_cpp.hpp.

v_reg<int, n> cv::v_trunc ( const v_reg< float, n > & a )

Trunc.

Truncate each value. Input type is float vector ==> output type is int vector.

Definition at line 1353 of file intrin_cpp.hpp.

void cv::v_zip	(	const v_reg< _Tp, n > &	a0,
		const v_reg< _Tp, n > &	a1,
		v_reg< _Tp, n > &	b0,
		v_reg< _Tp, n > &	b1
	)

Interleave two vectors.

Scheme:

  {A1 A2 A3 A4}
  {B1 B2 B3 B4}
---------------
  {A1 B1 A2 B2} and {A3 B3 A4 B4}

For all types except 64-bit.

Definition at line 974 of file intrin_cpp.hpp.

Type:	Program
Created:	04 Jul 2017
Imports:	5
Forks:	1
Commits:	171
Dependents:	0
Dependencies:	0
Followers:	4

Universal intrinsics
[Hardware Acceleration Layer]

Modules

Typedefs

Functions

Pack

Init with zero

Init with value

Reinterpret

Left shift

Right shift

Rounding shift

Pack with rounding shift

Pack and store

Pack and store with rounding shift

Detailed Description

Typedef Documentation

Function Documentation

Repository toolbox

Repository details

Important Information for this Arm website

Universal intrinsics [Hardware Acceleration Layer]

Modules

Typedefs

Functions

Pack

Init with zero

Init with value

Reinterpret

Left shift

Right shift

Rounding shift

Pack with rounding shift

Pack and store

Pack and store with rounding shift

Detailed Description

Typedef Documentation

Function Documentation

Repository toolbox

Repository details

Important Information for this Arm website

Access Warning

Universal intrinsics
[Hardware Acceleration Layer]