Opencv 3.1 project on GR-PEACH board
Fork of gr-peach-opencv-project by
demosaicing.cpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 00014 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved. 00015 // Copyright (C) 2014, Itseez Inc., all rights reserved. 00016 // Third party copyrights are property of their respective owners. 00017 // 00018 // Redistribution and use in source and binary forms, with or without modification, 00019 // are permitted provided that the following conditions are met: 00020 // 00021 // * Redistribution's of source code must retain the above copyright notice, 00022 // this list of conditions and the following disclaimer. 00023 // 00024 // * Redistribution's in binary form must reproduce the above copyright notice, 00025 // this list of conditions and the following disclaimer in the documentation 00026 // and/or other materials provided with the distribution. 00027 // 00028 // * The name of the copyright holders may not be used to endorse or promote products 00029 // derived from this software without specific prior written permission. 00030 // 00031 // This software is provided by the copyright holders and contributors "as is" and 00032 // any express or implied warranties, including, but not limited to, the implied 00033 // warranties of merchantability and fitness for a particular purpose are disclaimed. 00034 // In no event shall the Intel Corporation or contributors be liable for any direct, 00035 // indirect, incidental, special, exemplary, or consequential damages 00036 // (including, but not limited to, procurement of substitute goods or services; 00037 // loss of use, data, or profits; or business interruption) however caused 00038 // and on any theory of liability, whether in contract, strict liability, 00039 // or tort (including negligence or otherwise) arising in any way out of 00040 // the use of this software, even if advised of the possibility of such damage. 00041 // 00042 //M*/ 00043 00044 #include "precomp.hpp" 00045 00046 #include <limits> 00047 00048 #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) 00049 00050 namespace cv 00051 { 00052 00053 00054 //////////////////////////// Bayer Pattern -> RGB conversion ///////////////////////////// 00055 00056 template<typename T> 00057 class SIMDBayerStubInterpolator_ 00058 { 00059 public: 00060 int bayer2Gray(const T*, int, T*, int, int, int, int) const 00061 { 00062 return 0; 00063 } 00064 00065 int bayer2RGB(const T*, int, T*, int, int) const 00066 { 00067 return 0; 00068 } 00069 00070 int bayer2RGBA(const T*, int, T*, int, int) const 00071 { 00072 return 0; 00073 } 00074 00075 int bayer2RGB_EA(const T*, int, T*, int, int) const 00076 { 00077 return 0; 00078 } 00079 }; 00080 00081 #if CV_SSE2 00082 class SIMDBayerInterpolator_8u 00083 { 00084 public: 00085 SIMDBayerInterpolator_8u() 00086 { 00087 use_simd = checkHardwareSupport(CV_CPU_SSE2); 00088 } 00089 00090 int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst, 00091 int width, int bcoeff, int gcoeff, int rcoeff) const 00092 { 00093 if( !use_simd ) 00094 return 0; 00095 00096 __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2)); 00097 __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2)); 00098 __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2)); 00099 const uchar* bayer_end = bayer + width; 00100 00101 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 ) 00102 { 00103 __m128i r0 = _mm_loadu_si128((const __m128i*)bayer); 00104 __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step)); 00105 __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2)); 00106 00107 __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7), 00108 _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7)); 00109 __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2)); 00110 b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1); 00111 00112 __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7)); 00113 __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7); 00114 g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2))); 00115 g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2); 00116 00117 r0 = _mm_srli_epi16(r1, 8); 00118 r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2); 00119 r0 = _mm_slli_epi16(r0, 3); 00120 00121 g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y)); 00122 g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y)); 00123 g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y)); 00124 g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y)); 00125 g0 = _mm_srli_epi16(g0, 2); 00126 g1 = _mm_srli_epi16(g1, 2); 00127 g0 = _mm_packus_epi16(g0, g0); 00128 g1 = _mm_packus_epi16(g1, g1); 00129 g0 = _mm_unpacklo_epi8(g0, g1); 00130 _mm_storeu_si128((__m128i*)dst, g0); 00131 } 00132 00133 return (int)(bayer - (bayer_end - width)); 00134 } 00135 00136 int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const 00137 { 00138 if( !use_simd ) 00139 return 0; 00140 /* 00141 B G B G | B G B G | B G B G | B G B G 00142 G R G R | G R G R | G R G R | G R G R 00143 B G B G | B G B G | B G B G | B G B G 00144 */ 00145 00146 __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2); 00147 __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128(); 00148 __m128i masklo = _mm_set1_epi16(0x00ff); 00149 const uchar* bayer_end = bayer + width; 00150 00151 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 ) 00152 { 00153 __m128i r0 = _mm_loadu_si128((const __m128i*)bayer); 00154 __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step)); 00155 __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2)); 00156 00157 __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo)); 00158 __m128i nextb1 = _mm_srli_si128(b1, 2); 00159 __m128i b0 = _mm_add_epi16(b1, nextb1); 00160 b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1); 00161 b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2); 00162 // b0 b2 ... b14 b1 b3 ... b15 00163 b0 = _mm_packus_epi16(b0, b1); 00164 00165 __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8)); 00166 __m128i g1 = _mm_and_si128(r1, masklo); 00167 g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2))); 00168 g1 = _mm_srli_si128(g1, 2); 00169 g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2); 00170 // g0 g2 ... g14 g1 g3 ... g15 00171 g0 = _mm_packus_epi16(g0, g1); 00172 00173 r0 = _mm_srli_epi16(r1, 8); 00174 r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2)); 00175 r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1); 00176 // r0 r2 ... r14 r1 r3 ... r15 00177 r0 = _mm_packus_epi16(r0, r1); 00178 00179 b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask); 00180 b0 = _mm_xor_si128(b0, b1); 00181 r0 = _mm_xor_si128(r0, b1); 00182 00183 // b1 g1 b3 g3 b5 g5... 00184 b1 = _mm_unpackhi_epi8(b0, g0); 00185 // b0 g0 b2 g2 b4 g4 .... 00186 b0 = _mm_unpacklo_epi8(b0, g0); 00187 00188 // r1 0 r3 0 r5 0 ... 00189 r1 = _mm_unpackhi_epi8(r0, z); 00190 // r0 0 r2 0 r4 0 ... 00191 r0 = _mm_unpacklo_epi8(r0, z); 00192 00193 // 0 b0 g0 r0 0 b2 g2 r2 ... 00194 g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1); 00195 // 0 b8 g8 r8 0 b10 g10 r10 ... 00196 g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1); 00197 00198 // b1 g1 r1 0 b3 g3 r3 0 ... 00199 r0 = _mm_unpacklo_epi16(b1, r1); 00200 // b9 g9 r9 0 b11 g11 r11 0 ... 00201 r1 = _mm_unpackhi_epi16(b1, r1); 00202 00203 // 0 b0 g0 r0 b1 g1 r1 0 ... 00204 b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1); 00205 // 0 b4 g4 r4 b5 g5 r5 0 ... 00206 b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1); 00207 00208 _mm_storel_epi64((__m128i*)(dst-1+0), b0); 00209 _mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8)); 00210 _mm_storel_epi64((__m128i*)(dst-1+6*2), b1); 00211 _mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8)); 00212 00213 // 0 b8 g8 r8 b9 g9 r9 0 ... 00214 g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1); 00215 // 0 b12 g12 r12 b13 g13 r13 0 ... 00216 g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1); 00217 00218 _mm_storel_epi64((__m128i*)(dst-1+6*4), g0); 00219 _mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8)); 00220 00221 _mm_storel_epi64((__m128i*)(dst-1+6*6), g1); 00222 } 00223 00224 return (int)(bayer - (bayer_end - width)); 00225 } 00226 00227 int bayer2RGBA(const uchar*, int, uchar*, int, int) const 00228 { 00229 return 0; 00230 } 00231 00232 int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const 00233 { 00234 if (!use_simd) 00235 return 0; 00236 00237 const uchar* bayer_end = bayer + width; 00238 __m128i masklow = _mm_set1_epi16(0x00ff); 00239 __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2); 00240 __m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128(); 00241 __m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0); 00242 00243 for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42) 00244 { 00245 /* 00246 B G B G | B G B G | B G B G | B G B G 00247 G R G R | G R G R | G R G R | G R G R 00248 B G B G | B G B G | B G B G | B G B G 00249 */ 00250 00251 __m128i r0 = _mm_loadu_si128((const __m128i*)bayer); 00252 __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step)); 00253 __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2)); 00254 00255 __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow)); 00256 __m128i nextb1 = _mm_srli_si128(b1, 2); 00257 __m128i b0 = _mm_add_epi16(b1, nextb1); 00258 b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1); 00259 b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2); 00260 // b0 b2 ... b14 b1 b3 ... b15 00261 b0 = _mm_packus_epi16(b0, b1); 00262 00263 // vertical sum 00264 __m128i r0g = _mm_srli_epi16(r0, 8); 00265 __m128i r2g = _mm_srli_epi16(r2, 8); 00266 __m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1); 00267 // gorizontal sum 00268 __m128i g1 = _mm_and_si128(masklow, r1); 00269 __m128i nextg1 = _mm_srli_si128(g1, 2); 00270 __m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1); 00271 00272 // gradients 00273 __m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g)); 00274 __m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1)); 00275 __m128i gmask = _mm_cmpgt_epi16(gradg, gradv); 00276 00277 __m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full))); 00278 // g0 g2 ... g14 g1 g3 ... 00279 g0 = _mm_packus_epi16(g0, nextg1); 00280 00281 r0 = _mm_srli_epi16(r1, 8); 00282 r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2)); 00283 r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1); 00284 // r0 r2 ... r14 r1 r3 ... r15 00285 r0 = _mm_packus_epi16(r0, r1); 00286 00287 b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask); 00288 b0 = _mm_xor_si128(b0, b1); 00289 r0 = _mm_xor_si128(r0, b1); 00290 00291 // b1 g1 b3 g3 b5 g5... 00292 b1 = _mm_unpackhi_epi8(b0, g0); 00293 // b0 g0 b2 g2 b4 g4 .... 00294 b0 = _mm_unpacklo_epi8(b0, g0); 00295 00296 // r1 0 r3 0 r5 0 ... 00297 r1 = _mm_unpackhi_epi8(r0, z); 00298 // r0 0 r2 0 r4 0 ... 00299 r0 = _mm_unpacklo_epi8(r0, z); 00300 00301 // 0 b0 g0 r0 0 b2 g2 r2 ... 00302 g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1); 00303 // 0 b8 g8 r8 0 b10 g10 r10 ... 00304 g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1); 00305 00306 // b1 g1 r1 0 b3 g3 r3 0 ... 00307 r0 = _mm_unpacklo_epi16(b1, r1); 00308 // b9 g9 r9 0 b11 g11 r11 0 ... 00309 r1 = _mm_unpackhi_epi16(b1, r1); 00310 00311 // 0 b0 g0 r0 b1 g1 r1 0 ... 00312 b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1); 00313 // 0 b4 g4 r4 b5 g5 r5 0 ... 00314 b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1); 00315 00316 _mm_storel_epi64((__m128i*)(dst+0), b0); 00317 _mm_storel_epi64((__m128i*)(dst+6*1), _mm_srli_si128(b0, 8)); 00318 _mm_storel_epi64((__m128i*)(dst+6*2), b1); 00319 _mm_storel_epi64((__m128i*)(dst+6*3), _mm_srli_si128(b1, 8)); 00320 00321 // 0 b8 g8 r8 b9 g9 r9 0 ... 00322 g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1); 00323 // 0 b12 g12 r12 b13 g13 r13 0 ... 00324 g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1); 00325 00326 _mm_storel_epi64((__m128i*)(dst+6*4), g0); 00327 _mm_storel_epi64((__m128i*)(dst+6*5), _mm_srli_si128(g0, 8)); 00328 00329 _mm_storel_epi64((__m128i*)(dst+6*6), g1); 00330 } 00331 00332 return int(bayer - (bayer_end - width)); 00333 } 00334 00335 bool use_simd; 00336 }; 00337 #elif CV_NEON 00338 class SIMDBayerInterpolator_8u 00339 { 00340 public: 00341 SIMDBayerInterpolator_8u() 00342 { 00343 } 00344 00345 int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst, 00346 int width, int bcoeff, int gcoeff, int rcoeff) const 00347 { 00348 /* 00349 B G B G | B G B G | B G B G | B G B G 00350 G R G R | G R G R | G R G R | G R G R 00351 B G B G | B G B G | B G B G | B G B G 00352 */ 00353 00354 uint16x8_t masklo = vdupq_n_u16(255); 00355 const uchar* bayer_end = bayer + width; 00356 00357 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 ) 00358 { 00359 uint16x8_t r0 = vld1q_u16((const ushort*)bayer); 00360 uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step)); 00361 uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2)); 00362 00363 uint16x8_t b1_ = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo)); 00364 uint16x8_t b1 = vextq_u16(b1_, b1_, 1); 00365 uint16x8_t b0 = vaddq_u16(b1_, b1); 00366 // b0 = b0 b2 b4 ... 00367 // b1 = b1 b3 b5 ... 00368 00369 uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8)); 00370 uint16x8_t g1 = vandq_u16(r1, masklo); 00371 g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1))); 00372 uint16x8_t rot = vextq_u16(g1, g1, 1); 00373 g1 = vshlq_n_u16(rot, 2); 00374 // g0 = b0 b2 b4 ... 00375 // g1 = b1 b3 b5 ... 00376 00377 r0 = vshrq_n_u16(r1, 8); 00378 r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1)); 00379 r0 = vshlq_n_u16(r0, 2); 00380 // r0 = r0 r2 r4 ... 00381 // r1 = r1 r3 r5 ... 00382 00383 b0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b0), (short)(rcoeff*2))); 00384 b1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b1), (short)(rcoeff*4))); 00385 00386 g0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g0), (short)(gcoeff*2))); 00387 g1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g1), (short)(gcoeff*2))); 00388 00389 r0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r0), (short)(bcoeff*2))); 00390 r1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r1), (short)(bcoeff*4))); 00391 00392 g0 = vaddq_u16(vaddq_u16(g0, b0), r0); 00393 g1 = vaddq_u16(vaddq_u16(g1, b1), r1); 00394 00395 uint8x8x2_t p = vzip_u8(vrshrn_n_u16(g0, 2), vrshrn_n_u16(g1, 2)); 00396 vst1_u8(dst, p.val[0]); 00397 vst1_u8(dst + 8, p.val[1]); 00398 } 00399 00400 return (int)(bayer - (bayer_end - width)); 00401 } 00402 00403 int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const 00404 { 00405 /* 00406 B G B G | B G B G | B G B G | B G B G 00407 G R G R | G R G R | G R G R | G R G R 00408 B G B G | B G B G | B G B G | B G B G 00409 */ 00410 uint16x8_t masklo = vdupq_n_u16(255); 00411 uint8x16x3_t pix; 00412 const uchar* bayer_end = bayer + width; 00413 00414 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 ) 00415 { 00416 uint16x8_t r0 = vld1q_u16((const ushort*)bayer); 00417 uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step)); 00418 uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2)); 00419 00420 uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo)); 00421 uint16x8_t nextb1 = vextq_u16(b1, b1, 1); 00422 uint16x8_t b0 = vaddq_u16(b1, nextb1); 00423 // b0 b1 b2 ... 00424 uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1)); 00425 pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]); 00426 00427 uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8)); 00428 uint16x8_t g1 = vandq_u16(r1, masklo); 00429 g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1))); 00430 g1 = vextq_u16(g1, g1, 1); 00431 // g0 g1 g2 ... 00432 uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1)); 00433 pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]); 00434 00435 r0 = vshrq_n_u16(r1, 8); 00436 r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1)); 00437 // r0 r1 r2 ... 00438 uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1)); 00439 pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]); 00440 00441 vst3q_u8(dst-1, pix); 00442 } 00443 00444 return (int)(bayer - (bayer_end - width)); 00445 } 00446 00447 int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const 00448 { 00449 /* 00450 B G B G | B G B G | B G B G | B G B G 00451 G R G R | G R G R | G R G R | G R G R 00452 B G B G | B G B G | B G B G | B G B G 00453 */ 00454 uint16x8_t masklo = vdupq_n_u16(255); 00455 uint8x16x4_t pix; 00456 const uchar* bayer_end = bayer + width; 00457 pix.val[3] = vdupq_n_u8(255); 00458 00459 for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 ) 00460 { 00461 uint16x8_t r0 = vld1q_u16((const ushort*)bayer); 00462 uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step)); 00463 uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2)); 00464 00465 uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo)); 00466 uint16x8_t nextb1 = vextq_u16(b1, b1, 1); 00467 uint16x8_t b0 = vaddq_u16(b1, nextb1); 00468 // b0 b1 b2 ... 00469 uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1)); 00470 pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]); 00471 00472 uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8)); 00473 uint16x8_t g1 = vandq_u16(r1, masklo); 00474 g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1))); 00475 g1 = vextq_u16(g1, g1, 1); 00476 // g0 g1 g2 ... 00477 uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1)); 00478 pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]); 00479 00480 r0 = vshrq_n_u16(r1, 8); 00481 r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1)); 00482 // r0 r1 r2 ... 00483 uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1)); 00484 pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]); 00485 00486 vst4q_u8(dst-1, pix); 00487 } 00488 00489 return (int)(bayer - (bayer_end - width)); 00490 } 00491 00492 int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const 00493 { 00494 return 0; 00495 } 00496 }; 00497 #else 00498 typedef SIMDBayerStubInterpolator_<uchar> SIMDBayerInterpolator_8u; 00499 #endif 00500 00501 00502 template<typename T, class SIMDInterpolator> 00503 class Bayer2Gray_Invoker : 00504 public ParallelLoopBody 00505 { 00506 public: 00507 Bayer2Gray_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, bool _brow, 00508 const Size& _size, int _bcoeff, int _rcoeff) : 00509 ParallelLoopBody(), srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green), 00510 Brow(_brow), size(_size), Bcoeff(_bcoeff), Rcoeff(_rcoeff) 00511 { 00512 } 00513 00514 virtual void operator ()(const Range& range) const 00515 { 00516 SIMDInterpolator vecOp; 00517 const int G2Y = 9617; 00518 const int SHIFT = 14; 00519 00520 const T* bayer0 = srcmat.ptr<T>(); 00521 int bayer_step = (int)(srcmat.step/sizeof(T)); 00522 T* dst0 = (T*)dstmat.data; 00523 int dst_step = (int)(dstmat.step/sizeof(T)); 00524 int bcoeff = Bcoeff, rcoeff = Rcoeff; 00525 int start_with_green = Start_with_green; 00526 bool brow = Brow; 00527 00528 dst0 += dst_step + 1; 00529 00530 if (range.start % 2) 00531 { 00532 brow = !brow; 00533 std::swap(bcoeff, rcoeff); 00534 start_with_green = !start_with_green; 00535 } 00536 00537 bayer0 += range.start * bayer_step; 00538 dst0 += range.start * dst_step; 00539 00540 for(int i = range.start ; i < range.end; ++i, bayer0 += bayer_step, dst0 += dst_step ) 00541 { 00542 unsigned t0, t1, t2; 00543 const T* bayer = bayer0; 00544 T* dst = dst0; 00545 const T* bayer_end = bayer + size.width; 00546 00547 if( size.width <= 0 ) 00548 { 00549 dst[-1] = dst[size.width] = 0; 00550 continue; 00551 } 00552 00553 if( start_with_green ) 00554 { 00555 t0 = (bayer[1] + bayer[bayer_step*2+1])*rcoeff; 00556 t1 = (bayer[bayer_step] + bayer[bayer_step+2])*bcoeff; 00557 t2 = bayer[bayer_step+1]*(2*G2Y); 00558 00559 dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1); 00560 bayer++; 00561 dst++; 00562 } 00563 00564 int delta = vecOp.bayer2Gray(bayer, bayer_step, dst, size.width, bcoeff, G2Y, rcoeff); 00565 bayer += delta; 00566 dst += delta; 00567 00568 for( ; bayer <= bayer_end - 2; bayer += 2, dst += 2 ) 00569 { 00570 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff; 00571 t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y; 00572 t2 = bayer[bayer_step+1]*(4*bcoeff); 00573 dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2); 00574 00575 t0 = (bayer[2] + bayer[bayer_step*2+2])*rcoeff; 00576 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3])*bcoeff; 00577 t2 = bayer[bayer_step+2]*(2*G2Y); 00578 dst[1] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1); 00579 } 00580 00581 if( bayer < bayer_end ) 00582 { 00583 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff; 00584 t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y; 00585 t2 = bayer[bayer_step+1]*(4*bcoeff); 00586 dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2); 00587 bayer++; 00588 dst++; 00589 } 00590 00591 dst0[-1] = dst0[0]; 00592 dst0[size.width] = dst0[size.width-1]; 00593 00594 brow = !brow; 00595 std::swap(bcoeff, rcoeff); 00596 start_with_green = !start_with_green; 00597 } 00598 } 00599 00600 private: 00601 Mat srcmat; 00602 Mat dstmat; 00603 int Start_with_green; 00604 bool Brow; 00605 Size size; 00606 int Bcoeff, Rcoeff; 00607 }; 00608 00609 template<typename T, typename SIMDInterpolator> 00610 static void Bayer2Gray_( const Mat& srcmat, Mat& dstmat, int code ) 00611 { 00612 const int R2Y = 4899; 00613 const int B2Y = 1868; 00614 00615 Size size = srcmat.size(); 00616 int bcoeff = B2Y, rcoeff = R2Y; 00617 int start_with_green = code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY; 00618 bool brow = true; 00619 00620 if( code != CV_BayerBG2GRAY && code != CV_BayerGB2GRAY ) 00621 { 00622 brow = false; 00623 std::swap(bcoeff, rcoeff); 00624 } 00625 size.height -= 2; 00626 size.width -= 2; 00627 00628 if (size.height > 0) 00629 { 00630 Range range(0, size.height); 00631 Bayer2Gray_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat, 00632 start_with_green, brow, size, bcoeff, rcoeff); 00633 parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16)); 00634 } 00635 00636 size = dstmat.size(); 00637 T* dst0 = dstmat.ptr<T>(); 00638 int dst_step = (int)(dstmat.step/sizeof(T)); 00639 if( size.height > 2 ) 00640 for( int i = 0; i < size.width; i++ ) 00641 { 00642 dst0[i] = dst0[i + dst_step]; 00643 dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step]; 00644 } 00645 else 00646 for( int i = 0; i < size.width; i++ ) 00647 dst0[i] = dst0[i + (size.height-1)*dst_step] = 0; 00648 } 00649 00650 template <typename T> 00651 struct Alpha 00652 { 00653 static T value() { return std::numeric_limits<T>::max(); } 00654 }; 00655 00656 template <> 00657 struct Alpha<float> 00658 { 00659 static float value() { return 1.0f; } 00660 }; 00661 00662 template <typename T, typename SIMDInterpolator> 00663 class Bayer2RGB_Invoker : 00664 public ParallelLoopBody 00665 { 00666 public: 00667 Bayer2RGB_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, int _blue, const Size& _size) : 00668 ParallelLoopBody(), 00669 srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green), Blue(_blue), size(_size) 00670 { 00671 } 00672 00673 virtual void operator() (const Range& range) const 00674 { 00675 SIMDInterpolator vecOp; 00676 T alpha = Alpha<T>::value(); 00677 int dcn = dstmat.channels(); 00678 int dcn2 = dcn << 1; 00679 00680 int bayer_step = (int)(srcmat.step/sizeof(T)); 00681 const T* bayer0 = srcmat.ptr<T>() + bayer_step * range.start; 00682 00683 int dst_step = (int)(dstmat.step/sizeof(T)); 00684 T* dst0 = reinterpret_cast<T*>(dstmat.data) + (range.start + 1) * dst_step + dcn + 1; 00685 00686 int blue = Blue, start_with_green = Start_with_green; 00687 if (range.start % 2) 00688 { 00689 blue = -blue; 00690 start_with_green = !start_with_green; 00691 } 00692 00693 for (int i = range.start; i < range.end; bayer0 += bayer_step, dst0 += dst_step, ++i ) 00694 { 00695 int t0, t1; 00696 const T* bayer = bayer0; 00697 T* dst = dst0; 00698 const T* bayer_end = bayer + size.width; 00699 00700 // in case of when size.width <= 2 00701 if( size.width <= 0 ) 00702 { 00703 if (dcn == 3) 00704 { 00705 dst[-4] = dst[-3] = dst[-2] = dst[size.width*dcn-1] = 00706 dst[size.width*dcn] = dst[size.width*dcn+1] = 0; 00707 } 00708 else 00709 { 00710 dst[-5] = dst[-4] = dst[-3] = dst[size.width*dcn-1] = 00711 dst[size.width*dcn] = dst[size.width*dcn+1] = 0; 00712 dst[-2] = dst[size.width*dcn+2] = alpha; 00713 } 00714 continue; 00715 } 00716 00717 if( start_with_green ) 00718 { 00719 t0 = (bayer[1] + bayer[bayer_step*2+1] + 1) >> 1; 00720 t1 = (bayer[bayer_step] + bayer[bayer_step+2] + 1) >> 1; 00721 00722 dst[-blue] = (T)t0; 00723 dst[0] = bayer[bayer_step+1]; 00724 dst[blue] = (T)t1; 00725 if (dcn == 4) 00726 dst[2] = alpha; // alpha channel 00727 00728 bayer++; 00729 dst += dcn; 00730 } 00731 00732 // simd optimization only for dcn == 3 00733 int delta = dcn == 4 ? 00734 vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) : 00735 vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue); 00736 bayer += delta; 00737 dst += delta*dcn; 00738 00739 if (dcn == 3) // Bayer to BGR 00740 { 00741 if( blue > 0 ) 00742 { 00743 for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 ) 00744 { 00745 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + 00746 bayer[bayer_step*2+2] + 2) >> 2; 00747 t1 = (bayer[1] + bayer[bayer_step] + 00748 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2; 00749 dst[-1] = (T)t0; 00750 dst[0] = (T)t1; 00751 dst[1] = bayer[bayer_step+1]; 00752 00753 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1; 00754 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1; 00755 dst[2] = (T)t0; 00756 dst[3] = bayer[bayer_step+2]; 00757 dst[4] = (T)t1; 00758 } 00759 } 00760 else 00761 { 00762 for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 ) 00763 { 00764 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + 00765 bayer[bayer_step*2+2] + 2) >> 2; 00766 t1 = (bayer[1] + bayer[bayer_step] + 00767 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2; 00768 dst[1] = (T)t0; 00769 dst[0] = (T)t1; 00770 dst[-1] = bayer[bayer_step+1]; 00771 00772 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1; 00773 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1; 00774 dst[4] = (T)t0; 00775 dst[3] = bayer[bayer_step+2]; 00776 dst[2] = (T)t1; 00777 } 00778 } 00779 } 00780 else // Bayer to BGRA 00781 { 00782 // if current row does not contain Blue pixels 00783 if( blue > 0 ) 00784 { 00785 for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 ) 00786 { 00787 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + 00788 bayer[bayer_step*2+2] + 2) >> 2; 00789 t1 = (bayer[1] + bayer[bayer_step] + 00790 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2; 00791 dst[-1] = (T)t0; 00792 dst[0] = (T)t1; 00793 dst[1] = bayer[bayer_step+1]; 00794 dst[2] = alpha; // alpha channel 00795 00796 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1; 00797 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1; 00798 dst[3] = (T)t0; 00799 dst[4] = bayer[bayer_step+2]; 00800 dst[5] = (T)t1; 00801 dst[6] = alpha; // alpha channel 00802 } 00803 } 00804 else // if current row contains Blue pixels 00805 { 00806 for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 ) 00807 { 00808 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + 00809 bayer[bayer_step*2+2] + 2) >> 2; 00810 t1 = (bayer[1] + bayer[bayer_step] + 00811 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2; 00812 dst[-1] = bayer[bayer_step+1]; 00813 dst[0] = (T)t1; 00814 dst[1] = (T)t0; 00815 dst[2] = alpha; // alpha channel 00816 00817 t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1; 00818 t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1; 00819 dst[3] = (T)t1; 00820 dst[4] = bayer[bayer_step+2]; 00821 dst[5] = (T)t0; 00822 dst[6] = alpha; // alpha channel 00823 } 00824 } 00825 } 00826 00827 // if skip one pixel at the end of row 00828 if( bayer < bayer_end ) 00829 { 00830 t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + 00831 bayer[bayer_step*2+2] + 2) >> 2; 00832 t1 = (bayer[1] + bayer[bayer_step] + 00833 bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2; 00834 dst[-blue] = (T)t0; 00835 dst[0] = (T)t1; 00836 dst[blue] = bayer[bayer_step+1]; 00837 if (dcn == 4) 00838 dst[2] = alpha; // alpha channel 00839 bayer++; 00840 dst += dcn; 00841 } 00842 00843 // fill the last and the first pixels of row accordingly 00844 if (dcn == 3) 00845 { 00846 dst0[-4] = dst0[-1]; 00847 dst0[-3] = dst0[0]; 00848 dst0[-2] = dst0[1]; 00849 dst0[size.width*dcn-1] = dst0[size.width*dcn-4]; 00850 dst0[size.width*dcn] = dst0[size.width*dcn-3]; 00851 dst0[size.width*dcn+1] = dst0[size.width*dcn-2]; 00852 } 00853 else 00854 { 00855 dst0[-5] = dst0[-1]; 00856 dst0[-4] = dst0[0]; 00857 dst0[-3] = dst0[1]; 00858 dst0[-2] = dst0[2]; // alpha channel 00859 dst0[size.width*dcn-1] = dst0[size.width*dcn-5]; 00860 dst0[size.width*dcn] = dst0[size.width*dcn-4]; 00861 dst0[size.width*dcn+1] = dst0[size.width*dcn-3]; 00862 dst0[size.width*dcn+2] = dst0[size.width*dcn-2]; // alpha channel 00863 } 00864 00865 blue = -blue; 00866 start_with_green = !start_with_green; 00867 } 00868 } 00869 00870 private: 00871 Mat srcmat; 00872 Mat dstmat; 00873 int Start_with_green, Blue; 00874 Size size; 00875 }; 00876 00877 template<typename T, class SIMDInterpolator> 00878 static void Bayer2RGB_( const Mat& srcmat, Mat& dstmat, int code ) 00879 { 00880 int dst_step = (int)(dstmat.step/sizeof(T)); 00881 Size size = srcmat.size(); 00882 int blue = code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ? -1 : 1; 00883 int start_with_green = code == CV_BayerGB2BGR || code == CV_BayerGR2BGR; 00884 00885 int dcn = dstmat.channels(); 00886 size.height -= 2; 00887 size.width -= 2; 00888 00889 if (size.height > 0) 00890 { 00891 Range range(0, size.height); 00892 Bayer2RGB_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat, start_with_green, blue, size); 00893 parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16)); 00894 } 00895 00896 // filling the first and the last rows 00897 size = dstmat.size(); 00898 T* dst0 = dstmat.ptr<T>(); 00899 if( size.height > 2 ) 00900 for( int i = 0; i < size.width*dcn; i++ ) 00901 { 00902 dst0[i] = dst0[i + dst_step]; 00903 dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step]; 00904 } 00905 else 00906 for( int i = 0; i < size.width*dcn; i++ ) 00907 dst0[i] = dst0[i + (size.height-1)*dst_step] = 0; 00908 } 00909 00910 00911 /////////////////// Demosaicing using Variable Number of Gradients /////////////////////// 00912 00913 static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code ) 00914 { 00915 const uchar* bayer = srcmat.ptr(); 00916 int bstep = (int)srcmat.step; 00917 uchar* dst = dstmat.ptr(); 00918 int dststep = (int)dstmat.step; 00919 Size size = srcmat.size(); 00920 00921 int blueIdx = code == CV_BayerBG2BGR_VNG || code == CV_BayerGB2BGR_VNG ? 0 : 2; 00922 bool greenCell0 = code != CV_BayerBG2BGR_VNG && code != CV_BayerRG2BGR_VNG; 00923 00924 // for too small images use the simple interpolation algorithm 00925 if( MIN(size.width, size.height) < 8 ) 00926 { 00927 Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>( srcmat, dstmat, code ); 00928 return; 00929 } 00930 00931 const int brows = 3, bcn = 7; 00932 int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7; 00933 int i, bufstep = N7*bcn; 00934 cv::AutoBuffer<ushort> _buf(bufstep*brows); 00935 ushort* buf = (ushort*)_buf; 00936 00937 bayer += bstep*2; 00938 00939 #if CV_SSE2 00940 bool haveSSE = cv::checkHardwareSupport(CV_CPU_SSE2); 00941 #define _mm_absdiff_epu16(a,b) _mm_adds_epu16(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a)) 00942 #endif 00943 00944 for( int y = 2; y < size.height - 4; y++ ) 00945 { 00946 uchar* dstrow = dst + dststep*y + 6; 00947 const uchar* srow; 00948 00949 for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ ) 00950 { 00951 ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1; 00952 srow = bayer + (y+dy)*bstep + 1; 00953 00954 for( i = 0; i < bcn; i++ ) 00955 brow[N*i-1] = brow[(N-2) + N*i] = 0; 00956 00957 i = 1; 00958 00959 #if CV_SSE2 00960 if( haveSSE ) 00961 { 00962 __m128i z = _mm_setzero_si128(); 00963 for( ; i <= N-9; i += 8, srow += 8, brow += 8 ) 00964 { 00965 __m128i s1, s2, s3, s4, s6, s7, s8, s9; 00966 00967 s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1-bstep)),z); 00968 s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-bstep)),z); 00969 s3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1-bstep)),z); 00970 00971 s4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1)),z); 00972 s6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1)),z); 00973 00974 s7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1+bstep)),z); 00975 s8 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+bstep)),z); 00976 s9 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1+bstep)),z); 00977 00978 __m128i b0, b1, b2, b3, b4, b5, b6; 00979 00980 b0 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s2,s8),1), 00981 _mm_adds_epu16(_mm_absdiff_epu16(s1, s7), 00982 _mm_absdiff_epu16(s3, s9))); 00983 b1 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s4,s6),1), 00984 _mm_adds_epu16(_mm_absdiff_epu16(s1, s3), 00985 _mm_absdiff_epu16(s7, s9))); 00986 b2 = _mm_slli_epi16(_mm_absdiff_epu16(s3,s7),1); 00987 b3 = _mm_slli_epi16(_mm_absdiff_epu16(s1,s9),1); 00988 00989 _mm_storeu_si128((__m128i*)brow, b0); 00990 _mm_storeu_si128((__m128i*)(brow + N), b1); 00991 _mm_storeu_si128((__m128i*)(brow + N2), b2); 00992 _mm_storeu_si128((__m128i*)(brow + N3), b3); 00993 00994 b4 = _mm_adds_epu16(b2,_mm_adds_epu16(_mm_absdiff_epu16(s2, s4), 00995 _mm_absdiff_epu16(s6, s8))); 00996 b5 = _mm_adds_epu16(b3,_mm_adds_epu16(_mm_absdiff_epu16(s2, s6), 00997 _mm_absdiff_epu16(s4, s8))); 00998 b6 = _mm_adds_epu16(_mm_adds_epu16(s2, s4), _mm_adds_epu16(s6, s8)); 00999 b6 = _mm_srli_epi16(b6, 1); 01000 01001 _mm_storeu_si128((__m128i*)(brow + N4), b4); 01002 _mm_storeu_si128((__m128i*)(brow + N5), b5); 01003 _mm_storeu_si128((__m128i*)(brow + N6), b6); 01004 } 01005 } 01006 #endif 01007 01008 for( ; i < N-1; i++, srow++, brow++ ) 01009 { 01010 brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) + 01011 std::abs(srow[-bstep] - srow[+bstep])*2 + 01012 std::abs(srow[1-bstep] - srow[1+bstep])); 01013 brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) + 01014 std::abs(srow[-1] - srow[1])*2 + 01015 std::abs(srow[-1+bstep] - srow[1+bstep])); 01016 brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2); 01017 brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2); 01018 brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) + 01019 std::abs(srow[+bstep] - srow[1])); 01020 brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) + 01021 std::abs(srow[+bstep] - srow[-1])); 01022 brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + srow[+bstep])>>1); 01023 } 01024 } 01025 01026 const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2; 01027 const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2; 01028 const ushort* brow2 = buf + (y % brows)*bufstep + 2; 01029 static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f }; 01030 srow = bayer + y*bstep + 2; 01031 bool greenCell = greenCell0; 01032 01033 i = 2; 01034 #if CV_SSE2 01035 int limit = !haveSSE ? N-2 : greenCell ? std::min(3, N-2) : 2; 01036 #else 01037 int limit = N - 2; 01038 #endif 01039 01040 do 01041 { 01042 for( ; i < limit; i++, srow++, brow0++, brow1++, brow2++, dstrow += 3 ) 01043 { 01044 int gradN = brow0[0] + brow1[0]; 01045 int gradS = brow1[0] + brow2[0]; 01046 int gradW = brow1[N-1] + brow1[N]; 01047 int gradE = brow1[N] + brow1[N+1]; 01048 int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE); 01049 int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE); 01050 int R, G, B; 01051 01052 if( !greenCell ) 01053 { 01054 int gradNE = brow0[N4+1] + brow1[N4]; 01055 int gradSW = brow1[N4] + brow2[N4-1]; 01056 int gradNW = brow0[N5-1] + brow1[N5]; 01057 int gradSE = brow1[N5] + brow2[N5+1]; 01058 01059 minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE); 01060 maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE); 01061 int T = minGrad + MAX(maxGrad/2, 1); 01062 01063 int Rs = 0, Gs = 0, Bs = 0, ng = 0; 01064 if( gradN < T ) 01065 { 01066 Rs += srow[-bstep*2] + srow[0]; 01067 Gs += srow[-bstep]*2; 01068 Bs += srow[-bstep-1] + srow[-bstep+1]; 01069 ng++; 01070 } 01071 if( gradS < T ) 01072 { 01073 Rs += srow[bstep*2] + srow[0]; 01074 Gs += srow[bstep]*2; 01075 Bs += srow[bstep-1] + srow[bstep+1]; 01076 ng++; 01077 } 01078 if( gradW < T ) 01079 { 01080 Rs += srow[-2] + srow[0]; 01081 Gs += srow[-1]*2; 01082 Bs += srow[-bstep-1] + srow[bstep-1]; 01083 ng++; 01084 } 01085 if( gradE < T ) 01086 { 01087 Rs += srow[2] + srow[0]; 01088 Gs += srow[1]*2; 01089 Bs += srow[-bstep+1] + srow[bstep+1]; 01090 ng++; 01091 } 01092 if( gradNE < T ) 01093 { 01094 Rs += srow[-bstep*2+2] + srow[0]; 01095 Gs += brow0[N6+1]; 01096 Bs += srow[-bstep+1]*2; 01097 ng++; 01098 } 01099 if( gradSW < T ) 01100 { 01101 Rs += srow[bstep*2-2] + srow[0]; 01102 Gs += brow2[N6-1]; 01103 Bs += srow[bstep-1]*2; 01104 ng++; 01105 } 01106 if( gradNW < T ) 01107 { 01108 Rs += srow[-bstep*2-2] + srow[0]; 01109 Gs += brow0[N6-1]; 01110 Bs += srow[-bstep+1]*2; 01111 ng++; 01112 } 01113 if( gradSE < T ) 01114 { 01115 Rs += srow[bstep*2+2] + srow[0]; 01116 Gs += brow2[N6+1]; 01117 Bs += srow[-bstep+1]*2; 01118 ng++; 01119 } 01120 R = srow[0]; 01121 G = R + cvRound((Gs - Rs)*scale[ng]); 01122 B = R + cvRound((Bs - Rs)*scale[ng]); 01123 } 01124 else 01125 { 01126 int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1]; 01127 int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1]; 01128 int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1]; 01129 int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1]; 01130 01131 minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE); 01132 maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE); 01133 int T = minGrad + MAX(maxGrad/2, 1); 01134 01135 int Rs = 0, Gs = 0, Bs = 0, ng = 0; 01136 if( gradN < T ) 01137 { 01138 Rs += srow[-bstep*2-1] + srow[-bstep*2+1]; 01139 Gs += srow[-bstep*2] + srow[0]; 01140 Bs += srow[-bstep]*2; 01141 ng++; 01142 } 01143 if( gradS < T ) 01144 { 01145 Rs += srow[bstep*2-1] + srow[bstep*2+1]; 01146 Gs += srow[bstep*2] + srow[0]; 01147 Bs += srow[bstep]*2; 01148 ng++; 01149 } 01150 if( gradW < T ) 01151 { 01152 Rs += srow[-1]*2; 01153 Gs += srow[-2] + srow[0]; 01154 Bs += srow[-bstep-2]+srow[bstep-2]; 01155 ng++; 01156 } 01157 if( gradE < T ) 01158 { 01159 Rs += srow[1]*2; 01160 Gs += srow[2] + srow[0]; 01161 Bs += srow[-bstep+2]+srow[bstep+2]; 01162 ng++; 01163 } 01164 if( gradNE < T ) 01165 { 01166 Rs += srow[-bstep*2+1] + srow[1]; 01167 Gs += srow[-bstep+1]*2; 01168 Bs += srow[-bstep] + srow[-bstep+2]; 01169 ng++; 01170 } 01171 if( gradSW < T ) 01172 { 01173 Rs += srow[bstep*2-1] + srow[-1]; 01174 Gs += srow[bstep-1]*2; 01175 Bs += srow[bstep] + srow[bstep-2]; 01176 ng++; 01177 } 01178 if( gradNW < T ) 01179 { 01180 Rs += srow[-bstep*2-1] + srow[-1]; 01181 Gs += srow[-bstep-1]*2; 01182 Bs += srow[-bstep-2]+srow[-bstep]; 01183 ng++; 01184 } 01185 if( gradSE < T ) 01186 { 01187 Rs += srow[bstep*2+1] + srow[1]; 01188 Gs += srow[bstep+1]*2; 01189 Bs += srow[bstep+2]+srow[bstep]; 01190 ng++; 01191 } 01192 G = srow[0]; 01193 R = G + cvRound((Rs - Gs)*scale[ng]); 01194 B = G + cvRound((Bs - Gs)*scale[ng]); 01195 } 01196 dstrow[blueIdx] = cv::saturate_cast<uchar>(B); 01197 dstrow[1] = cv::saturate_cast<uchar>(G); 01198 dstrow[blueIdx^2] = cv::saturate_cast<uchar>(R); 01199 greenCell = !greenCell; 01200 } 01201 01202 #if CV_SSE2 01203 if( !haveSSE ) 01204 break; 01205 01206 __m128i emask = _mm_set1_epi32(0x0000ffff), 01207 omask = _mm_set1_epi32(0xffff0000), 01208 z = _mm_setzero_si128(), 01209 one = _mm_set1_epi16(1); 01210 __m128 _0_5 = _mm_set1_ps(0.5f); 01211 01212 #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA) 01213 #define _mm_cvtloepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f) 01214 #define _mm_cvthiepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f) 01215 #define _mm_loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts 01216 01217 // process 8 pixels at once 01218 for( ; i <= N - 10; i += 8, srow += 8, brow0 += 8, brow1 += 8, brow2 += 8 ) 01219 { 01220 //int gradN = brow0[0] + brow1[0]; 01221 __m128i gradN = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow0), _mm_loadu_si128((__m128i*)brow1)); 01222 01223 //int gradS = brow1[0] + brow2[0]; 01224 __m128i gradS = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow1), _mm_loadu_si128((__m128i*)brow2)); 01225 01226 //int gradW = brow1[N-1] + brow1[N]; 01227 __m128i gradW = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N-1)), _mm_loadu_si128((__m128i*)(brow1+N))); 01228 01229 //int gradE = brow1[N+1] + brow1[N]; 01230 __m128i gradE = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N+1)), _mm_loadu_si128((__m128i*)(brow1+N))); 01231 01232 //int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE); 01233 //int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE); 01234 __m128i minGrad = _mm_min_epi16(_mm_min_epi16(gradN, gradS), _mm_min_epi16(gradW, gradE)); 01235 __m128i maxGrad = _mm_max_epi16(_mm_max_epi16(gradN, gradS), _mm_max_epi16(gradW, gradE)); 01236 01237 __m128i grad0, grad1; 01238 01239 //int gradNE = brow0[N4+1] + brow1[N4]; 01240 //int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1]; 01241 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N4+1)), _mm_loadu_si128((__m128i*)(brow1+N4))); 01242 grad1 = _mm_adds_epi16( _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N2)), _mm_loadu_si128((__m128i*)(brow0+N2+1))), 01243 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2+1)))); 01244 __m128i gradNE = _mm_merge_epi16(grad0, grad1); 01245 01246 //int gradSW = brow1[N4] + brow2[N4-1]; 01247 //int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1]; 01248 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N4-1)), _mm_loadu_si128((__m128i*)(brow1+N4))); 01249 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N2)), _mm_loadu_si128((__m128i*)(brow2+N2-1))), 01250 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2-1)))); 01251 __m128i gradSW = _mm_merge_epi16(grad0, grad1); 01252 01253 minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNE), gradSW); 01254 maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNE), gradSW); 01255 01256 //int gradNW = brow0[N5-1] + brow1[N5]; 01257 //int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1]; 01258 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N5-1)), _mm_loadu_si128((__m128i*)(brow1+N5))); 01259 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N3)), _mm_loadu_si128((__m128i*)(brow0+N3-1))), 01260 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3-1)))); 01261 __m128i gradNW = _mm_merge_epi16(grad0, grad1); 01262 01263 //int gradSE = brow1[N5] + brow2[N5+1]; 01264 //int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1]; 01265 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N5+1)), _mm_loadu_si128((__m128i*)(brow1+N5))); 01266 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N3)), _mm_loadu_si128((__m128i*)(brow2+N3+1))), 01267 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3+1)))); 01268 __m128i gradSE = _mm_merge_epi16(grad0, grad1); 01269 01270 minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNW), gradSE); 01271 maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNW), gradSE); 01272 01273 //int T = minGrad + maxGrad/2; 01274 __m128i T = _mm_adds_epi16(_mm_max_epi16(_mm_srli_epi16(maxGrad, 1), one), minGrad); 01275 01276 __m128i RGs = z, GRs = z, Bs = z, ng = z; 01277 01278 __m128i x0 = _mm_loadl_u8_s16(srow, +0 ); 01279 __m128i x1 = _mm_loadl_u8_s16(srow, -1 - bstep ); 01280 __m128i x2 = _mm_loadl_u8_s16(srow, -1 - bstep*2); 01281 __m128i x3 = _mm_loadl_u8_s16(srow, - bstep ); 01282 __m128i x4 = _mm_loadl_u8_s16(srow, +1 - bstep*2); 01283 __m128i x5 = _mm_loadl_u8_s16(srow, +1 - bstep ); 01284 __m128i x6 = _mm_loadl_u8_s16(srow, +2 - bstep ); 01285 __m128i x7 = _mm_loadl_u8_s16(srow, +1 ); 01286 __m128i x8 = _mm_loadl_u8_s16(srow, +2 + bstep ); 01287 __m128i x9 = _mm_loadl_u8_s16(srow, +1 + bstep ); 01288 __m128i x10 = _mm_loadl_u8_s16(srow, +1 + bstep*2); 01289 __m128i x11 = _mm_loadl_u8_s16(srow, + bstep ); 01290 __m128i x12 = _mm_loadl_u8_s16(srow, -1 + bstep*2); 01291 __m128i x13 = _mm_loadl_u8_s16(srow, -1 + bstep ); 01292 __m128i x14 = _mm_loadl_u8_s16(srow, -2 + bstep ); 01293 __m128i x15 = _mm_loadl_u8_s16(srow, -1 ); 01294 __m128i x16 = _mm_loadl_u8_s16(srow, -2 - bstep ); 01295 01296 __m128i t0, t1, mask; 01297 01298 // gradN *********************************************** 01299 mask = _mm_cmpgt_epi16(T, gradN); // mask = T>gradN 01300 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradN) 01301 01302 t0 = _mm_slli_epi16(x3, 1); // srow[-bstep]*2 01303 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2), x0); // srow[-bstep*2] + srow[0] 01304 01305 // RGs += (srow[-bstep*2] + srow[0]) * (T>gradN) 01306 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask)); 01307 // GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN) 01308 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x2,x4)), mask)); 01309 // Bs += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN) 01310 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x5), t0), mask)); 01311 01312 // gradNE ********************************************** 01313 mask = _mm_cmpgt_epi16(T, gradNE); // mask = T>gradNE 01314 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradNE) 01315 01316 t0 = _mm_slli_epi16(x5, 1); // srow[-bstep+1]*2 01317 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2+2), x0); // srow[-bstep*2+2] + srow[0] 01318 01319 // RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE) 01320 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask)); 01321 // GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE) 01322 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6+1)), _mm_adds_epi16(x4,x7)), mask)); 01323 // Bs += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])} * (T>gradNE) 01324 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x3,x6)), mask)); 01325 01326 // gradE *********************************************** 01327 mask = _mm_cmpgt_epi16(T, gradE); // mask = T>gradE 01328 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradE) 01329 01330 t0 = _mm_slli_epi16(x7, 1); // srow[1]*2 01331 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, 2), x0); // srow[2] + srow[0] 01332 01333 // RGs += (srow[2] + srow[0]) * (T>gradE) 01334 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask)); 01335 // GRs += (srow[1]*2) * (T>gradE) 01336 GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask)); 01337 // Bs += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE) 01338 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x5,x9), _mm_adds_epi16(x6,x8)), mask)); 01339 01340 // gradSE ********************************************** 01341 mask = _mm_cmpgt_epi16(T, gradSE); // mask = T>gradSE 01342 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradSE) 01343 01344 t0 = _mm_slli_epi16(x9, 1); // srow[bstep+1]*2 01345 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2+2), x0); // srow[bstep*2+2] + srow[0] 01346 01347 // RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE) 01348 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask)); 01349 // GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE) 01350 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6+1)), _mm_adds_epi16(x7,x10)), mask)); 01351 // Bs += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE) 01352 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1), _mm_adds_epi16(x8,x11)), mask)); 01353 01354 // gradS *********************************************** 01355 mask = _mm_cmpgt_epi16(T, gradS); // mask = T>gradS 01356 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradS) 01357 01358 t0 = _mm_slli_epi16(x11, 1); // srow[bstep]*2 01359 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,bstep*2), x0); // srow[bstep*2]+srow[0] 01360 01361 // RGs += (srow[bstep*2]+srow[0]) * (T>gradS) 01362 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask)); 01363 // GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS) 01364 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x10,x12)), mask)); 01365 // Bs += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS) 01366 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x9,x13), t0), mask)); 01367 01368 // gradSW ********************************************** 01369 mask = _mm_cmpgt_epi16(T, gradSW); // mask = T>gradSW 01370 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradSW) 01371 01372 t0 = _mm_slli_epi16(x13, 1); // srow[bstep-1]*2 01373 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2-2), x0); // srow[bstep*2-2]+srow[0] 01374 01375 // RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW) 01376 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask)); 01377 // GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW) 01378 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6-1)), _mm_adds_epi16(x12,x15)), mask)); 01379 // Bs += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW) 01380 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x11,x14)), mask)); 01381 01382 // gradW *********************************************** 01383 mask = _mm_cmpgt_epi16(T, gradW); // mask = T>gradW 01384 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradW) 01385 01386 t0 = _mm_slli_epi16(x15, 1); // srow[-1]*2 01387 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -2), x0); // srow[-2]+srow[0] 01388 01389 // RGs += (srow[-2]+srow[0]) * (T>gradW) 01390 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask)); 01391 // GRs += (srow[-1]*2) * (T>gradW) 01392 GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask)); 01393 // Bs += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW) 01394 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x13), _mm_adds_epi16(x14,x16)), mask)); 01395 01396 // gradNW ********************************************** 01397 mask = _mm_cmpgt_epi16(T, gradNW); // mask = T>gradNW 01398 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradNW) 01399 01400 t0 = _mm_slli_epi16(x1, 1); // srow[-bstep-1]*2 01401 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,-bstep*2-2), x0); // srow[-bstep*2-2]+srow[0] 01402 01403 // RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW) 01404 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask)); 01405 // GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW) 01406 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6-1)), _mm_adds_epi16(x2,x15)), mask)); 01407 // Bs += {srow[-bstep-1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW) 01408 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1),_mm_adds_epi16(x3,x16)), mask)); 01409 01410 __m128 ngf0 = _mm_div_ps(_0_5, _mm_cvtloepi16_ps(ng)); 01411 __m128 ngf1 = _mm_div_ps(_0_5, _mm_cvthiepi16_ps(ng)); 01412 01413 // now interpolate r, g & b 01414 t0 = _mm_subs_epi16(GRs, RGs); 01415 t1 = _mm_subs_epi16(Bs, RGs); 01416 01417 t0 = _mm_add_epi16(x0, _mm_packs_epi32( 01418 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t0), ngf0)), 01419 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t0), ngf1)))); 01420 01421 t1 = _mm_add_epi16(x0, _mm_packs_epi32( 01422 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t1), ngf0)), 01423 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t1), ngf1)))); 01424 01425 x1 = _mm_merge_epi16(x0, t0); 01426 x2 = _mm_merge_epi16(t0, x0); 01427 01428 uchar R[8], G[8], B[8]; 01429 01430 _mm_storel_epi64(blueIdx ? (__m128i*)B : (__m128i*)R, _mm_packus_epi16(x1, z)); 01431 _mm_storel_epi64((__m128i*)G, _mm_packus_epi16(x2, z)); 01432 _mm_storel_epi64(blueIdx ? (__m128i*)R : (__m128i*)B, _mm_packus_epi16(t1, z)); 01433 01434 for( int j = 0; j < 8; j++, dstrow += 3 ) 01435 { 01436 dstrow[0] = B[j]; dstrow[1] = G[j]; dstrow[2] = R[j]; 01437 } 01438 } 01439 #endif 01440 01441 limit = N - 2; 01442 } 01443 while( i < N - 2 ); 01444 01445 for( i = 0; i < 6; i++ ) 01446 { 01447 dst[dststep*y + 5 - i] = dst[dststep*y + 8 - i]; 01448 dst[dststep*y + (N - 2)*3 + i] = dst[dststep*y + (N - 3)*3 + i]; 01449 } 01450 01451 greenCell0 = !greenCell0; 01452 blueIdx ^= 2; 01453 } 01454 01455 for( i = 0; i < size.width*3; i++ ) 01456 { 01457 dst[i] = dst[i + dststep] = dst[i + dststep*2]; 01458 dst[i + dststep*(size.height-4)] = 01459 dst[i + dststep*(size.height-3)] = 01460 dst[i + dststep*(size.height-2)] = 01461 dst[i + dststep*(size.height-1)] = dst[i + dststep*(size.height-5)]; 01462 } 01463 } 01464 01465 //////////////////////////////// Edge-Aware Demosaicing ////////////////////////////////// 01466 01467 template <typename T, typename SIMDInterpolator> 01468 class Bayer2RGB_EdgeAware_T_Invoker : 01469 public cv::ParallelLoopBody 01470 { 01471 public: 01472 Bayer2RGB_EdgeAware_T_Invoker(const Mat& _src, Mat& _dst, const Size& _size, 01473 int _blue, int _start_with_green) : 01474 ParallelLoopBody(), 01475 src(_src), dst(_dst), size(_size), Blue(_blue), Start_with_green(_start_with_green) 01476 { 01477 } 01478 01479 virtual void operator()(const Range& range) const 01480 { 01481 int dcn = dst.channels(); 01482 int dcn2 = dcn<<1; 01483 int start_with_green = Start_with_green, blue = Blue; 01484 int sstep = int(src.step / src.elemSize1()), dstep = int(dst.step / dst.elemSize1()); 01485 SIMDInterpolator vecOp; 01486 01487 const T* S = src.ptr<T>(range.start + 1) + 1; 01488 T* D = reinterpret_cast<T*>(dst.data + (range.start + 1) * dst.step) + dcn; 01489 01490 if (range.start % 2) 01491 { 01492 start_with_green ^= 1; 01493 blue ^= 1; 01494 } 01495 01496 // to BGR 01497 for (int y = range.start; y < range.end; ++y) 01498 { 01499 int x = 1; 01500 if (start_with_green) 01501 { 01502 D[blue<<1] = (S[-sstep] + S[sstep]) >> 1; 01503 D[1] = S[0]; 01504 D[2-(blue<<1)] = (S[-1] + S[1]) >> 1; 01505 D += dcn; 01506 ++S; 01507 ++x; 01508 } 01509 01510 int delta = vecOp.bayer2RGB_EA(S - sstep - 1, sstep, D, size.width, blue); 01511 x += delta; 01512 S += delta; 01513 D += dcn * delta; 01514 01515 if (blue) 01516 for (; x < size.width; x += 2, S += 2, D += dcn2) 01517 { 01518 D[0] = S[0]; 01519 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1; 01520 D[2] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1]) >> 2; 01521 01522 D[3] = (S[0] + S[2] + 1) >> 1; 01523 D[4] = S[1]; 01524 D[5] = (S[-sstep+1] + S[sstep+1] + 1) >> 1; 01525 } 01526 else 01527 for (; x < size.width; x += 2, S += 2, D += dcn2) 01528 { 01529 D[0] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2; 01530 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1; 01531 D[2] = S[0]; 01532 01533 D[3] = (S[-sstep+1] + S[sstep+1] + 1) >> 1; 01534 D[4] = S[1]; 01535 D[5] = (S[0] + S[2] + 1) >> 1; 01536 } 01537 01538 if (x <= size.width) 01539 { 01540 D[blue<<1] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2; 01541 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1; 01542 D[2-(blue<<1)] = S[0]; 01543 D += dcn; 01544 ++S; 01545 } 01546 01547 for (int i = 0; i < dcn; ++i) 01548 { 01549 D[i] = D[-dcn + i]; 01550 D[-dstep+dcn+i] = D[-dstep+(dcn<<1)+i]; 01551 } 01552 01553 start_with_green ^= 1; 01554 blue ^= 1; 01555 S += 2; 01556 D += dcn2; 01557 } 01558 } 01559 01560 private: 01561 Mat src; 01562 Mat dst; 01563 Size size; 01564 int Blue, Start_with_green; 01565 }; 01566 01567 template <typename T, typename SIMDInterpolator> 01568 static void Bayer2RGB_EdgeAware_T(const Mat& src, Mat& dst, int code) 01569 { 01570 Size size = src.size(); 01571 01572 // for small sizes 01573 if (size.width <= 2 || size.height <= 2) 01574 { 01575 dst = Scalar::all(0); 01576 return; 01577 } 01578 01579 size.width -= 2; 01580 size.height -= 2; 01581 01582 int start_with_green = code == CV_BayerGB2BGR_EA || code == CV_BayerGR2BGR_EA ? 1 : 0; 01583 int blue = code == CV_BayerGB2BGR_EA || code == CV_BayerBG2BGR_EA ? 1 : 0; 01584 01585 if (size.height > 0) 01586 { 01587 Bayer2RGB_EdgeAware_T_Invoker<T, SIMDInterpolator> invoker(src, dst, size, blue, start_with_green); 01588 Range range(0, size.height); 01589 parallel_for_(range, invoker, dst.total()/static_cast<double>(1<<16)); 01590 } 01591 size = dst.size(); 01592 size.width *= dst.channels(); 01593 size_t dstep = dst.step / dst.elemSize1(); 01594 T* firstRow = dst.ptr<T>(); 01595 T* lastRow = dst.ptr<T>() + (size.height-1) * dstep; 01596 01597 if (size.height > 2) 01598 { 01599 for (int x = 0; x < size.width; ++x) 01600 { 01601 firstRow[x] = (firstRow+dstep)[x]; 01602 lastRow[x] = (lastRow-dstep)[x]; 01603 } 01604 } 01605 else 01606 for (int x = 0; x < size.width; ++x) 01607 firstRow[x] = lastRow[x] = 0; 01608 } 01609 01610 } // end namespace cv 01611 01612 ////////////////////////////////////////////////////////////////////////////////////////// 01613 // The main Demosaicing function // 01614 ////////////////////////////////////////////////////////////////////////////////////////// 01615 01616 void cv::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn) 01617 { 01618 Mat src = _src.getMat(), dst; 01619 Size sz = src.size(); 01620 int scn = src.channels(), depth = src.depth(); 01621 01622 CV_Assert(depth == CV_8U || depth == CV_16U); 01623 CV_Assert(!src.empty()); 01624 01625 switch (code) 01626 { 01627 case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY: 01628 if (dcn <= 0) 01629 dcn = 1; 01630 CV_Assert( scn == 1 && dcn == 1 ); 01631 01632 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 01633 dst = _dst.getMat(); 01634 01635 if( depth == CV_8U ) 01636 Bayer2Gray_<uchar, SIMDBayerInterpolator_8u>(src, dst, code); 01637 else if( depth == CV_16U ) 01638 Bayer2Gray_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code); 01639 else 01640 CV_Error(CV_StsUnsupportedFormat, "Bayer->Gray demosaicing only supports 8u and 16u types"); 01641 break; 01642 01643 case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR: 01644 case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG: 01645 { 01646 if (dcn <= 0) 01647 dcn = 3; 01648 CV_Assert( scn == 1 && (dcn == 3 || dcn == 4) ); 01649 01650 _dst.create(sz, CV_MAKE_TYPE(depth, dcn)); 01651 Mat dst_ = _dst.getMat(); 01652 01653 if( code == CV_BayerBG2BGR || code == CV_BayerGB2BGR || 01654 code == CV_BayerRG2BGR || code == CV_BayerGR2BGR ) 01655 { 01656 if( depth == CV_8U ) 01657 Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>(src, dst_, code); 01658 else if( depth == CV_16U ) 01659 Bayer2RGB_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst_, code); 01660 else 01661 CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB demosaicing only supports 8u and 16u types"); 01662 } 01663 else 01664 { 01665 CV_Assert( depth == CV_8U ); 01666 Bayer2RGB_VNG_8u(src, dst_, code); 01667 } 01668 } 01669 break; 01670 01671 case CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA: 01672 if (dcn <= 0) 01673 dcn = 3; 01674 01675 CV_Assert(scn == 1 && dcn == 3); 01676 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 01677 dst = _dst.getMat(); 01678 01679 if (depth == CV_8U) 01680 Bayer2RGB_EdgeAware_T<uchar, SIMDBayerInterpolator_8u>(src, dst, code); 01681 else if (depth == CV_16U) 01682 Bayer2RGB_EdgeAware_T<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code); 01683 else 01684 CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB Edge-Aware demosaicing only currently supports 8u and 16u types"); 01685 01686 break; 01687 01688 default: 01689 CV_Error( CV_StsBadFlag, "Unknown / unsupported color conversion code" ); 01690 } 01691 } 01692
Generated on Tue Jul 12 2022 15:17:22 by 1.7.2