Important changes to repositories hosted on mbed.com
Mbed-hosted Mercurial repositories are deprecated and are due to be permanently deleted in July 2026.
To keep a copy of this software, download the repository's ZIP archive or clone it locally using Mercurial.
It is also possible to export all your personal repositories from the account settings page.
Fork of gr-peach-opencv-project-sd-card by
demosaicing.cpp
00001 /*M/////////////////////////////////////////////////////////////////////////////////////// 00002 // 00003 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 00004 // 00005 // By downloading, copying, installing or using the software you agree to this license. 00006 // If you do not agree to this license, do not download, install, 00007 // copy or use the software. 00008 // 00009 // 00010 // License Agreement 00011 // For Open Source Computer Vision Library 00012 // 00013 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 00014 // Copyright (C) 2009-2010, Willow Garage Inc., all rights reserved. 00015 // Copyright (C) 2014, Itseez Inc., all rights reserved. 00016 // Third party copyrights are property of their respective owners. 00017 // 00018 // Redistribution and use in source and binary forms, with or without modification, 00019 // are permitted provided that the following conditions are met: 00020 // 00021 // * Redistribution's of source code must retain the above copyright notice, 00022 // this list of conditions and the following disclaimer. 00023 // 00024 // * Redistribution's in binary form must reproduce the above copyright notice, 00025 // this list of conditions and the following disclaimer in the documentation 00026 // and/or other materials provided with the distribution. 00027 // 00028 // * The name of the copyright holders may not be used to endorse or promote products 00029 // derived from this software without specific prior written permission. 00030 // 00031 // This software is provided by the copyright holders and contributors "as is" and 00032 // any express or implied warranties, including, but not limited to, the implied 00033 // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

#include <limits>

// Rounding fixed-point descale: adds half of the divisor before shifting right by n.
#define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n))

namespace cv
{


//////////////////////////// Bayer Pattern -> RGB conversion /////////////////////////////

// No-op SIMD fallback. Every method reports that 0 pixels were processed,
// so the scalar loops in the invokers below handle the entire row.
template<typename T>
class SIMDBayerStubInterpolator_
{
public:
    int bayer2Gray(const T*, int, T*, int, int, int, int) const
    {
        return 0;
    }

    int bayer2RGB(const T*, int, T*, int, int) const
    {
        return 0;
    }

    int bayer2RGBA(const T*, int, T*, int, int) const
    {
        return 0;
    }

    int bayer2RGB_EA(const T*, int, T*, int, int) const
    {
        return 0;
    }
};

#if CV_SSE2
// SSE2 implementation of the inner-row Bayer interpolation kernels for 8-bit data.
// Each method returns the number of pixels it processed; the caller finishes the
// remainder of the row with scalar code.
class SIMDBayerInterpolator_8u
{
public:
    SIMDBayerInterpolator_8u()
    {
        // Runtime check: fall back to scalar (return 0) if SSE2 is unavailable.
        use_simd = checkHardwareSupport(CV_CPU_SSE2);
    }

    // Bayer -> gray for one row. bcoeff/gcoeff/rcoeff are fixed-point channel
    // weights supplied by the caller (already row-parity-swapped there).
    // NOTE(review): _b2y is loaded from rcoeff and _r2y from bcoeff — presumably
    // deliberate, matching how the caller orders its arguments per row; confirm
    // against the scalar path in Bayer2Gray_Invoker before changing.
    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
                   int width, int bcoeff, int gcoeff, int rcoeff) const
    {
        if( !use_simd )
            return 0;

        __m128i _b2y = _mm_set1_epi16((short)(rcoeff*2));
        __m128i _g2y = _mm_set1_epi16((short)(gcoeff*2));
        __m128i _r2y = _mm_set1_epi16((short)(bcoeff*2));
        const uchar* bayer_end = bayer + width;

        // Processes 14 output pixels per iteration; the "- 18" bound keeps the
        // 16-byte unaligned loads (including the row below) inside the buffer.
        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
        {
            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            // Isolate even-position (blue-row) samples from rows above/below
            // via shift-left-8/shift-right-7, which also pre-scales by 2.
            __m128i b1 = _mm_add_epi16(_mm_srli_epi16(_mm_slli_epi16(r0, 8), 7),
                                       _mm_srli_epi16(_mm_slli_epi16(r2, 8), 7));
            __m128i b0 = _mm_add_epi16(b1, _mm_srli_si128(b1, 2));
            b1 = _mm_slli_epi16(_mm_srli_si128(b1, 2), 1);

            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 7), _mm_srli_epi16(r2, 7));
            __m128i g1 = _mm_srli_epi16(_mm_slli_epi16(r1, 8), 7);
            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
            g1 = _mm_slli_epi16(_mm_srli_si128(g1, 2), 2);

            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_slli_epi16(_mm_add_epi16(r0, _mm_srli_si128(r0, 2)), 2);
            r0 = _mm_slli_epi16(r0, 3);

            // Weighted sum in 16-bit fixed point via mulhi, then pack the
            // interleaved even/odd results back to 8-bit.
            g0 = _mm_add_epi16(_mm_mulhi_epi16(b0, _b2y), _mm_mulhi_epi16(g0, _g2y));
            g1 = _mm_add_epi16(_mm_mulhi_epi16(b1, _b2y), _mm_mulhi_epi16(g1, _g2y));
            g0 = _mm_add_epi16(g0, _mm_mulhi_epi16(r0, _r2y));
            g1 = _mm_add_epi16(g1, _mm_mulhi_epi16(r1, _r2y));
            g0 = _mm_srli_epi16(g0, 2);
            g1 = _mm_srli_epi16(g1, 2);
            g0 = _mm_packus_epi16(g0, g0);
            g1 = _mm_packus_epi16(g1, g1);
            g0 = _mm_unpacklo_epi8(g0, g1);
            _mm_storeu_si128((__m128i*)dst, g0);
        }

        // Number of pixels actually consumed from the row.
        return (int)(bayer - (bayer_end - width));
    }

    // Bilinear Bayer -> BGR (3 channels) for one row.
    // blue < 0 selects the channel order via the XOR-swap mask below.
    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        if( !use_simd )
            return 0;
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
         */

        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
        __m128i mask = _mm_set1_epi16(blue < 0 ? -1 : 0), z = _mm_setzero_si128();
        __m128i masklo = _mm_set1_epi16(0x00ff);
        const uchar* bayer_end = bayer + width;

        // 14 pixels -> 42 interleaved BGR bytes per iteration.
        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
        {
            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            // Blue at green sites: average of up/down neighbours; at red sites:
            // average of the 4 diagonal neighbours, with rounding via delta1/delta2.
            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklo), _mm_and_si128(r2, masklo));
            __m128i nextb1 = _mm_srli_si128(b1, 2);
            __m128i b0 = _mm_add_epi16(b1, nextb1);
            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
            // b0 b2 ... b14 b1 b3 ... b15
            b0 = _mm_packus_epi16(b0, b1);

            __m128i g0 = _mm_add_epi16(_mm_srli_epi16(r0, 8), _mm_srli_epi16(r2, 8));
            __m128i g1 = _mm_and_si128(r1, masklo);
            g0 = _mm_add_epi16(g0, _mm_add_epi16(g1, _mm_srli_si128(g1, 2)));
            g1 = _mm_srli_si128(g1, 2);
            g0 = _mm_srli_epi16(_mm_add_epi16(g0, delta2), 2);
            // g0 g2 ... g14 g1 g3 ... g15
            g0 = _mm_packus_epi16(g0, g1);

            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
            // r0 r2 ... r14 r1 r3 ... r15
            r0 = _mm_packus_epi16(r0, r1);

            // Conditional XOR swap of B and R planes depending on 'blue' sign.
            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
            b0 = _mm_xor_si128(b0, b1);
            r0 = _mm_xor_si128(r0, b1);

            // Interleave planes into BGR triplets.
            // b1 g1 b3 g3 b5 g5...
            b1 = _mm_unpackhi_epi8(b0, g0);
            // b0 g0 b2 g2 b4 g4 ....
            b0 = _mm_unpacklo_epi8(b0, g0);

            // r1 0 r3 0 r5 0 ...
            r1 = _mm_unpackhi_epi8(r0, z);
            // r0 0 r2 0 r4 0 ...
            r0 = _mm_unpacklo_epi8(r0, z);

            // 0 b0 g0 r0 0 b2 g2 r2 ...
            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
            // 0 b8 g8 r8 0 b10 g10 r10 ...
            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);

            // b1 g1 r1 0 b3 g3 r3 0 ...
            r0 = _mm_unpacklo_epi16(b1, r1);
            // b9 g9 r9 0 b11 g11 r11 0 ...
            r1 = _mm_unpackhi_epi16(b1, r1);

            // 0 b0 g0 r0 b1 g1 r1 0 ...
            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
            // 0 b4 g4 r4 b5 g5 r5 0 ...
            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);

            // Stores at dst-1 compensate for the leading zero byte in each vector.
            _mm_storel_epi64((__m128i*)(dst-1+0), b0);
            _mm_storel_epi64((__m128i*)(dst-1+6*1), _mm_srli_si128(b0, 8));
            _mm_storel_epi64((__m128i*)(dst-1+6*2), b1);
            _mm_storel_epi64((__m128i*)(dst-1+6*3), _mm_srli_si128(b1, 8));

            // 0 b8 g8 r8 b9 g9 r9 0 ...
            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
            // 0 b12 g12 r12 b13 g13 r13 0 ...
            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);

            _mm_storel_epi64((__m128i*)(dst-1+6*4), g0);
            _mm_storel_epi64((__m128i*)(dst-1+6*5), _mm_srli_si128(g0, 8));

            _mm_storel_epi64((__m128i*)(dst-1+6*6), g1);
        }

        return (int)(bayer - (bayer_end - width));
    }

    // No SSE2 path for 4-channel output; caller uses the scalar loop.
    int bayer2RGBA(const uchar*, int, uchar*, int, int) const
    {
        return 0;
    }

    // Edge-aware (gradient-selected) Bayer -> BGR: green is interpolated along
    // the direction (horizontal vs vertical) with the smaller gradient.
    int bayer2RGB_EA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        if (!use_simd)
            return 0;

        const uchar* bayer_end = bayer + width;
        __m128i masklow = _mm_set1_epi16(0x00ff);
        __m128i delta1 = _mm_set1_epi16(1), delta2 = _mm_set1_epi16(2);
        __m128i full = _mm_set1_epi16(-1), z = _mm_setzero_si128();
        __m128i mask = _mm_set1_epi16(blue > 0 ? -1 : 0);

        for ( ; bayer <= bayer_end - 18; bayer += 14, dst += 42)
        {
            /*
             B G B G | B G B G | B G B G | B G B G
             G R G R | G R G R | G R G R | G R G R
             B G B G | B G B G | B G B G | B G B G
             */

            __m128i r0 = _mm_loadu_si128((const __m128i*)bayer);
            __m128i r1 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step));
            __m128i r2 = _mm_loadu_si128((const __m128i*)(bayer+bayer_step*2));

            __m128i b1 = _mm_add_epi16(_mm_and_si128(r0, masklow), _mm_and_si128(r2, masklow));
            __m128i nextb1 = _mm_srli_si128(b1, 2);
            __m128i b0 = _mm_add_epi16(b1, nextb1);
            b1 = _mm_srli_epi16(_mm_add_epi16(nextb1, delta1), 1);
            b0 = _mm_srli_epi16(_mm_add_epi16(b0, delta2), 2);
            // b0 b2 ... b14 b1 b3 ... b15
            b0 = _mm_packus_epi16(b0, b1);

            // vertical sum
            __m128i r0g = _mm_srli_epi16(r0, 8);
            __m128i r2g = _mm_srli_epi16(r2, 8);
            __m128i sumv = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(r0g, r2g), delta1), 1);
            // horizontal sum
            __m128i g1 = _mm_and_si128(masklow, r1);
            __m128i nextg1 = _mm_srli_si128(g1, 2);
            __m128i sumg = _mm_srli_epi16(_mm_add_epi16(_mm_add_epi16(g1, nextg1), delta1), 1);

            // gradients (absolute differences via saturating subtract in both directions)
            __m128i gradv = _mm_adds_epi16(_mm_subs_epu16(r0g, r2g), _mm_subs_epu16(r2g, r0g));
            __m128i gradg = _mm_adds_epi16(_mm_subs_epu16(nextg1, g1), _mm_subs_epu16(g1, nextg1));
            __m128i gmask = _mm_cmpgt_epi16(gradg, gradv);

            // Select vertical average where the horizontal gradient is larger, else horizontal.
            __m128i g0 = _mm_add_epi16(_mm_and_si128(gmask, sumv), _mm_and_si128(sumg, _mm_xor_si128(gmask, full)));
            // g0 g2 ... g14 g1 g3 ...
            g0 = _mm_packus_epi16(g0, nextg1);

            r0 = _mm_srli_epi16(r1, 8);
            r1 = _mm_add_epi16(r0, _mm_srli_si128(r0, 2));
            r1 = _mm_srli_epi16(_mm_add_epi16(r1, delta1), 1);
            // r0 r2 ... r14 r1 r3 ... r15
            r0 = _mm_packus_epi16(r0, r1);

            // Conditional B/R swap, then the same interleave as bayer2RGB above.
            b1 = _mm_and_si128(_mm_xor_si128(b0, r0), mask);
            b0 = _mm_xor_si128(b0, b1);
            r0 = _mm_xor_si128(r0, b1);

            // b1 g1 b3 g3 b5 g5...
            b1 = _mm_unpackhi_epi8(b0, g0);
            // b0 g0 b2 g2 b4 g4 ....
            b0 = _mm_unpacklo_epi8(b0, g0);

            // r1 0 r3 0 r5 0 ...
            r1 = _mm_unpackhi_epi8(r0, z);
            // r0 0 r2 0 r4 0 ...
            r0 = _mm_unpacklo_epi8(r0, z);

            // 0 b0 g0 r0 0 b2 g2 r2 ...
            g0 = _mm_slli_si128(_mm_unpacklo_epi16(b0, r0), 1);
            // 0 b8 g8 r8 0 b10 g10 r10 ...
            g1 = _mm_slli_si128(_mm_unpackhi_epi16(b0, r0), 1);

            // b1 g1 r1 0 b3 g3 r3 0 ...
            r0 = _mm_unpacklo_epi16(b1, r1);
            // b9 g9 r9 0 b11 g11 r11 0 ...
            r1 = _mm_unpackhi_epi16(b1, r1);

            // 0 b0 g0 r0 b1 g1 r1 0 ...
            b0 = _mm_srli_si128(_mm_unpacklo_epi32(g0, r0), 1);
            // 0 b4 g4 r4 b5 g5 r5 0 ...
            b1 = _mm_srli_si128(_mm_unpackhi_epi32(g0, r0), 1);

            _mm_storel_epi64((__m128i*)(dst+0), b0);
            _mm_storel_epi64((__m128i*)(dst+6*1), _mm_srli_si128(b0, 8));
            _mm_storel_epi64((__m128i*)(dst+6*2), b1);
            _mm_storel_epi64((__m128i*)(dst+6*3), _mm_srli_si128(b1, 8));

            // 0 b8 g8 r8 b9 g9 r9 0 ...
            g0 = _mm_srli_si128(_mm_unpacklo_epi32(g1, r1), 1);
            // 0 b12 g12 r12 b13 g13 r13 0 ...
            g1 = _mm_srli_si128(_mm_unpackhi_epi32(g1, r1), 1);

            _mm_storel_epi64((__m128i*)(dst+6*4), g0);
            _mm_storel_epi64((__m128i*)(dst+6*5), _mm_srli_si128(g0, 8));

            _mm_storel_epi64((__m128i*)(dst+6*6), g1);
        }

        return int(bayer - (bayer_end - width));
    }

    bool use_simd;  // set once in the constructor from CPU feature detection
};
#elif CV_NEON
// NEON implementation of the same per-row kernels (always enabled when built for NEON).
class SIMDBayerInterpolator_8u
{
public:
    SIMDBayerInterpolator_8u()
    {
    }

    // Bayer -> gray for one row; same contract and coefficient ordering as the SSE2 version.
    int bayer2Gray(const uchar* bayer, int bayer_step, uchar* dst,
                   int width, int bcoeff, int gcoeff, int rcoeff) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
         */

        uint16x8_t masklo = vdupq_n_u16(255);
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 14 )
        {
            // 16-byte loads reinterpreted as 8 x u16 lanes (even byte = low half of lane).
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            uint16x8_t b1_ = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t b1 = vextq_u16(b1_, b1_, 1);
            uint16x8_t b0 = vaddq_u16(b1_, b1);
            // b0 = b0 b2 b4 ...
            // b1 = b1 b3 b5 ...

            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            uint16x8_t rot = vextq_u16(g1, g1, 1);
            g1 = vshlq_n_u16(rot, 2);
            // g0 = b0 b2 b4 ...
            // g1 = b1 b3 b5 ...

            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            r0 = vshlq_n_u16(r0, 2);
            // r0 = r0 r2 r4 ...
            // r1 = r1 r3 r5 ...

            // Fixed-point weighting with saturating doubling-multiply-high; the *2/*4
            // scale factors compensate for the different pre-scaling of even/odd lanes.
            b0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b0), (short)(rcoeff*2)));
            b1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(b1), (short)(rcoeff*4)));

            g0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g0), (short)(gcoeff*2)));
            g1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(g1), (short)(gcoeff*2)));

            r0 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r0), (short)(bcoeff*2)));
            r1 = vreinterpretq_u16_s16(vqdmulhq_n_s16(vreinterpretq_s16_u16(r1), (short)(bcoeff*4)));

            g0 = vaddq_u16(vaddq_u16(g0, b0), r0);
            g1 = vaddq_u16(vaddq_u16(g1, b1), r1);

            // Rounding narrow to 8-bit and re-interleave even/odd pixels.
            uint8x8x2_t p = vzip_u8(vrshrn_n_u16(g0, 2), vrshrn_n_u16(g1, 2));
            vst1_u8(dst, p.val[0]);
            vst1_u8(dst + 8, p.val[1]);
        }

        // Number of pixels actually consumed from the row.
        return (int)(bayer - (bayer_end - width));
    }

    // Bilinear Bayer -> BGR (3 channels); 'blue' (+/-1) selects channel order
    // by indexing pix.val[1-blue] / pix.val[1+blue].
    int bayer2RGB(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
         */
        uint16x8_t masklo = vdupq_n_u16(255);
        uint8x16x3_t pix;
        const uchar* bayer_end = bayer + width;

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 42 )
        {
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t nextb1 = vextq_u16(b1, b1, 1);
            uint16x8_t b0 = vaddq_u16(b1, nextb1);
            // b0 b1 b2 ...
            uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));
            pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);

            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            g1 = vextq_u16(g1, g1, 1);
            // g0 g1 g2 ...
            uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));
            pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);

            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            // r0 r1 r2 ...
            uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));
            pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);

            // dst-1 compensates for the one-pixel phase offset of the interleave.
            vst3q_u8(dst-1, pix);
        }

        return (int)(bayer - (bayer_end - width));
    }

    // Same as bayer2RGB but emits BGRA with a constant 255 alpha plane.
    int bayer2RGBA(const uchar* bayer, int bayer_step, uchar* dst, int width, int blue) const
    {
        /*
         B G B G | B G B G | B G B G | B G B G
         G R G R | G R G R | G R G R | G R G R
         B G B G | B G B G | B G B G | B G B G
         */
        uint16x8_t masklo = vdupq_n_u16(255);
        uint8x16x4_t pix;
        const uchar* bayer_end = bayer + width;
        pix.val[3] = vdupq_n_u8(255);  // opaque alpha for every pixel

        for( ; bayer <= bayer_end - 18; bayer += 14, dst += 56 )
        {
            uint16x8_t r0 = vld1q_u16((const ushort*)bayer);
            uint16x8_t r1 = vld1q_u16((const ushort*)(bayer + bayer_step));
            uint16x8_t r2 = vld1q_u16((const ushort*)(bayer + bayer_step*2));

            uint16x8_t b1 = vaddq_u16(vandq_u16(r0, masklo), vandq_u16(r2, masklo));
            uint16x8_t nextb1 = vextq_u16(b1, b1, 1);
            uint16x8_t b0 = vaddq_u16(b1, nextb1);
            // b0 b1 b2 ...
            uint8x8x2_t bb = vzip_u8(vrshrn_n_u16(b0, 2), vrshrn_n_u16(nextb1, 1));
            pix.val[1-blue] = vcombine_u8(bb.val[0], bb.val[1]);

            uint16x8_t g0 = vaddq_u16(vshrq_n_u16(r0, 8), vshrq_n_u16(r2, 8));
            uint16x8_t g1 = vandq_u16(r1, masklo);
            g0 = vaddq_u16(g0, vaddq_u16(g1, vextq_u16(g1, g1, 1)));
            g1 = vextq_u16(g1, g1, 1);
            // g0 g1 g2 ...
            uint8x8x2_t gg = vzip_u8(vrshrn_n_u16(g0, 2), vmovn_u16(g1));
            pix.val[1] = vcombine_u8(gg.val[0], gg.val[1]);

            r0 = vshrq_n_u16(r1, 8);
            r1 = vaddq_u16(r0, vextq_u16(r0, r0, 1));
            // r0 r1 r2 ...
            uint8x8x2_t rr = vzip_u8(vmovn_u16(r0), vrshrn_n_u16(r1, 1));
            pix.val[1+blue] = vcombine_u8(rr.val[0], rr.val[1]);

            vst4q_u8(dst-1, pix);
        }

        return (int)(bayer - (bayer_end - width));
    }

    // No NEON edge-aware path; caller falls back to scalar code.
    int bayer2RGB_EA(const uchar*, int, uchar*, int, int) const
    {
        return 0;
    }
};
#else
// Neither SSE2 nor NEON: use the no-op stub, everything runs scalar.
typedef SIMDBayerStubInterpolator_<uchar> SIMDBayerInterpolator_8u;
#endif


// Parallel body converting a band of Bayer rows to single-channel gray.
// Each worker re-derives the per-row parity (coefficient swap, green phase)
// from range.start so bands can be processed independently.
template<typename T, class SIMDInterpolator>
class Bayer2Gray_Invoker :
    public ParallelLoopBody
{
public:
    Bayer2Gray_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, bool _brow,
                       const Size& _size, int _bcoeff, int _rcoeff) :
        ParallelLoopBody(), srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green),
        Brow(_brow), size(_size), Bcoeff(_bcoeff), Rcoeff(_rcoeff)
    {
    }

    virtual void operator ()(const Range& range) const
    {
        SIMDInterpolator vecOp;
        const int G2Y = 9617;   // green weight in SHIFT-bit fixed point
        const int SHIFT = 14;

        const T* bayer0 = srcmat.ptr<T>();
        int bayer_step = (int)(srcmat.step/sizeof(T));
        T* dst0 = (T*)dstmat.data;
        int dst_step = (int)(dstmat.step/sizeof(T));
        int bcoeff = Bcoeff, rcoeff = Rcoeff;
        int start_with_green = Start_with_green;
        bool brow = Brow;

        // Skip the one-pixel border; the caller fills first/last rows afterwards.
        dst0
        += dst_step + 1;

        // Align row parity to this band's starting row.
        if (range.start % 2)
        {
            brow = !brow;
            std::swap(bcoeff, rcoeff);
            start_with_green = !start_with_green;
        }

        bayer0 += range.start * bayer_step;
        dst0 += range.start * dst_step;

        for(int i = range.start ; i < range.end; ++i, bayer0 += bayer_step, dst0 += dst_step )
        {
            unsigned t0, t1, t2;
            const T* bayer = bayer0;
            T* dst = dst0;
            const T* bayer_end = bayer + size.width;

            // Degenerate row (source narrower than 3 px): just zero the border pixels.
            if( size.width <= 0 )
            {
                dst[-1] = dst[size.width] = 0;
                continue;
            }

            // Leading green pixel: only two vertical R and two horizontal B
            // neighbours are available, hence the SHIFT+1 descale.
            if( start_with_green )
            {
                t0 = (bayer[1] + bayer[bayer_step*2+1])*rcoeff;
                t1 = (bayer[bayer_step] + bayer[bayer_step+2])*bcoeff;
                t2 = bayer[bayer_step+1]*(2*G2Y);

                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
                bayer++;
                dst++;
            }

            // Let the SIMD kernel take as much of the row as it can.
            int delta = vecOp.bayer2Gray(bayer, bayer_step, dst, size.width, bcoeff, G2Y, rcoeff);
            bayer += delta;
            dst += delta;

            // Scalar tail: two pixels per iteration (one at an R/B site, one at a G site).
            for( ; bayer <= bayer_end - 2; bayer += 2, dst += 2 )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
                t2 = bayer[bayer_step+1]*(4*bcoeff);
                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);

                t0 = (bayer[2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[bayer_step+1] + bayer[bayer_step+3])*bcoeff;
                t2 = bayer[bayer_step+2]*(2*G2Y);
                dst[1] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+1);
            }

            // Odd trailing pixel, if any.
            if( bayer < bayer_end )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] + bayer[bayer_step*2+2])*rcoeff;
                t1 = (bayer[1] + bayer[bayer_step] + bayer[bayer_step+2] + bayer[bayer_step*2+1])*G2Y;
                t2 = bayer[bayer_step+1]*(4*bcoeff);
                dst[0] = (T)CV_DESCALE(t0 + t1 + t2, SHIFT+2);
                bayer++;
                dst++;
            }

            // Replicate edge pixels into the one-pixel left/right border.
            dst0[-1] = dst0[0];
            dst0[size.width] = dst0[size.width-1];

            // Flip parity for the next row.
            // NOTE(review): 'brow' is toggled but never read in this method —
            // apparently kept only for symmetry with the constructor arguments.
            brow = !brow;
            std::swap(bcoeff, rcoeff);
            start_with_green = !start_with_green;
        }
    }

private:
    Mat srcmat;            // Bayer source (shallow copy, shared data)
    Mat dstmat;            // gray destination (shallow copy, shared data)
    int Start_with_green;  // 1 if row 0 starts on a green sample
    bool Brow;             // initial row parity flag (see NOTE above)
    Size size;             // interior size (src minus 2-pixel border)
    int Bcoeff, Rcoeff;    // fixed-point blue/red weights for row 0
};

// Dispatch entry for Bayer -> gray: sets up parity/coefficients from the
// conversion code, runs the parallel interior pass, then fills the first
// and last destination rows by replication (or zeros for tiny images).
template<typename T, typename SIMDInterpolator>
static void Bayer2Gray_( const Mat& srcmat, Mat& dstmat, int code )
{
    const int R2Y = 4899;   // 14-bit fixed-point luma weights
    const int B2Y = 1868;

    Size size = srcmat.size();
    int bcoeff = B2Y, rcoeff = R2Y;
    int start_with_green = code == CV_BayerGB2GRAY || code == CV_BayerGR2GRAY;
    bool brow = true;

    if( code != CV_BayerBG2GRAY && code != CV_BayerGB2GRAY )
    {
        brow = false;
        std::swap(bcoeff, rcoeff);
    }
    // Interior only; the 1-pixel border is handled separately.
    size.height -= 2;
    size.width -= 2;

    if (size.height > 0)
    {
        Range range(0, size.height);
        Bayer2Gray_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat,
            start_with_green, brow, size, bcoeff, rcoeff);
        parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));
    }

    // Fill the first and last rows of the full-size destination.
    size = dstmat.size();
    T* dst0 = dstmat.ptr<T>();
    int dst_step = (int)(dstmat.step/sizeof(T));
    if( size.height > 2 )
        for( int i = 0; i < size.width; i++ )
        {
            dst0[i] = dst0[i + dst_step];
            dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
        }
    else
        for( int i = 0; i < size.width; i++ )
            dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
}

// Opaque alpha value per element type: max() for integer types, 1.0 for float.
template <typename T>
struct Alpha
{
    static T value() { return std::numeric_limits<T>::max(); }
};

template <>
struct Alpha<float>
{
    static float value() { return 1.0f; }
};

// Parallel body converting a band of Bayer rows to BGR or BGRA by bilinear
// interpolation; row parity is re-derived from range.start as in the gray invoker.
template <typename T, typename SIMDInterpolator>
class Bayer2RGB_Invoker :
    public ParallelLoopBody
{
public:
    Bayer2RGB_Invoker(const Mat& _srcmat, Mat& _dstmat, int _start_with_green, int _blue, const Size& _size) :
        ParallelLoopBody(),
        srcmat(_srcmat), dstmat(_dstmat), Start_with_green(_start_with_green), Blue(_blue), size(_size)
    {
    }

    virtual void operator() (const Range& range) const
    {
        SIMDInterpolator vecOp;
        T alpha = Alpha<T>::value();
        int dcn = dstmat.channels();   // 3 (BGR) or 4 (BGRA)
        int dcn2 = dcn << 1;           // advance for two output pixels

        int bayer_step = (int)(srcmat.step/sizeof(T));
        const T* bayer0 = srcmat.ptr<T>() + bayer_step * range.start;

        int dst_step = (int)(dstmat.step/sizeof(T));
        // Skip the one-pixel border of the destination.
        T* dst0 = reinterpret_cast<T*>(dstmat.data) + (range.start + 1) * dst_step + dcn + 1;

        // 'blue' is +1/-1 and selects B/R placement; both flip every row.
        int blue = Blue, start_with_green = Start_with_green;
        if (range.start % 2)
        {
            blue = -blue;
            start_with_green = !start_with_green;
        }

        for (int i = range.start; i < range.end; bayer0 += bayer_step, dst0 += dst_step, ++i )
        {
            int t0, t1;
            const T* bayer = bayer0;
            T* dst = dst0;
            const T* bayer_end = bayer + size.width;

            // in case of when size.width <= 2: zero the border pixels and move on
            if( size.width <= 0 )
            {
                if (dcn == 3)
                {
                    dst[-4] = dst[-3] = dst[-2] = dst[size.width*dcn-1] =
                    dst[size.width*dcn] = dst[size.width*dcn+1] = 0;
                }
                else
                {
                    dst[-5] = dst[-4] = dst[-3] = dst[size.width*dcn-1] =
                    dst[size.width*dcn] = dst[size.width*dcn+1] = 0;
                    dst[-2] = dst[size.width*dcn+2] = alpha;
                }
                continue;
            }

            // Leading green pixel: average the two vertical and two horizontal neighbours.
            if( start_with_green )
            {
                t0 = (bayer[1] + bayer[bayer_step*2+1] + 1) >> 1;
                t1 = (bayer[bayer_step] + bayer[bayer_step+2] + 1) >> 1;

                dst[-blue] = (T)t0;
                dst[0] = bayer[bayer_step+1];
                dst[blue] = (T)t1;
                if (dcn == 4)
                    dst[2] = alpha; // alpha channel

                bayer++;
                dst += dcn;
            }

            // simd optimization only for dcn == 3 (bayer2RGBA may be a stub returning 0)
            int delta = dcn == 4 ?
                vecOp.bayer2RGBA(bayer, bayer_step, dst, size.width, blue) :
                vecOp.bayer2RGB(bayer, bayer_step, dst, size.width, blue);
            bayer += delta;
            dst += delta*dcn;

            if (dcn == 3) // Bayer to BGR
            {
                if( blue > 0 )
                {
                    // Two output pixels per iteration: one at a B/R site
                    // (4-neighbour averages), one at a G site (2-neighbour averages).
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[1] = bayer[bayer_step+1];

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[2] = (T)t0;
                        dst[3] = bayer[bayer_step+2];
                        dst[4] = (T)t1;
                    }
                }
                else
                {
                    // Same computation with B and R destinations mirrored.
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[-1] = bayer[bayer_step+1];

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[4] = (T)t0;
                        dst[3] = bayer[bayer_step+2];
                        dst[2] = (T)t1;
                    }
                }
            }
            else // Bayer to BGRA
            {
                // if current row does not contain Blue pixels
                if( blue > 0 )
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = (T)t0;
                        dst[0] = (T)t1;
                        dst[1] = bayer[bayer_step+1];
                        dst[2] = alpha; // alpha channel

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[3] = (T)t0;
                        dst[4] = bayer[bayer_step+2];
                        dst[5] = (T)t1;
                        dst[6] = alpha; // alpha channel
                    }
                }
                else // if current row contains Blue pixels
                {
                    for( ; bayer <= bayer_end - 2; bayer += 2, dst += dcn2 )
                    {
                        t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                              bayer[bayer_step*2+2] + 2) >> 2;
                        t1 = (bayer[1] + bayer[bayer_step] +
                              bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                        dst[-1] = bayer[bayer_step+1];
                        dst[0] = (T)t1;
                        dst[1] = (T)t0;
                        dst[2] = alpha; // alpha channel

                        t0 = (bayer[2] + bayer[bayer_step*2+2] + 1) >> 1;
                        t1 = (bayer[bayer_step+1] + bayer[bayer_step+3] + 1) >> 1;
                        dst[3] = (T)t1;
                        dst[4] = bayer[bayer_step+2];
                        dst[5] = (T)t0;
                        dst[6] = alpha; // alpha channel
                    }
                }
            }

            // if skip one pixel at the end of row
            if( bayer < bayer_end )
            {
                t0 = (bayer[0] + bayer[2] + bayer[bayer_step*2] +
                      bayer[bayer_step*2+2] + 2) >> 2;
                t1 = (bayer[1] + bayer[bayer_step] +
                      bayer[bayer_step+2] + bayer[bayer_step*2+1]+2) >> 2;
                dst[-blue] = (T)t0;
                dst[0] = (T)t1;
                dst[blue] = bayer[bayer_step+1];
                if (dcn == 4)
                    dst[2] = alpha; // alpha channel
                bayer++;
                dst += dcn;
            }

            // fill the last and the first pixels of row accordingly
            if (dcn == 3)
            {
                dst0[-4] = dst0[-1];
                dst0[-3] = dst0[0];
                dst0[-2] = dst0[1];
                dst0[size.width*dcn-1] = dst0[size.width*dcn-4];
                dst0[size.width*dcn] = dst0[size.width*dcn-3];
                dst0[size.width*dcn+1] = dst0[size.width*dcn-2];
            }
            else
            {
                dst0[-5] = dst0[-1];
                dst0[-4] = dst0[0];
                dst0[-3] = dst0[1];
                dst0[-2] = dst0[2]; // alpha channel
                dst0[size.width*dcn-1] = dst0[size.width*dcn-5];
                dst0[size.width*dcn] = dst0[size.width*dcn-4];
                dst0[size.width*dcn+1] = dst0[size.width*dcn-3];
                dst0[size.width*dcn+2] = dst0[size.width*dcn-2]; // alpha channel
            }

            // Flip parity for the next row.
            blue = -blue;
            start_with_green = !start_with_green;
        }
    }

private:
    Mat srcmat;                 // Bayer source (shallow copy, shared data)
    Mat dstmat;                 // BGR/BGRA destination (shallow copy, shared data)
    int Start_with_green, Blue; // row-0 green phase; +/-1 B/R orientation
    Size size;                  // interior size (src minus 2-pixel border)
};

// Dispatch entry for bilinear Bayer -> BGR(A): derives phase flags from the
// conversion code, runs the parallel interior pass, then fills the first and
// last destination rows by replication (or zeros for tiny images).
template<typename T, class SIMDInterpolator>
static void Bayer2RGB_( const Mat& srcmat, Mat& dstmat, int code )
{
    int dst_step = (int)(dstmat.step/sizeof(T));
    Size size = srcmat.size();
    int blue = code == CV_BayerBG2BGR || code == CV_BayerGB2BGR ? -1 : 1;
    int start_with_green = code == CV_BayerGB2BGR || code == CV_BayerGR2BGR;

    int dcn = dstmat.channels();
    // Interior only; the 1-pixel border is handled below and by the invoker.
    size.height -= 2;
    size.width -= 2;

    if (size.height > 0)
    {
        Range range(0, size.height);
        Bayer2RGB_Invoker<T, SIMDInterpolator> invoker(srcmat, dstmat, start_with_green, blue, size);
        parallel_for_(range, invoker, dstmat.total()/static_cast<double>(1<<16));
    }

    // filling the first and the last rows
    size = dstmat.size();
    T* dst0 = dstmat.ptr<T>();
    if( size.height > 2 )
        for( int i = 0; i < size.width*dcn; i++ )
        {
            dst0[i] = dst0[i + dst_step];
            dst0[i + (size.height-1)*dst_step] = dst0[i + (size.height-2)*dst_step];
        }
    else
        for( int i = 0; i < size.width*dcn; i++ )
            dst0[i] = dst0[i + (size.height-1)*dst_step] = 0;
}


/////////////////// Demosaicing using Variable Number of Gradients ///////////////////////

// VNG demosaicing for 8-bit Bayer input. (Definition continues beyond this chunk.)
static void Bayer2RGB_VNG_8u( const Mat& srcmat, Mat& dstmat, int code )
{
    const uchar* bayer = srcmat.ptr();
    int bstep = (int)srcmat.step;
    uchar* dst = dstmat.ptr();
    int dststep = (int)dstmat.step;
    Size size = srcmat.size();

    int blueIdx = code == CV_BayerBG2BGR_VNG || code == CV_BayerGB2BGR_VNG
? 0 : 2; 00922 bool greenCell0 = code != CV_BayerBG2BGR_VNG && code != CV_BayerRG2BGR_VNG; 00923 00924 // for too small images use the simple interpolation algorithm 00925 if( MIN(size.width, size.height) < 8 ) 00926 { 00927 Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>( srcmat, dstmat, code ); 00928 return; 00929 } 00930 00931 const int brows = 3, bcn = 7; 00932 int N = size.width, N2 = N*2, N3 = N*3, N4 = N*4, N5 = N*5, N6 = N*6, N7 = N*7; 00933 int i, bufstep = N7*bcn; 00934 cv::AutoBuffer<ushort> _buf(bufstep*brows); 00935 ushort* buf = (ushort*)_buf; 00936 00937 bayer += bstep*2; 00938 00939 #if CV_SSE2 00940 bool haveSSE = cv::checkHardwareSupport(CV_CPU_SSE2); 00941 #define _mm_absdiff_epu16(a,b) _mm_adds_epu16(_mm_subs_epu16(a, b), _mm_subs_epu16(b, a)) 00942 #endif 00943 00944 for( int y = 2; y < size.height - 4; y++ ) 00945 { 00946 uchar* dstrow = dst + dststep*y + 6; 00947 const uchar* srow; 00948 00949 for( int dy = (y == 2 ? -1 : 1); dy <= 1; dy++ ) 00950 { 00951 ushort* brow = buf + ((y + dy - 1)%brows)*bufstep + 1; 00952 srow = bayer + (y+dy)*bstep + 1; 00953 00954 for( i = 0; i < bcn; i++ ) 00955 brow[N*i-1] = brow[(N-2) + N*i] = 0; 00956 00957 i = 1; 00958 00959 #if CV_SSE2 00960 if( haveSSE ) 00961 { 00962 __m128i z = _mm_setzero_si128(); 00963 for( ; i <= N-9; i += 8, srow += 8, brow += 8 ) 00964 { 00965 __m128i s1, s2, s3, s4, s6, s7, s8, s9; 00966 00967 s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1-bstep)),z); 00968 s2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-bstep)),z); 00969 s3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1-bstep)),z); 00970 00971 s4 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1)),z); 00972 s6 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1)),z); 00973 00974 s7 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow-1+bstep)),z); 00975 s8 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+bstep)),z); 00976 s9 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)(srow+1+bstep)),z); 00977 00978 
__m128i b0, b1, b2, b3, b4, b5, b6; 00979 00980 b0 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s2,s8),1), 00981 _mm_adds_epu16(_mm_absdiff_epu16(s1, s7), 00982 _mm_absdiff_epu16(s3, s9))); 00983 b1 = _mm_adds_epu16(_mm_slli_epi16(_mm_absdiff_epu16(s4,s6),1), 00984 _mm_adds_epu16(_mm_absdiff_epu16(s1, s3), 00985 _mm_absdiff_epu16(s7, s9))); 00986 b2 = _mm_slli_epi16(_mm_absdiff_epu16(s3,s7),1); 00987 b3 = _mm_slli_epi16(_mm_absdiff_epu16(s1,s9),1); 00988 00989 _mm_storeu_si128((__m128i*)brow, b0); 00990 _mm_storeu_si128((__m128i*)(brow + N), b1); 00991 _mm_storeu_si128((__m128i*)(brow + N2), b2); 00992 _mm_storeu_si128((__m128i*)(brow + N3), b3); 00993 00994 b4 = _mm_adds_epu16(b2,_mm_adds_epu16(_mm_absdiff_epu16(s2, s4), 00995 _mm_absdiff_epu16(s6, s8))); 00996 b5 = _mm_adds_epu16(b3,_mm_adds_epu16(_mm_absdiff_epu16(s2, s6), 00997 _mm_absdiff_epu16(s4, s8))); 00998 b6 = _mm_adds_epu16(_mm_adds_epu16(s2, s4), _mm_adds_epu16(s6, s8)); 00999 b6 = _mm_srli_epi16(b6, 1); 01000 01001 _mm_storeu_si128((__m128i*)(brow + N4), b4); 01002 _mm_storeu_si128((__m128i*)(brow + N5), b5); 01003 _mm_storeu_si128((__m128i*)(brow + N6), b6); 01004 } 01005 } 01006 #endif 01007 01008 for( ; i < N-1; i++, srow++, brow++ ) 01009 { 01010 brow[0] = (ushort)(std::abs(srow[-1-bstep] - srow[-1+bstep]) + 01011 std::abs(srow[-bstep] - srow[+bstep])*2 + 01012 std::abs(srow[1-bstep] - srow[1+bstep])); 01013 brow[N] = (ushort)(std::abs(srow[-1-bstep] - srow[1-bstep]) + 01014 std::abs(srow[-1] - srow[1])*2 + 01015 std::abs(srow[-1+bstep] - srow[1+bstep])); 01016 brow[N2] = (ushort)(std::abs(srow[+1-bstep] - srow[-1+bstep])*2); 01017 brow[N3] = (ushort)(std::abs(srow[-1-bstep] - srow[1+bstep])*2); 01018 brow[N4] = (ushort)(brow[N2] + std::abs(srow[-bstep] - srow[-1]) + 01019 std::abs(srow[+bstep] - srow[1])); 01020 brow[N5] = (ushort)(brow[N3] + std::abs(srow[-bstep] - srow[1]) + 01021 std::abs(srow[+bstep] - srow[-1])); 01022 brow[N6] = (ushort)((srow[-bstep] + srow[-1] + srow[1] + 
srow[+bstep])>>1); 01023 } 01024 } 01025 01026 const ushort* brow0 = buf + ((y - 2) % brows)*bufstep + 2; 01027 const ushort* brow1 = buf + ((y - 1) % brows)*bufstep + 2; 01028 const ushort* brow2 = buf + (y % brows)*bufstep + 2; 01029 static const float scale[] = { 0.f, 0.5f, 0.25f, 0.1666666666667f, 0.125f, 0.1f, 0.08333333333f, 0.0714286f, 0.0625f }; 01030 srow = bayer + y*bstep + 2; 01031 bool greenCell = greenCell0; 01032 01033 i = 2; 01034 #if CV_SSE2 01035 int limit = !haveSSE ? N-2 : greenCell ? std::min(3, N-2) : 2; 01036 #else 01037 int limit = N - 2; 01038 #endif 01039 01040 do 01041 { 01042 for( ; i < limit; i++, srow++, brow0++, brow1++, brow2++, dstrow += 3 ) 01043 { 01044 int gradN = brow0[0] + brow1[0]; 01045 int gradS = brow1[0] + brow2[0]; 01046 int gradW = brow1[N-1] + brow1[N]; 01047 int gradE = brow1[N] + brow1[N+1]; 01048 int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE); 01049 int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE); 01050 int R, G, B; 01051 01052 if( !greenCell ) 01053 { 01054 int gradNE = brow0[N4+1] + brow1[N4]; 01055 int gradSW = brow1[N4] + brow2[N4-1]; 01056 int gradNW = brow0[N5-1] + brow1[N5]; 01057 int gradSE = brow1[N5] + brow2[N5+1]; 01058 01059 minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE); 01060 maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE); 01061 int T = minGrad + MAX(maxGrad/2, 1); 01062 01063 int Rs = 0, Gs = 0, Bs = 0, ng = 0; 01064 if( gradN < T ) 01065 { 01066 Rs += srow[-bstep*2] + srow[0]; 01067 Gs += srow[-bstep]*2; 01068 Bs += srow[-bstep-1] + srow[-bstep+1]; 01069 ng++; 01070 } 01071 if( gradS < T ) 01072 { 01073 Rs += srow[bstep*2] + srow[0]; 01074 Gs += srow[bstep]*2; 01075 Bs += srow[bstep-1] + srow[bstep+1]; 01076 ng++; 01077 } 01078 if( gradW < T ) 01079 { 01080 Rs += srow[-2] + srow[0]; 01081 Gs += srow[-1]*2; 01082 Bs += srow[-bstep-1] + srow[bstep-1]; 01083 ng++; 01084 } 
01085 if( gradE < T ) 01086 { 01087 Rs += srow[2] + srow[0]; 01088 Gs += srow[1]*2; 01089 Bs += srow[-bstep+1] + srow[bstep+1]; 01090 ng++; 01091 } 01092 if( gradNE < T ) 01093 { 01094 Rs += srow[-bstep*2+2] + srow[0]; 01095 Gs += brow0[N6+1]; 01096 Bs += srow[-bstep+1]*2; 01097 ng++; 01098 } 01099 if( gradSW < T ) 01100 { 01101 Rs += srow[bstep*2-2] + srow[0]; 01102 Gs += brow2[N6-1]; 01103 Bs += srow[bstep-1]*2; 01104 ng++; 01105 } 01106 if( gradNW < T ) 01107 { 01108 Rs += srow[-bstep*2-2] + srow[0]; 01109 Gs += brow0[N6-1]; 01110 Bs += srow[-bstep+1]*2; 01111 ng++; 01112 } 01113 if( gradSE < T ) 01114 { 01115 Rs += srow[bstep*2+2] + srow[0]; 01116 Gs += brow2[N6+1]; 01117 Bs += srow[-bstep+1]*2; 01118 ng++; 01119 } 01120 R = srow[0]; 01121 G = R + cvRound((Gs - Rs)*scale[ng]); 01122 B = R + cvRound((Bs - Rs)*scale[ng]); 01123 } 01124 else 01125 { 01126 int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1]; 01127 int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1]; 01128 int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1]; 01129 int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1]; 01130 01131 minGrad = std::min(std::min(std::min(std::min(minGrad, gradNE), gradSW), gradNW), gradSE); 01132 maxGrad = std::max(std::max(std::max(std::max(maxGrad, gradNE), gradSW), gradNW), gradSE); 01133 int T = minGrad + MAX(maxGrad/2, 1); 01134 01135 int Rs = 0, Gs = 0, Bs = 0, ng = 0; 01136 if( gradN < T ) 01137 { 01138 Rs += srow[-bstep*2-1] + srow[-bstep*2+1]; 01139 Gs += srow[-bstep*2] + srow[0]; 01140 Bs += srow[-bstep]*2; 01141 ng++; 01142 } 01143 if( gradS < T ) 01144 { 01145 Rs += srow[bstep*2-1] + srow[bstep*2+1]; 01146 Gs += srow[bstep*2] + srow[0]; 01147 Bs += srow[bstep]*2; 01148 ng++; 01149 } 01150 if( gradW < T ) 01151 { 01152 Rs += srow[-1]*2; 01153 Gs += srow[-2] + srow[0]; 01154 Bs += srow[-bstep-2]+srow[bstep-2]; 01155 ng++; 01156 } 01157 if( gradE < T ) 01158 { 01159 Rs += srow[1]*2; 01160 Gs += srow[2] + srow[0]; 01161 
Bs += srow[-bstep+2]+srow[bstep+2]; 01162 ng++; 01163 } 01164 if( gradNE < T ) 01165 { 01166 Rs += srow[-bstep*2+1] + srow[1]; 01167 Gs += srow[-bstep+1]*2; 01168 Bs += srow[-bstep] + srow[-bstep+2]; 01169 ng++; 01170 } 01171 if( gradSW < T ) 01172 { 01173 Rs += srow[bstep*2-1] + srow[-1]; 01174 Gs += srow[bstep-1]*2; 01175 Bs += srow[bstep] + srow[bstep-2]; 01176 ng++; 01177 } 01178 if( gradNW < T ) 01179 { 01180 Rs += srow[-bstep*2-1] + srow[-1]; 01181 Gs += srow[-bstep-1]*2; 01182 Bs += srow[-bstep-2]+srow[-bstep]; 01183 ng++; 01184 } 01185 if( gradSE < T ) 01186 { 01187 Rs += srow[bstep*2+1] + srow[1]; 01188 Gs += srow[bstep+1]*2; 01189 Bs += srow[bstep+2]+srow[bstep]; 01190 ng++; 01191 } 01192 G = srow[0]; 01193 R = G + cvRound((Rs - Gs)*scale[ng]); 01194 B = G + cvRound((Bs - Gs)*scale[ng]); 01195 } 01196 dstrow[blueIdx] = cv::saturate_cast<uchar>(B); 01197 dstrow[1] = cv::saturate_cast<uchar>(G); 01198 dstrow[blueIdx^2] = cv::saturate_cast<uchar>(R); 01199 greenCell = !greenCell; 01200 } 01201 01202 #if CV_SSE2 01203 if( !haveSSE ) 01204 break; 01205 01206 __m128i emask = _mm_set1_epi32(0x0000ffff), 01207 omask = _mm_set1_epi32(0xffff0000), 01208 z = _mm_setzero_si128(), 01209 one = _mm_set1_epi16(1); 01210 __m128 _0_5 = _mm_set1_ps(0.5f); 01211 01212 #define _mm_merge_epi16(a, b) _mm_or_si128(_mm_and_si128(a, emask), _mm_and_si128(b, omask)) //(aA_aA_aA_aA) * (bB_bB_bB_bB) => (bA_bA_bA_bA) 01213 #define _mm_cvtloepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpacklo_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (1f,2f,3f,4f) 01214 #define _mm_cvthiepi16_ps(a) _mm_cvtepi32_ps(_mm_srai_epi32(_mm_unpackhi_epi16(a,a), 16)) //(1,2,3,4,5,6,7,8) => (5f,6f,7f,8f) 01215 #define _mm_loadl_u8_s16(ptr, offset) _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i*)((ptr) + (offset))), z) //load 8 uchars to 8 shorts 01216 01217 // process 8 pixels at once 01218 for( ; i <= N - 10; i += 8, srow += 8, brow0 += 8, brow1 += 8, brow2 += 8 ) 01219 { 01220 //int gradN = brow0[0] + brow1[0]; 
01221 __m128i gradN = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow0), _mm_loadu_si128((__m128i*)brow1)); 01222 01223 //int gradS = brow1[0] + brow2[0]; 01224 __m128i gradS = _mm_adds_epi16(_mm_loadu_si128((__m128i*)brow1), _mm_loadu_si128((__m128i*)brow2)); 01225 01226 //int gradW = brow1[N-1] + brow1[N]; 01227 __m128i gradW = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N-1)), _mm_loadu_si128((__m128i*)(brow1+N))); 01228 01229 //int gradE = brow1[N+1] + brow1[N]; 01230 __m128i gradE = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N+1)), _mm_loadu_si128((__m128i*)(brow1+N))); 01231 01232 //int minGrad = std::min(std::min(std::min(gradN, gradS), gradW), gradE); 01233 //int maxGrad = std::max(std::max(std::max(gradN, gradS), gradW), gradE); 01234 __m128i minGrad = _mm_min_epi16(_mm_min_epi16(gradN, gradS), _mm_min_epi16(gradW, gradE)); 01235 __m128i maxGrad = _mm_max_epi16(_mm_max_epi16(gradN, gradS), _mm_max_epi16(gradW, gradE)); 01236 01237 __m128i grad0, grad1; 01238 01239 //int gradNE = brow0[N4+1] + brow1[N4]; 01240 //int gradNE = brow0[N2] + brow0[N2+1] + brow1[N2] + brow1[N2+1]; 01241 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N4+1)), _mm_loadu_si128((__m128i*)(brow1+N4))); 01242 grad1 = _mm_adds_epi16( _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N2)), _mm_loadu_si128((__m128i*)(brow0+N2+1))), 01243 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2+1)))); 01244 __m128i gradNE = _mm_merge_epi16(grad0, grad1); 01245 01246 //int gradSW = brow1[N4] + brow2[N4-1]; 01247 //int gradSW = brow1[N2] + brow1[N2-1] + brow2[N2] + brow2[N2-1]; 01248 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N4-1)), _mm_loadu_si128((__m128i*)(brow1+N4))); 01249 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N2)), _mm_loadu_si128((__m128i*)(brow2+N2-1))), 01250 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N2)), _mm_loadu_si128((__m128i*)(brow1+N2-1)))); 01251 __m128i gradSW = 
_mm_merge_epi16(grad0, grad1); 01252 01253 minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNE), gradSW); 01254 maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNE), gradSW); 01255 01256 //int gradNW = brow0[N5-1] + brow1[N5]; 01257 //int gradNW = brow0[N3] + brow0[N3-1] + brow1[N3] + brow1[N3-1]; 01258 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N5-1)), _mm_loadu_si128((__m128i*)(brow1+N5))); 01259 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow0+N3)), _mm_loadu_si128((__m128i*)(brow0+N3-1))), 01260 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3-1)))); 01261 __m128i gradNW = _mm_merge_epi16(grad0, grad1); 01262 01263 //int gradSE = brow1[N5] + brow2[N5+1]; 01264 //int gradSE = brow1[N3] + brow1[N3+1] + brow2[N3] + brow2[N3+1]; 01265 grad0 = _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N5+1)), _mm_loadu_si128((__m128i*)(brow1+N5))); 01266 grad1 = _mm_adds_epi16(_mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow2+N3)), _mm_loadu_si128((__m128i*)(brow2+N3+1))), 01267 _mm_adds_epi16(_mm_loadu_si128((__m128i*)(brow1+N3)), _mm_loadu_si128((__m128i*)(brow1+N3+1)))); 01268 __m128i gradSE = _mm_merge_epi16(grad0, grad1); 01269 01270 minGrad = _mm_min_epi16(_mm_min_epi16(minGrad, gradNW), gradSE); 01271 maxGrad = _mm_max_epi16(_mm_max_epi16(maxGrad, gradNW), gradSE); 01272 01273 //int T = minGrad + maxGrad/2; 01274 __m128i T = _mm_adds_epi16(_mm_max_epi16(_mm_srli_epi16(maxGrad, 1), one), minGrad); 01275 01276 __m128i RGs = z, GRs = z, Bs = z, ng = z; 01277 01278 __m128i x0 = _mm_loadl_u8_s16(srow, +0 ); 01279 __m128i x1 = _mm_loadl_u8_s16(srow, -1 - bstep ); 01280 __m128i x2 = _mm_loadl_u8_s16(srow, -1 - bstep*2); 01281 __m128i x3 = _mm_loadl_u8_s16(srow, - bstep ); 01282 __m128i x4 = _mm_loadl_u8_s16(srow, +1 - bstep*2); 01283 __m128i x5 = _mm_loadl_u8_s16(srow, +1 - bstep ); 01284 __m128i x6 = _mm_loadl_u8_s16(srow, +2 - bstep ); 01285 __m128i x7 = _mm_loadl_u8_s16(srow, +1 ); 01286 __m128i 
x8 = _mm_loadl_u8_s16(srow, +2 + bstep ); 01287 __m128i x9 = _mm_loadl_u8_s16(srow, +1 + bstep ); 01288 __m128i x10 = _mm_loadl_u8_s16(srow, +1 + bstep*2); 01289 __m128i x11 = _mm_loadl_u8_s16(srow, + bstep ); 01290 __m128i x12 = _mm_loadl_u8_s16(srow, -1 + bstep*2); 01291 __m128i x13 = _mm_loadl_u8_s16(srow, -1 + bstep ); 01292 __m128i x14 = _mm_loadl_u8_s16(srow, -2 + bstep ); 01293 __m128i x15 = _mm_loadl_u8_s16(srow, -1 ); 01294 __m128i x16 = _mm_loadl_u8_s16(srow, -2 - bstep ); 01295 01296 __m128i t0, t1, mask; 01297 01298 // gradN *********************************************** 01299 mask = _mm_cmpgt_epi16(T, gradN); // mask = T>gradN 01300 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradN) 01301 01302 t0 = _mm_slli_epi16(x3, 1); // srow[-bstep]*2 01303 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2), x0); // srow[-bstep*2] + srow[0] 01304 01305 // RGs += (srow[-bstep*2] + srow[0]) * (T>gradN) 01306 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask)); 01307 // GRs += {srow[-bstep]*2; (srow[-bstep*2-1] + srow[-bstep*2+1])} * (T>gradN) 01308 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x2,x4)), mask)); 01309 // Bs += {(srow[-bstep-1]+srow[-bstep+1]); srow[-bstep]*2 } * (T>gradN) 01310 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x5), t0), mask)); 01311 01312 // gradNE ********************************************** 01313 mask = _mm_cmpgt_epi16(T, gradNE); // mask = T>gradNE 01314 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradNE) 01315 01316 t0 = _mm_slli_epi16(x5, 1); // srow[-bstep+1]*2 01317 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -bstep*2+2), x0); // srow[-bstep*2+2] + srow[0] 01318 01319 // RGs += {(srow[-bstep*2+2] + srow[0]); srow[-bstep+1]*2} * (T>gradNE) 01320 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask)); 01321 // GRs += {brow0[N6+1]; (srow[-bstep*2+1] + srow[1])} * (T>gradNE) 01322 GRs = _mm_adds_epi16(GRs, 
_mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6+1)), _mm_adds_epi16(x4,x7)), mask)); 01323 // Bs += {srow[-bstep+1]*2; (srow[-bstep] + srow[-bstep+2])} * (T>gradNE) 01324 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x3,x6)), mask)); 01325 01326 // gradE *********************************************** 01327 mask = _mm_cmpgt_epi16(T, gradE); // mask = T>gradE 01328 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradE) 01329 01330 t0 = _mm_slli_epi16(x7, 1); // srow[1]*2 01331 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, 2), x0); // srow[2] + srow[0] 01332 01333 // RGs += (srow[2] + srow[0]) * (T>gradE) 01334 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask)); 01335 // GRs += (srow[1]*2) * (T>gradE) 01336 GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask)); 01337 // Bs += {(srow[-bstep+1]+srow[bstep+1]); (srow[-bstep+2]+srow[bstep+2])} * (T>gradE) 01338 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x5,x9), _mm_adds_epi16(x6,x8)), mask)); 01339 01340 // gradSE ********************************************** 01341 mask = _mm_cmpgt_epi16(T, gradSE); // mask = T>gradSE 01342 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradSE) 01343 01344 t0 = _mm_slli_epi16(x9, 1); // srow[bstep+1]*2 01345 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2+2), x0); // srow[bstep*2+2] + srow[0] 01346 01347 // RGs += {(srow[bstep*2+2] + srow[0]); srow[bstep+1]*2} * (T>gradSE) 01348 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask)); 01349 // GRs += {brow2[N6+1]; (srow[1]+srow[bstep*2+1])} * (T>gradSE) 01350 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6+1)), _mm_adds_epi16(x7,x10)), mask)); 01351 // Bs += {srow[-bstep+1]*2; (srow[bstep+2]+srow[bstep])} * (T>gradSE) 01352 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1), _mm_adds_epi16(x8,x11)), mask)); 01353 01354 // gradS *********************************************** 01355 mask = 
_mm_cmpgt_epi16(T, gradS); // mask = T>gradS 01356 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradS) 01357 01358 t0 = _mm_slli_epi16(x11, 1); // srow[bstep]*2 01359 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,bstep*2), x0); // srow[bstep*2]+srow[0] 01360 01361 // RGs += (srow[bstep*2]+srow[0]) * (T>gradS) 01362 RGs = _mm_adds_epi16(RGs, _mm_and_si128(t1, mask)); 01363 // GRs += {srow[bstep]*2; (srow[bstep*2+1]+srow[bstep*2-1])} * (T>gradS) 01364 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(t0, _mm_adds_epi16(x10,x12)), mask)); 01365 // Bs += {(srow[bstep+1]+srow[bstep-1]); srow[bstep]*2} * (T>gradS) 01366 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x9,x13), t0), mask)); 01367 01368 // gradSW ********************************************** 01369 mask = _mm_cmpgt_epi16(T, gradSW); // mask = T>gradSW 01370 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradSW) 01371 01372 t0 = _mm_slli_epi16(x13, 1); // srow[bstep-1]*2 01373 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, bstep*2-2), x0); // srow[bstep*2-2]+srow[0] 01374 01375 // RGs += {(srow[bstep*2-2]+srow[0]); srow[bstep-1]*2} * (T>gradSW) 01376 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask)); 01377 // GRs += {brow2[N6-1]; (srow[bstep*2-1]+srow[-1])} * (T>gradSW) 01378 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow2+N6-1)), _mm_adds_epi16(x12,x15)), mask)); 01379 // Bs += {srow[bstep-1]*2; (srow[bstep]+srow[bstep-2])} * (T>gradSW) 01380 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(t0,_mm_adds_epi16(x11,x14)), mask)); 01381 01382 // gradW *********************************************** 01383 mask = _mm_cmpgt_epi16(T, gradW); // mask = T>gradW 01384 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradW) 01385 01386 t0 = _mm_slli_epi16(x15, 1); // srow[-1]*2 01387 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow, -2), x0); // srow[-2]+srow[0] 01388 01389 // RGs += (srow[-2]+srow[0]) * (T>gradW) 01390 RGs = _mm_adds_epi16(RGs, 
_mm_and_si128(t1, mask)); 01391 // GRs += (srow[-1]*2) * (T>gradW) 01392 GRs = _mm_adds_epi16(GRs, _mm_and_si128(t0, mask)); 01393 // Bs += {(srow[-bstep-1]+srow[bstep-1]); (srow[bstep-2]+srow[-bstep-2])} * (T>gradW) 01394 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_adds_epi16(x1,x13), _mm_adds_epi16(x14,x16)), mask)); 01395 01396 // gradNW ********************************************** 01397 mask = _mm_cmpgt_epi16(T, gradNW); // mask = T>gradNW 01398 ng = _mm_sub_epi16(ng, mask); // ng += (T>gradNW) 01399 01400 t0 = _mm_slli_epi16(x1, 1); // srow[-bstep-1]*2 01401 t1 = _mm_adds_epi16(_mm_loadl_u8_s16(srow,-bstep*2-2), x0); // srow[-bstep*2-2]+srow[0] 01402 01403 // RGs += {(srow[-bstep*2-2]+srow[0]); srow[-bstep-1]*2} * (T>gradNW) 01404 RGs = _mm_adds_epi16(RGs, _mm_and_si128(_mm_merge_epi16(t1, t0), mask)); 01405 // GRs += {brow0[N6-1]; (srow[-bstep*2-1]+srow[-1])} * (T>gradNW) 01406 GRs = _mm_adds_epi16(GRs, _mm_and_si128(_mm_merge_epi16(_mm_loadu_si128((__m128i*)(brow0+N6-1)), _mm_adds_epi16(x2,x15)), mask)); 01407 // Bs += {srow[-bstep-1]*2; (srow[-bstep]+srow[-bstep-2])} * (T>gradNW) 01408 Bs = _mm_adds_epi16(Bs, _mm_and_si128(_mm_merge_epi16(_mm_slli_epi16(x5, 1),_mm_adds_epi16(x3,x16)), mask)); 01409 01410 __m128 ngf0 = _mm_div_ps(_0_5, _mm_cvtloepi16_ps(ng)); 01411 __m128 ngf1 = _mm_div_ps(_0_5, _mm_cvthiepi16_ps(ng)); 01412 01413 // now interpolate r, g & b 01414 t0 = _mm_subs_epi16(GRs, RGs); 01415 t1 = _mm_subs_epi16(Bs, RGs); 01416 01417 t0 = _mm_add_epi16(x0, _mm_packs_epi32( 01418 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t0), ngf0)), 01419 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t0), ngf1)))); 01420 01421 t1 = _mm_add_epi16(x0, _mm_packs_epi32( 01422 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtloepi16_ps(t1), ngf0)), 01423 _mm_cvtps_epi32(_mm_mul_ps(_mm_cvthiepi16_ps(t1), ngf1)))); 01424 01425 x1 = _mm_merge_epi16(x0, t0); 01426 x2 = _mm_merge_epi16(t0, x0); 01427 01428 uchar R[8], G[8], B[8]; 01429 01430 _mm_storel_epi64(blueIdx ? 
(__m128i*)B : (__m128i*)R, _mm_packus_epi16(x1, z)); 01431 _mm_storel_epi64((__m128i*)G, _mm_packus_epi16(x2, z)); 01432 _mm_storel_epi64(blueIdx ? (__m128i*)R : (__m128i*)B, _mm_packus_epi16(t1, z)); 01433 01434 for( int j = 0; j < 8; j++, dstrow += 3 ) 01435 { 01436 dstrow[0] = B[j]; dstrow[1] = G[j]; dstrow[2] = R[j]; 01437 } 01438 } 01439 #endif 01440 01441 limit = N - 2; 01442 } 01443 while( i < N - 2 ); 01444 01445 for( i = 0; i < 6; i++ ) 01446 { 01447 dst[dststep*y + 5 - i] = dst[dststep*y + 8 - i]; 01448 dst[dststep*y + (N - 2)*3 + i] = dst[dststep*y + (N - 3)*3 + i]; 01449 } 01450 01451 greenCell0 = !greenCell0; 01452 blueIdx ^= 2; 01453 } 01454 01455 for( i = 0; i < size.width*3; i++ ) 01456 { 01457 dst[i] = dst[i + dststep] = dst[i + dststep*2]; 01458 dst[i + dststep*(size.height-4)] = 01459 dst[i + dststep*(size.height-3)] = 01460 dst[i + dststep*(size.height-2)] = 01461 dst[i + dststep*(size.height-1)] = dst[i + dststep*(size.height-5)]; 01462 } 01463 } 01464 01465 //////////////////////////////// Edge-Aware Demosaicing ////////////////////////////////// 01466 01467 template <typename T, typename SIMDInterpolator> 01468 class Bayer2RGB_EdgeAware_T_Invoker : 01469 public cv::ParallelLoopBody 01470 { 01471 public: 01472 Bayer2RGB_EdgeAware_T_Invoker(const Mat& _src, Mat& _dst, const Size& _size, 01473 int _blue, int _start_with_green) : 01474 ParallelLoopBody(), 01475 src(_src), dst(_dst), size(_size), Blue(_blue), Start_with_green(_start_with_green) 01476 { 01477 } 01478 01479 virtual void operator()(const Range& range) const 01480 { 01481 int dcn = dst.channels(); 01482 int dcn2 = dcn<<1; 01483 int start_with_green = Start_with_green, blue = Blue; 01484 int sstep = int(src.step / src.elemSize1()), dstep = int(dst.step / dst.elemSize1()); 01485 SIMDInterpolator vecOp; 01486 01487 const T* S = src.ptr<T>(range.start + 1) + 1; 01488 T* D = reinterpret_cast<T*>(dst.data + (range.start + 1) * dst.step) + dcn; 01489 01490 if (range.start % 2) 01491 { 01492 
start_with_green ^= 1; 01493 blue ^= 1; 01494 } 01495 01496 // to BGR 01497 for (int y = range.start; y < range.end; ++y) 01498 { 01499 int x = 1; 01500 if (start_with_green) 01501 { 01502 D[blue<<1] = (S[-sstep] + S[sstep]) >> 1; 01503 D[1] = S[0]; 01504 D[2-(blue<<1)] = (S[-1] + S[1]) >> 1; 01505 D += dcn; 01506 ++S; 01507 ++x; 01508 } 01509 01510 int delta = vecOp.bayer2RGB_EA(S - sstep - 1, sstep, D, size.width, blue); 01511 x += delta; 01512 S += delta; 01513 D += dcn * delta; 01514 01515 if (blue) 01516 for (; x < size.width; x += 2, S += 2, D += dcn2) 01517 { 01518 D[0] = S[0]; 01519 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1; 01520 D[2] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1]) >> 2; 01521 01522 D[3] = (S[0] + S[2] + 1) >> 1; 01523 D[4] = S[1]; 01524 D[5] = (S[-sstep+1] + S[sstep+1] + 1) >> 1; 01525 } 01526 else 01527 for (; x < size.width; x += 2, S += 2, D += dcn2) 01528 { 01529 D[0] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2; 01530 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? (S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1; 01531 D[2] = S[0]; 01532 01533 D[3] = (S[-sstep+1] + S[sstep+1] + 1) >> 1; 01534 D[4] = S[1]; 01535 D[5] = (S[0] + S[2] + 1) >> 1; 01536 } 01537 01538 if (x <= size.width) 01539 { 01540 D[blue<<1] = (S[-sstep-1] + S[-sstep+1] + S[sstep-1] + S[sstep+1] + 2) >> 2; 01541 D[1] = (std::abs(S[-1] - S[1]) > std::abs(S[sstep] - S[-sstep]) ? 
(S[sstep] + S[-sstep] + 1) : (S[-1] + S[1] + 1)) >> 1; 01542 D[2-(blue<<1)] = S[0]; 01543 D += dcn; 01544 ++S; 01545 } 01546 01547 for (int i = 0; i < dcn; ++i) 01548 { 01549 D[i] = D[-dcn + i]; 01550 D[-dstep+dcn+i] = D[-dstep+(dcn<<1)+i]; 01551 } 01552 01553 start_with_green ^= 1; 01554 blue ^= 1; 01555 S += 2; 01556 D += dcn2; 01557 } 01558 } 01559 01560 private: 01561 Mat src; 01562 Mat dst; 01563 Size size; 01564 int Blue, Start_with_green; 01565 }; 01566 01567 template <typename T, typename SIMDInterpolator> 01568 static void Bayer2RGB_EdgeAware_T(const Mat& src, Mat& dst, int code) 01569 { 01570 Size size = src.size(); 01571 01572 // for small sizes 01573 if (size.width <= 2 || size.height <= 2) 01574 { 01575 dst = Scalar::all(0); 01576 return; 01577 } 01578 01579 size.width -= 2; 01580 size.height -= 2; 01581 01582 int start_with_green = code == CV_BayerGB2BGR_EA || code == CV_BayerGR2BGR_EA ? 1 : 0; 01583 int blue = code == CV_BayerGB2BGR_EA || code == CV_BayerBG2BGR_EA ? 1 : 0; 01584 01585 if (size.height > 0) 01586 { 01587 Bayer2RGB_EdgeAware_T_Invoker<T, SIMDInterpolator> invoker(src, dst, size, blue, start_with_green); 01588 Range range(0, size.height); 01589 parallel_for_(range, invoker, dst.total()/static_cast<double>(1<<16)); 01590 } 01591 size = dst.size(); 01592 size.width *= dst.channels(); 01593 size_t dstep = dst.step / dst.elemSize1(); 01594 T* firstRow = dst.ptr<T>(); 01595 T* lastRow = dst.ptr<T>() + (size.height-1) * dstep; 01596 01597 if (size.height > 2) 01598 { 01599 for (int x = 0; x < size.width; ++x) 01600 { 01601 firstRow[x] = (firstRow+dstep)[x]; 01602 lastRow[x] = (lastRow-dstep)[x]; 01603 } 01604 } 01605 else 01606 for (int x = 0; x < size.width; ++x) 01607 firstRow[x] = lastRow[x] = 0; 01608 } 01609 01610 } // end namespace cv 01611 01612 ////////////////////////////////////////////////////////////////////////////////////////// 01613 // The main Demosaicing function // 01614 
////////////////////////////////////////////////////////////////////////////////////////// 01615 01616 void cv::demosaicing(InputArray _src, OutputArray _dst, int code, int dcn) 01617 { 01618 Mat src = _src.getMat(), dst; 01619 Size sz = src.size(); 01620 int scn = src.channels(), depth = src.depth(); 01621 01622 CV_Assert(depth == CV_8U || depth == CV_16U); 01623 CV_Assert(!src.empty()); 01624 01625 switch (code) 01626 { 01627 case CV_BayerBG2GRAY: case CV_BayerGB2GRAY: case CV_BayerRG2GRAY: case CV_BayerGR2GRAY: 01628 if (dcn <= 0) 01629 dcn = 1; 01630 CV_Assert( scn == 1 && dcn == 1 ); 01631 01632 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 01633 dst = _dst.getMat(); 01634 01635 if( depth == CV_8U ) 01636 Bayer2Gray_<uchar, SIMDBayerInterpolator_8u>(src, dst, code); 01637 else if( depth == CV_16U ) 01638 Bayer2Gray_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code); 01639 else 01640 CV_Error(CV_StsUnsupportedFormat, "Bayer->Gray demosaicing only supports 8u and 16u types"); 01641 break; 01642 01643 case CV_BayerBG2BGR: case CV_BayerGB2BGR: case CV_BayerRG2BGR: case CV_BayerGR2BGR: 01644 case CV_BayerBG2BGR_VNG: case CV_BayerGB2BGR_VNG: case CV_BayerRG2BGR_VNG: case CV_BayerGR2BGR_VNG: 01645 { 01646 if (dcn <= 0) 01647 dcn = 3; 01648 CV_Assert( scn == 1 && (dcn == 3 || dcn == 4) ); 01649 01650 _dst.create(sz, CV_MAKE_TYPE(depth, dcn)); 01651 Mat dst_ = _dst.getMat(); 01652 01653 if( code == CV_BayerBG2BGR || code == CV_BayerGB2BGR || 01654 code == CV_BayerRG2BGR || code == CV_BayerGR2BGR ) 01655 { 01656 if( depth == CV_8U ) 01657 Bayer2RGB_<uchar, SIMDBayerInterpolator_8u>(src, dst_, code); 01658 else if( depth == CV_16U ) 01659 Bayer2RGB_<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst_, code); 01660 else 01661 CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB demosaicing only supports 8u and 16u types"); 01662 } 01663 else 01664 { 01665 CV_Assert( depth == CV_8U ); 01666 Bayer2RGB_VNG_8u(src, dst_, code); 01667 } 01668 } 01669 break; 01670 01671 case 
CV_BayerBG2BGR_EA: case CV_BayerGB2BGR_EA: case CV_BayerRG2BGR_EA: case CV_BayerGR2BGR_EA: 01672 if (dcn <= 0) 01673 dcn = 3; 01674 01675 CV_Assert(scn == 1 && dcn == 3); 01676 _dst.create(sz, CV_MAKETYPE(depth, dcn)); 01677 dst = _dst.getMat(); 01678 01679 if (depth == CV_8U) 01680 Bayer2RGB_EdgeAware_T<uchar, SIMDBayerInterpolator_8u>(src, dst, code); 01681 else if (depth == CV_16U) 01682 Bayer2RGB_EdgeAware_T<ushort, SIMDBayerStubInterpolator_<ushort> >(src, dst, code); 01683 else 01684 CV_Error(CV_StsUnsupportedFormat, "Bayer->RGB Edge-Aware demosaicing only currently supports 8u and 16u types"); 01685 01686 break; 01687 01688 default: 01689 CV_Error( CV_StsBadFlag, "Unknown / unsupported color conversion code" ); 01690 } 01691 } 01692
// Listing generated on Tue Jul 12 2022 14:46:32 by Doxygen 1.7.2