Final 350 project
Dependencies: uzair Camera_LS_Y201 F7_Ethernet LCD_DISCO_F746NG NetworkAPI SDFileSystem mbed
jidctint.c
00001 /* 00002 * jidctint.c 00003 * 00004 * Copyright (C) 1991-1998, Thomas G. Lane. 00005 * Modification developed 2002-2015 by Guido Vollbeding. 00006 * This file is part of the Independent JPEG Group's software. 00007 * For conditions of distribution and use, see the accompanying README file. 00008 * 00009 * This file contains a slow-but-accurate integer implementation of the 00010 * inverse DCT (Discrete Cosine Transform). In the IJG code, this routine 00011 * must also perform dequantization of the input coefficients. 00012 * 00013 * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT 00014 * on each row (or vice versa, but it's more convenient to emit a row at 00015 * a time). Direct algorithms are also available, but they are much more 00016 * complex and seem not to be any faster when reduced to code. 00017 * 00018 * This implementation is based on an algorithm described in 00019 * C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT 00020 * Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics, 00021 * Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991. 00022 * The primary algorithm described there uses 11 multiplies and 29 adds. 00023 * We use their alternate method with 12 multiplies and 32 adds. 00024 * The advantage of this method is that no data path contains more than one 00025 * multiplication; this allows a very simple and accurate implementation in 00026 * scaled fixed-point arithmetic, with a minimal number of shifts. 00027 * 00028 * We also provide IDCT routines with various output sample block sizes for 00029 * direct resolution reduction or enlargement and for direct resolving the 00030 * common 2x1 and 1x2 subsampling cases without additional resampling: NxN 00031 * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block. 00032 * 00033 * For N<8 we simply take the corresponding low-frequency coefficients of 00034 * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block 00035 * to yield the downscaled outputs. 00036 * This can be seen as direct low-pass downsampling from the DCT domain 00037 * point of view rather than the usual spatial domain point of view, 00038 * yielding significant computational savings and results at least 00039 * as good as common bilinear (averaging) spatial downsampling. 00040 * 00041 * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as 00042 * lower frequencies and higher frequencies assumed to be zero. 00043 * It turns out that the computational effort is similar to the 8x8 IDCT 00044 * regarding the output size. 00045 * Furthermore, the scaling and descaling is the same for all IDCT sizes. 00046 * 00047 * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases 00048 * since there would be too many additional constants to pre-calculate. 00049 */ 00050 00051 #define JPEG_INTERNALS 00052 #include "jinclude.h" 00053 #include "jpeglib.h" 00054 #include "jdct.h" /* Private declarations for DCT subsystem */ 00055 00056 #ifdef DCT_ISLOW_SUPPORTED 00057 00058 00059 /* 00060 * This module is specialized to the case DCTSIZE = 8. 00061 */ 00062 00063 #if DCTSIZE != 8 00064 Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */ 00065 #endif 00066 00067 00068 /* 00069 * The poop on this scaling stuff is as follows: 00070 * 00071 * Each 1-D IDCT step produces outputs which are a factor of sqrt(N) 00072 * larger than the true IDCT outputs. The final outputs are therefore 00073 * a factor of N larger than desired; since N=8 this can be cured by 00074 * a simple right shift at the end of the algorithm. The advantage of 00075 * this arrangement is that we save two multiplications per 1-D IDCT, 00076 * because the y0 and y4 inputs need not be divided by sqrt(N). 00077 * 00078 * We have to do addition and subtraction of the integer inputs, which 00079 * is no problem, and multiplication by fractional constants, which is 00080 * a problem to do in integer arithmetic. We multiply all the constants 00081 * by CONST_SCALE and convert them to integer constants (thus retaining 00082 * CONST_BITS bits of precision in the constants). After doing a 00083 * multiplication we have to divide the product by CONST_SCALE, with proper 00084 * rounding, to produce the correct output. This division can be done 00085 * cheaply as a right shift of CONST_BITS bits. We postpone shifting 00086 * as long as possible so that partial sums can be added together with 00087 * full fractional precision. 00088 * 00089 * The outputs of the first pass are scaled up by PASS1_BITS bits so that 00090 * they are represented to better-than-integral precision. These outputs 00091 * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word 00092 * with the recommended scaling. (To scale up 12-bit sample data further, an 00093 * intermediate INT32 array would be needed.) 00094 * 00095 * To avoid overflow of the 32-bit intermediate results in pass 2, we must 00096 * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26. Error analysis 00097 * shows that the values given below are the most effective. 00098 */ 00099 00100 #if BITS_IN_JSAMPLE == 8 00101 #define CONST_BITS 13 00102 #define PASS1_BITS 2 00103 #else 00104 #define CONST_BITS 13 00105 #define PASS1_BITS 1 /* lose a little precision to avoid overflow */ 00106 #endif 00107 00108 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus 00109 * causing a lot of useless floating-point operations at run time. 00110 * To get around this we use the following pre-calculated constants. 00111 * If you change CONST_BITS you may want to add appropriate values. 00112 * (With a reasonable C compiler, you can just rely on the FIX() macro...) 00113 */ 00114 00115 #if CONST_BITS == 13 00116 #define FIX_0_298631336 ((INT32) 2446) /* FIX(0.298631336) */ 00117 #define FIX_0_390180644 ((INT32) 3196) /* FIX(0.390180644) */ 00118 #define FIX_0_541196100 ((INT32) 4433) /* FIX(0.541196100) */ 00119 #define FIX_0_765366865 ((INT32) 6270) /* FIX(0.765366865) */ 00120 #define FIX_0_899976223 ((INT32) 7373) /* FIX(0.899976223) */ 00121 #define FIX_1_175875602 ((INT32) 9633) /* FIX(1.175875602) */ 00122 #define FIX_1_501321110 ((INT32) 12299) /* FIX(1.501321110) */ 00123 #define FIX_1_847759065 ((INT32) 15137) /* FIX(1.847759065) */ 00124 #define FIX_1_961570560 ((INT32) 16069) /* FIX(1.961570560) */ 00125 #define FIX_2_053119869 ((INT32) 16819) /* FIX(2.053119869) */ 00126 #define FIX_2_562915447 ((INT32) 20995) /* FIX(2.562915447) */ 00127 #define FIX_3_072711026 ((INT32) 25172) /* FIX(3.072711026) */ 00128 #else 00129 #define FIX_0_298631336 FIX(0.298631336) 00130 #define FIX_0_390180644 FIX(0.390180644) 00131 #define FIX_0_541196100 FIX(0.541196100) 00132 #define FIX_0_765366865 FIX(0.765366865) 00133 #define FIX_0_899976223 FIX(0.899976223) 00134 #define FIX_1_175875602 FIX(1.175875602) 00135 #define FIX_1_501321110 FIX(1.501321110) 00136 #define FIX_1_847759065 FIX(1.847759065) 00137 #define FIX_1_961570560 FIX(1.961570560) 00138 #define FIX_2_053119869 FIX(2.053119869) 00139 #define FIX_2_562915447 FIX(2.562915447) 00140 #define FIX_3_072711026 FIX(3.072711026) 00141 #endif 00142 00143 00144 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result. 00145 * For 8-bit samples with the recommended scaling, all the variable 00146 * and constant values involved are no more than 16 bits wide, so a 00147 * 16x16->32 bit multiply can be used instead of a full 32x32 multiply. 00148 * For 12-bit samples, a full 32-bit multiplication will be needed. 00149 */ 00150 00151 #if BITS_IN_JSAMPLE == 8 00152 #define MULTIPLY(var,const) MULTIPLY16C16(var,const) 00153 #else 00154 #define MULTIPLY(var,const) ((var) * (const)) 00155 #endif 00156 00157 00158 /* Dequantize a coefficient by multiplying it by the multiplier-table 00159 * entry; produce an int result. In this module, both inputs and result 00160 * are 16 bits or less, so either int or short multiply will work. 00161 */ 00162 00163 #define DEQUANTIZE(coef,quantval) (((ISLOW_MULT_TYPE) (coef)) * (quantval)) 00164 00165 00166 /* 00167 * Perform dequantization and inverse DCT on one block of coefficients. 00168 * 00169 * cK represents sqrt(2) * cos(K*pi/16). 00170 */ 00171 00172 GLOBAL(void) 00173 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr, 00174 JCOEFPTR coef_block, 00175 JSAMPARRAY output_buf, JDIMENSION output_col) 00176 { 00177 INT32 tmp0, tmp1, tmp2, tmp3; 00178 INT32 tmp10, tmp11, tmp12, tmp13; 00179 INT32 z1, z2, z3; 00180 JCOEFPTR inptr; 00181 ISLOW_MULT_TYPE * quantptr; 00182 int * wsptr; 00183 JSAMPROW outptr; 00184 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 00185 int ctr; 00186 int workspace[DCTSIZE2]; /* buffers data between passes */ 00187 SHIFT_TEMPS 00188 00189 /* Pass 1: process columns from input, store into work array. 00190 * Note results are scaled up by sqrt(8) compared to a true IDCT; 00191 * furthermore, we scale the results by 2**PASS1_BITS. 00192 */ 00193 00194 inptr = coef_block; 00195 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 00196 wsptr = workspace; 00197 for (ctr = DCTSIZE; ctr > 0; ctr--) { 00198 /* Due to quantization, we will usually find that many of the input 00199 * coefficients are zero, especially the AC terms. We can exploit this 00200 * by short-circuiting the IDCT calculation for any column in which all 00201 * the AC terms are zero. In that case each output is equal to the 00202 * DC coefficient (with scale factor as needed). 00203 * With typical images and quantization tables, half or more of the 00204 * column DCT calculations can be simplified this way. 00205 */ 00206 00207 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 00208 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 00209 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 00210 inptr[DCTSIZE*7] == 0) { 00211 /* AC terms all zero */ 00212 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 00213 00214 wsptr[DCTSIZE*0] = dcval; 00215 wsptr[DCTSIZE*1] = dcval; 00216 wsptr[DCTSIZE*2] = dcval; 00217 wsptr[DCTSIZE*3] = dcval; 00218 wsptr[DCTSIZE*4] = dcval; 00219 wsptr[DCTSIZE*5] = dcval; 00220 wsptr[DCTSIZE*6] = dcval; 00221 wsptr[DCTSIZE*7] = dcval; 00222 00223 inptr++; /* advance pointers to next column */ 00224 quantptr++; 00225 wsptr++; 00226 continue; 00227 } 00228 00229 /* Even part: reverse the even part of the forward DCT. 00230 * The rotator is c(-6). 00231 */ 00232 00233 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 00234 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 00235 z2 <<= CONST_BITS; 00236 z3 <<= CONST_BITS; 00237 /* Add fudge factor here for final descale. */ 00238 z2 += ONE << (CONST_BITS-PASS1_BITS-1); 00239 00240 tmp0 = z2 + z3; 00241 tmp1 = z2 - z3; 00242 00243 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 00244 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 00245 00246 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 00247 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 00248 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 00249 00250 tmp10 = tmp0 + tmp2; 00251 tmp13 = tmp0 - tmp2; 00252 tmp11 = tmp1 + tmp3; 00253 tmp12 = tmp1 - tmp3; 00254 00255 /* Odd part per figure 8; the matrix is unitary and hence its 00256 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 00257 */ 00258 00259 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 00260 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 00261 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 00262 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 00263 00264 z2 = tmp0 + tmp2; 00265 z3 = tmp1 + tmp3; 00266 00267 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 00268 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 00269 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 00270 z2 += z1; 00271 z3 += z1; 00272 00273 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 00274 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 00275 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 00276 tmp0 += z1 + z2; 00277 tmp3 += z1 + z3; 00278 00279 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 00280 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 00281 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 00282 tmp1 += z1 + z3; 00283 tmp2 += z1 + z2; 00284 00285 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 00286 00287 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 00288 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 00289 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 00290 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 00291 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 00292 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 00293 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 00294 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 00295 00296 inptr++; /* advance pointers to next column */ 00297 quantptr++; 00298 wsptr++; 00299 } 00300 00301 /* Pass 2: process rows from work array, store into output array. 00302 * Note that we must descale the results by a factor of 8 == 2**3, 00303 * and also undo the PASS1_BITS scaling. 00304 */ 00305 00306 wsptr = workspace; 00307 for (ctr = 0; ctr < DCTSIZE; ctr++) { 00308 outptr = output_buf[ctr] + output_col; 00309 00310 /* Add range center and fudge factor for final descale and range-limit. */ 00311 z2 = (INT32) wsptr[0] + 00312 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 00313 (ONE << (PASS1_BITS+2))); 00314 00315 /* Rows of zeroes can be exploited in the same way as we did with columns. 00316 * However, the column calculation has created many nonzero AC terms, so 00317 * the simplification applies less often (typically 5% to 10% of the time). 00318 * On machines with very fast multiplication, it's possible that the 00319 * test takes more time than it's worth. In that case this section 00320 * may be commented out. 00321 */ 00322 00323 #ifndef NO_ZERO_ROW_TEST 00324 if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 && 00325 wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) { 00326 /* AC terms all zero */ 00327 JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3) 00328 & RANGE_MASK]; 00329 00330 outptr[0] = dcval; 00331 outptr[1] = dcval; 00332 outptr[2] = dcval; 00333 outptr[3] = dcval; 00334 outptr[4] = dcval; 00335 outptr[5] = dcval; 00336 outptr[6] = dcval; 00337 outptr[7] = dcval; 00338 00339 wsptr += DCTSIZE; /* advance pointer to next row */ 00340 continue; 00341 } 00342 #endif 00343 00344 /* Even part: reverse the even part of the forward DCT. 00345 * The rotator is c(-6). 00346 */ 00347 00348 z3 = (INT32) wsptr[4]; 00349 00350 tmp0 = (z2 + z3) << CONST_BITS; 00351 tmp1 = (z2 - z3) << CONST_BITS; 00352 00353 z2 = (INT32) wsptr[2]; 00354 z3 = (INT32) wsptr[6]; 00355 00356 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 00357 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 00358 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 00359 00360 tmp10 = tmp0 + tmp2; 00361 tmp13 = tmp0 - tmp2; 00362 tmp11 = tmp1 + tmp3; 00363 tmp12 = tmp1 - tmp3; 00364 00365 /* Odd part per figure 8; the matrix is unitary and hence its 00366 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 00367 */ 00368 00369 tmp0 = (INT32) wsptr[7]; 00370 tmp1 = (INT32) wsptr[5]; 00371 tmp2 = (INT32) wsptr[3]; 00372 tmp3 = (INT32) wsptr[1]; 00373 00374 z2 = tmp0 + tmp2; 00375 z3 = tmp1 + tmp3; 00376 00377 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 00378 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 00379 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 00380 z2 += z1; 00381 z3 += z1; 00382 00383 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 00384 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 00385 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 00386 tmp0 += z1 + z2; 00387 tmp3 += z1 + z3; 00388 00389 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 00390 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 00391 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 00392 tmp1 += z1 + z3; 00393 tmp2 += z1 + z2; 00394 00395 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 00396 00397 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, 00398 CONST_BITS+PASS1_BITS+3) 00399 & RANGE_MASK]; 00400 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, 00401 CONST_BITS+PASS1_BITS+3) 00402 & RANGE_MASK]; 00403 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, 00404 CONST_BITS+PASS1_BITS+3) 00405 & RANGE_MASK]; 00406 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, 00407 CONST_BITS+PASS1_BITS+3) 00408 & RANGE_MASK]; 00409 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, 00410 CONST_BITS+PASS1_BITS+3) 00411 & RANGE_MASK]; 00412 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, 00413 CONST_BITS+PASS1_BITS+3) 00414 & RANGE_MASK]; 00415 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, 00416 CONST_BITS+PASS1_BITS+3) 00417 & RANGE_MASK]; 00418 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, 00419 CONST_BITS+PASS1_BITS+3) 00420 & RANGE_MASK]; 00421 00422 wsptr += DCTSIZE; /* advance pointer to next row */ 00423 } 00424 } 00425 00426 #ifdef IDCT_SCALING_SUPPORTED 00427 00428 00429 /* 00430 * Perform dequantization and inverse DCT on one block of coefficients, 00431 * producing a 7x7 output block. 00432 * 00433 * Optimized algorithm with 12 multiplications in the 1-D kernel. 00434 * cK represents sqrt(2) * cos(K*pi/14). 00435 */ 00436 00437 GLOBAL(void) 00438 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 00439 JCOEFPTR coef_block, 00440 JSAMPARRAY output_buf, JDIMENSION output_col) 00441 { 00442 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13; 00443 INT32 z1, z2, z3; 00444 JCOEFPTR inptr; 00445 ISLOW_MULT_TYPE * quantptr; 00446 int * wsptr; 00447 JSAMPROW outptr; 00448 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 00449 int ctr; 00450 int workspace[7*7]; /* buffers data between passes */ 00451 SHIFT_TEMPS 00452 00453 /* Pass 1: process columns from input, store into work array. */ 00454 00455 inptr = coef_block; 00456 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 00457 wsptr = workspace; 00458 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { 00459 /* Even part */ 00460 00461 tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 00462 tmp13 <<= CONST_BITS; 00463 /* Add fudge factor here for final descale. */ 00464 tmp13 += ONE << (CONST_BITS-PASS1_BITS-1); 00465 00466 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 00467 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 00468 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 00469 00470 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 00471 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 00472 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 00473 tmp0 = z1 + z3; 00474 z2 -= tmp0; 00475 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ 00476 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 00477 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 00478 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 00479 00480 /* Odd part */ 00481 00482 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 00483 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 00484 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 00485 00486 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 00487 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 00488 tmp0 = tmp1 - tmp2; 00489 tmp1 += tmp2; 00490 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 00491 tmp1 += tmp2; 00492 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 00493 tmp0 += z2; 00494 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 00495 00496 /* Final output stage */ 00497 00498 wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 00499 wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 00500 wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 00501 wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 00502 wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 00503 wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 00504 wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS); 00505 } 00506 00507 /* Pass 2: process 7 rows from work array, store into output array. */ 00508 00509 wsptr = workspace; 00510 for (ctr = 0; ctr < 7; ctr++) { 00511 outptr = output_buf[ctr] + output_col; 00512 00513 /* Even part */ 00514 00515 /* Add range center and fudge factor for final descale and range-limit. */ 00516 tmp13 = (INT32) wsptr[0] + 00517 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 00518 (ONE << (PASS1_BITS+2))); 00519 tmp13 <<= CONST_BITS; 00520 00521 z1 = (INT32) wsptr[2]; 00522 z2 = (INT32) wsptr[4]; 00523 z3 = (INT32) wsptr[6]; 00524 00525 tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 00526 tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 00527 tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 00528 tmp0 = z1 + z3; 00529 z2 -= tmp0; 00530 tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */ 00531 tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 00532 tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 00533 tmp13 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 00534 00535 /* Odd part */ 00536 00537 z1 = (INT32) wsptr[1]; 00538 z2 = (INT32) wsptr[3]; 00539 z3 = (INT32) wsptr[5]; 00540 00541 tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 00542 tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 00543 tmp0 = tmp1 - tmp2; 00544 tmp1 += tmp2; 00545 tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 00546 tmp1 += tmp2; 00547 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 00548 tmp0 += z2; 00549 tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 00550 00551 /* Final output stage */ 00552 00553 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 00554 CONST_BITS+PASS1_BITS+3) 00555 & RANGE_MASK]; 00556 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 00557 CONST_BITS+PASS1_BITS+3) 00558 & RANGE_MASK]; 00559 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 00560 CONST_BITS+PASS1_BITS+3) 00561 & RANGE_MASK]; 00562 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 00563 CONST_BITS+PASS1_BITS+3) 00564 & RANGE_MASK]; 00565 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 00566 CONST_BITS+PASS1_BITS+3) 00567 & RANGE_MASK]; 00568 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 00569 CONST_BITS+PASS1_BITS+3) 00570 & RANGE_MASK]; 00571 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13, 00572 CONST_BITS+PASS1_BITS+3) 00573 & RANGE_MASK]; 00574 00575 wsptr += 7; /* advance pointer to next row */ 00576 } 00577 } 00578 00579 00580 /* 00581 * Perform dequantization and inverse DCT on one block of coefficients, 00582 * producing a reduced-size 6x6 output block. 00583 * 00584 * Optimized algorithm with 3 multiplications in the 1-D kernel. 00585 * cK represents sqrt(2) * cos(K*pi/12). 00586 */ 00587 00588 GLOBAL(void) 00589 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 00590 JCOEFPTR coef_block, 00591 JSAMPARRAY output_buf, JDIMENSION output_col) 00592 { 00593 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; 00594 INT32 z1, z2, z3; 00595 JCOEFPTR inptr; 00596 ISLOW_MULT_TYPE * quantptr; 00597 int * wsptr; 00598 JSAMPROW outptr; 00599 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 00600 int ctr; 00601 int workspace[6*6]; /* buffers data between passes */ 00602 SHIFT_TEMPS 00603 00604 /* Pass 1: process columns from input, store into work array. */ 00605 00606 inptr = coef_block; 00607 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 00608 wsptr = workspace; 00609 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { 00610 /* Even part */ 00611 00612 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 00613 tmp0 <<= CONST_BITS; 00614 /* Add fudge factor here for final descale. */ 00615 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 00616 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 00617 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 00618 tmp1 = tmp0 + tmp10; 00619 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); 00620 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 00621 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 00622 tmp10 = tmp1 + tmp0; 00623 tmp12 = tmp1 - tmp0; 00624 00625 /* Odd part */ 00626 00627 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 00628 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 00629 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 00630 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 00631 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 00632 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 00633 tmp1 = (z1 - z2 - z3) << PASS1_BITS; 00634 00635 /* Final output stage */ 00636 00637 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 00638 wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 00639 wsptr[6*1] = (int) (tmp11 + tmp1); 00640 wsptr[6*4] = (int) (tmp11 - tmp1); 00641 wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 00642 wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 00643 } 00644 00645 /* Pass 2: process 6 rows from work array, store into output array. */ 00646 00647 wsptr = workspace; 00648 for (ctr = 0; ctr < 6; ctr++) { 00649 outptr = output_buf[ctr] + output_col; 00650 00651 /* Even part */ 00652 00653 /* Add range center and fudge factor for final descale and range-limit. */ 00654 tmp0 = (INT32) wsptr[0] + 00655 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 00656 (ONE << (PASS1_BITS+2))); 00657 tmp0 <<= CONST_BITS; 00658 tmp2 = (INT32) wsptr[4]; 00659 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 00660 tmp1 = tmp0 + tmp10; 00661 tmp11 = tmp0 - tmp10 - tmp10; 00662 tmp10 = (INT32) wsptr[2]; 00663 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 00664 tmp10 = tmp1 + tmp0; 00665 tmp12 = tmp1 - tmp0; 00666 00667 /* Odd part */ 00668 00669 z1 = (INT32) wsptr[1]; 00670 z2 = (INT32) wsptr[3]; 00671 z3 = (INT32) wsptr[5]; 00672 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 00673 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 00674 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 00675 tmp1 = (z1 - z2 - z3) << CONST_BITS; 00676 00677 /* Final output stage */ 00678 00679 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 00680 CONST_BITS+PASS1_BITS+3) 00681 & RANGE_MASK]; 00682 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 00683 CONST_BITS+PASS1_BITS+3) 00684 & RANGE_MASK]; 00685 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 00686 CONST_BITS+PASS1_BITS+3) 00687 & RANGE_MASK]; 00688 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 00689 CONST_BITS+PASS1_BITS+3) 00690 & RANGE_MASK]; 00691 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 00692 CONST_BITS+PASS1_BITS+3) 00693 & RANGE_MASK]; 00694 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 00695 CONST_BITS+PASS1_BITS+3) 00696 & RANGE_MASK]; 00697 00698 wsptr += 6; /* advance pointer to next row */ 00699 } 00700 } 00701 00702 00703 /* 00704 * Perform dequantization and inverse DCT on one block of coefficients, 00705 * producing a reduced-size 5x5 output block. 00706 * 00707 * Optimized algorithm with 5 multiplications in the 1-D kernel. 00708 * cK represents sqrt(2) * cos(K*pi/10). 00709 */ 00710 00711 GLOBAL(void) 00712 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 00713 JCOEFPTR coef_block, 00714 JSAMPARRAY output_buf, JDIMENSION output_col) 00715 { 00716 INT32 tmp0, tmp1, tmp10, tmp11, tmp12; 00717 INT32 z1, z2, z3; 00718 JCOEFPTR inptr; 00719 ISLOW_MULT_TYPE * quantptr; 00720 int * wsptr; 00721 JSAMPROW outptr; 00722 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 00723 int ctr; 00724 int workspace[5*5]; /* buffers data between passes */ 00725 SHIFT_TEMPS 00726 00727 /* Pass 1: process columns from input, store into work array. */ 00728 00729 inptr = coef_block; 00730 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 00731 wsptr = workspace; 00732 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { 00733 /* Even part */ 00734 00735 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 00736 tmp12 <<= CONST_BITS; 00737 /* Add fudge factor here for final descale. */ 00738 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); 00739 tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 00740 tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 00741 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ 00742 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ 00743 z3 = tmp12 + z2; 00744 tmp10 = z3 + z1; 00745 tmp11 = z3 - z1; 00746 tmp12 -= z2 << 2; 00747 00748 /* Odd part */ 00749 00750 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 00751 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 00752 00753 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 00754 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 00755 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 00756 00757 /* Final output stage */ 00758 00759 wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 00760 wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 00761 wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 00762 wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 00763 wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); 00764 } 00765 00766 /* Pass 2: process 5 rows from work array, store into output array. */ 00767 00768 wsptr = workspace; 00769 for (ctr = 0; ctr < 5; ctr++) { 00770 outptr = output_buf[ctr] + output_col; 00771 00772 /* Even part */ 00773 00774 /* Add range center and fudge factor for final descale and range-limit. */ 00775 tmp12 = (INT32) wsptr[0] + 00776 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 00777 (ONE << (PASS1_BITS+2))); 00778 tmp12 <<= CONST_BITS; 00779 tmp0 = (INT32) wsptr[2]; 00780 tmp1 = (INT32) wsptr[4]; 00781 z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */ 00782 z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */ 00783 z3 = tmp12 + z2; 00784 tmp10 = z3 + z1; 00785 tmp11 = z3 - z1; 00786 tmp12 -= z2 << 2; 00787 00788 /* Odd part */ 00789 00790 z2 = (INT32) wsptr[1]; 00791 z3 = (INT32) wsptr[3]; 00792 00793 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 00794 tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 00795 tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 00796 00797 /* Final output stage */ 00798 00799 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 00800 CONST_BITS+PASS1_BITS+3) 00801 & RANGE_MASK]; 00802 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 00803 CONST_BITS+PASS1_BITS+3) 00804 & RANGE_MASK]; 00805 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 00806 CONST_BITS+PASS1_BITS+3) 00807 & RANGE_MASK]; 00808 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 00809 CONST_BITS+PASS1_BITS+3) 00810 & RANGE_MASK]; 00811 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, 00812 CONST_BITS+PASS1_BITS+3) 00813 & RANGE_MASK]; 00814 00815 wsptr += 5; /* advance pointer to next row */ 00816 } 00817 } 00818 00819 00820 /* 00821 * Perform dequantization and inverse DCT on one block of coefficients, 00822 * producing a reduced-size 4x4 output block. 00823 * 00824 * Optimized algorithm with 3 multiplications in the 1-D kernel. 00825 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 00826 */ 00827 00828 GLOBAL(void) 00829 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 00830 JCOEFPTR coef_block, 00831 JSAMPARRAY output_buf, JDIMENSION output_col) 00832 { 00833 INT32 tmp0, tmp2, tmp10, tmp12; 00834 INT32 z1, z2, z3; 00835 JCOEFPTR inptr; 00836 ISLOW_MULT_TYPE * quantptr; 00837 int * wsptr; 00838 JSAMPROW outptr; 00839 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 00840 int ctr; 00841 int workspace[4*4]; /* buffers data between passes */ 00842 SHIFT_TEMPS 00843 00844 /* Pass 1: process columns from input, store into work array. */ 00845 00846 inptr = coef_block; 00847 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 00848 wsptr = workspace; 00849 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { 00850 /* Even part */ 00851 00852 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 00853 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 00854 00855 tmp10 = (tmp0 + tmp2) << PASS1_BITS; 00856 tmp12 = (tmp0 - tmp2) << PASS1_BITS; 00857 00858 /* Odd part */ 00859 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 00860 00861 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 00862 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 00863 00864 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 00865 /* Add fudge factor here for final descale. */ 00866 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 00867 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ 00868 CONST_BITS-PASS1_BITS); 00869 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ 00870 CONST_BITS-PASS1_BITS); 00871 00872 /* Final output stage */ 00873 00874 wsptr[4*0] = (int) (tmp10 + tmp0); 00875 wsptr[4*3] = (int) (tmp10 - tmp0); 00876 wsptr[4*1] = (int) (tmp12 + tmp2); 00877 wsptr[4*2] = (int) (tmp12 - tmp2); 00878 } 00879 00880 /* Pass 2: process 4 rows from work array, store into output array. */ 00881 00882 wsptr = workspace; 00883 for (ctr = 0; ctr < 4; ctr++) { 00884 outptr = output_buf[ctr] + output_col; 00885 00886 /* Even part */ 00887 00888 /* Add range center and fudge factor for final descale and range-limit. */ 00889 tmp0 = (INT32) wsptr[0] + 00890 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 00891 (ONE << (PASS1_BITS+2))); 00892 tmp2 = (INT32) wsptr[2]; 00893 00894 tmp10 = (tmp0 + tmp2) << CONST_BITS; 00895 tmp12 = (tmp0 - tmp2) << CONST_BITS; 00896 00897 /* Odd part */ 00898 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 00899 00900 z2 = (INT32) wsptr[1]; 00901 z3 = (INT32) wsptr[3]; 00902 00903 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 00904 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 00905 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 00906 00907 /* Final output stage */ 00908 00909 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 00910 CONST_BITS+PASS1_BITS+3) 00911 & RANGE_MASK]; 00912 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 00913 CONST_BITS+PASS1_BITS+3) 00914 & RANGE_MASK]; 00915 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 00916 CONST_BITS+PASS1_BITS+3) 00917 & RANGE_MASK]; 00918 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 00919 CONST_BITS+PASS1_BITS+3) 00920 & RANGE_MASK]; 00921 00922 wsptr += 4; /* advance pointer to next row */ 00923 } 00924 } 00925 00926 00927 /* 00928 * Perform dequantization and inverse DCT on one block of coefficients, 00929 * producing a reduced-size 3x3 output block. 00930 * 00931 * Optimized algorithm with 2 multiplications in the 1-D kernel. 00932 * cK represents sqrt(2) * cos(K*pi/6). 00933 */ 00934 00935 GLOBAL(void) 00936 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 00937 JCOEFPTR coef_block, 00938 JSAMPARRAY output_buf, JDIMENSION output_col) 00939 { 00940 INT32 tmp0, tmp2, tmp10, tmp12; 00941 JCOEFPTR inptr; 00942 ISLOW_MULT_TYPE * quantptr; 00943 int * wsptr; 00944 JSAMPROW outptr; 00945 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 00946 int ctr; 00947 int workspace[3*3]; /* buffers data between passes */ 00948 SHIFT_TEMPS 00949 00950 /* Pass 1: process columns from input, store into work array. */ 00951 00952 inptr = coef_block; 00953 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 00954 wsptr = workspace; 00955 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { 00956 /* Even part */ 00957 00958 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 00959 tmp0 <<= CONST_BITS; 00960 /* Add fudge factor here for final descale. */ 00961 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 00962 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 00963 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 00964 tmp10 = tmp0 + tmp12; 00965 tmp2 = tmp0 - tmp12 - tmp12; 00966 00967 /* Odd part */ 00968 00969 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 00970 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 00971 00972 /* Final output stage */ 00973 00974 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 00975 wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 00976 wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); 00977 } 00978 00979 /* Pass 2: process 3 rows from work array, store into output array. */ 00980 00981 wsptr = workspace; 00982 for (ctr = 0; ctr < 3; ctr++) { 00983 outptr = output_buf[ctr] + output_col; 00984 00985 /* Even part */ 00986 00987 /* Add range center and fudge factor for final descale and range-limit. */ 00988 tmp0 = (INT32) wsptr[0] + 00989 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 00990 (ONE << (PASS1_BITS+2))); 00991 tmp0 <<= CONST_BITS; 00992 tmp2 = (INT32) wsptr[2]; 00993 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 00994 tmp10 = tmp0 + tmp12; 00995 tmp2 = tmp0 - tmp12 - tmp12; 00996 00997 /* Odd part */ 00998 00999 tmp12 = (INT32) wsptr[1]; 01000 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 01001 01002 /* Final output stage */ 01003 01004 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 01005 CONST_BITS+PASS1_BITS+3) 01006 & RANGE_MASK]; 01007 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 01008 CONST_BITS+PASS1_BITS+3) 01009 & RANGE_MASK]; 01010 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, 01011 CONST_BITS+PASS1_BITS+3) 01012 & RANGE_MASK]; 01013 01014 wsptr += 3; /* advance pointer to next row */ 01015 } 01016 } 01017 01018 01019 /* 01020 * Perform dequantization and inverse DCT on one block of coefficients, 01021 * producing a reduced-size 2x2 output block. 01022 * 01023 * Multiplication-less algorithm. 01024 */ 01025 01026 GLOBAL(void) 01027 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 01028 JCOEFPTR coef_block, 01029 JSAMPARRAY output_buf, JDIMENSION output_col) 01030 { 01031 DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; 01032 ISLOW_MULT_TYPE * quantptr; 01033 JSAMPROW outptr; 01034 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 01035 ISHIFT_TEMPS 01036 01037 /* Pass 1: process columns from input. */ 01038 01039 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 01040 01041 /* Column 0 */ 01042 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); 01043 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); 01044 /* Add range center and fudge factor for final descale and range-limit. */ 01045 tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); 01046 01047 tmp0 = tmp4 + tmp5; 01048 tmp2 = tmp4 - tmp5; 01049 01050 /* Column 1 */ 01051 tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]); 01052 tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]); 01053 01054 tmp1 = tmp4 + tmp5; 01055 tmp3 = tmp4 - tmp5; 01056 01057 /* Pass 2: process 2 rows, store into output array. */ 01058 01059 /* Row 0 */ 01060 outptr = output_buf[0] + output_col; 01061 01062 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; 01063 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; 01064 01065 /* Row 1 */ 01066 outptr = output_buf[1] + output_col; 01067 01068 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK]; 01069 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK]; 01070 } 01071 01072 01073 /* 01074 * Perform dequantization and inverse DCT on one block of coefficients, 01075 * producing a reduced-size 1x1 output block. 01076 * 01077 * We hardly need an inverse DCT routine for this: just take the 01078 * average pixel value, which is one-eighth of the DC coefficient. 01079 */ 01080 01081 GLOBAL(void) 01082 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 01083 JCOEFPTR coef_block, 01084 JSAMPARRAY output_buf, JDIMENSION output_col) 01085 { 01086 DCTELEM dcval; 01087 ISLOW_MULT_TYPE * quantptr; 01088 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 01089 ISHIFT_TEMPS 01090 01091 /* 1x1 is trivial: just take the DC coefficient divided by 8. */ 01092 01093 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 01094 01095 dcval = DEQUANTIZE(coef_block[0], quantptr[0]); 01096 /* Add range center and fudge factor for descale and range-limit. */ 01097 dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); 01098 01099 output_buf[0][output_col] = 01100 range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK]; 01101 } 01102 01103 01104 /* 01105 * Perform dequantization and inverse DCT on one block of coefficients, 01106 * producing a 9x9 output block. 01107 * 01108 * Optimized algorithm with 10 multiplications in the 1-D kernel. 01109 * cK represents sqrt(2) * cos(K*pi/18). 01110 */ 01111 01112 GLOBAL(void) 01113 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 01114 JCOEFPTR coef_block, 01115 JSAMPARRAY output_buf, JDIMENSION output_col) 01116 { 01117 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14; 01118 INT32 z1, z2, z3, z4; 01119 JCOEFPTR inptr; 01120 ISLOW_MULT_TYPE * quantptr; 01121 int * wsptr; 01122 JSAMPROW outptr; 01123 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 01124 int ctr; 01125 int workspace[8*9]; /* buffers data between passes */ 01126 SHIFT_TEMPS 01127 01128 /* Pass 1: process columns from input, store into work array. */ 01129 01130 inptr = coef_block; 01131 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 01132 wsptr = workspace; 01133 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 01134 /* Even part */ 01135 01136 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 01137 tmp0 <<= CONST_BITS; 01138 /* Add fudge factor here for final descale. */ 01139 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 01140 01141 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 01142 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 01143 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 01144 01145 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ 01146 tmp1 = tmp0 + tmp3; 01147 tmp2 = tmp0 - tmp3 - tmp3; 01148 01149 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ 01150 tmp11 = tmp2 + tmp0; 01151 tmp14 = tmp2 - tmp0 - tmp0; 01152 01153 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ 01154 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ 01155 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ 01156 01157 tmp10 = tmp1 + tmp0 - tmp3; 01158 tmp12 = tmp1 - tmp0 + tmp2; 01159 tmp13 = tmp1 - tmp2 + tmp3; 01160 01161 /* Odd part */ 01162 01163 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 01164 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 01165 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 01166 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 01167 01168 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ 01169 01170 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ 01171 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ 01172 tmp0 = tmp2 + tmp3 - z2; 01173 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ 01174 tmp2 += z2 - tmp1; 01175 tmp3 += z2 + tmp1; 01176 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ 01177 01178 /* Final output stage */ 01179 01180 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 01181 wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 01182 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS); 01183 wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS); 01184 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 01185 wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 01186 wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS); 01187 wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS); 01188 wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS); 01189 } 01190 01191 /* Pass 2: process 9 rows from work array, store into output array. */ 01192 01193 wsptr = workspace; 01194 for (ctr = 0; ctr < 9; ctr++) { 01195 outptr = output_buf[ctr] + output_col; 01196 01197 /* Even part */ 01198 01199 /* Add range center and fudge factor for final descale and range-limit. */ 01200 tmp0 = (INT32) wsptr[0] + 01201 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 01202 (ONE << (PASS1_BITS+2))); 01203 tmp0 <<= CONST_BITS; 01204 01205 z1 = (INT32) wsptr[2]; 01206 z2 = (INT32) wsptr[4]; 01207 z3 = (INT32) wsptr[6]; 01208 01209 tmp3 = MULTIPLY(z3, FIX(0.707106781)); /* c6 */ 01210 tmp1 = tmp0 + tmp3; 01211 tmp2 = tmp0 - tmp3 - tmp3; 01212 01213 tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */ 01214 tmp11 = tmp2 + tmp0; 01215 tmp14 = tmp2 - tmp0 - tmp0; 01216 01217 tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */ 01218 tmp2 = MULTIPLY(z1, FIX(1.083350441)); /* c4 */ 01219 tmp3 = MULTIPLY(z2, FIX(0.245575608)); /* c8 */ 01220 01221 tmp10 = tmp1 + tmp0 - tmp3; 01222 tmp12 = tmp1 - tmp0 + tmp2; 01223 tmp13 = tmp1 - tmp2 + tmp3; 01224 01225 /* Odd part */ 01226 01227 z1 = (INT32) wsptr[1]; 01228 z2 = (INT32) wsptr[3]; 01229 z3 = (INT32) wsptr[5]; 01230 z4 = (INT32) wsptr[7]; 01231 01232 z2 = MULTIPLY(z2, - FIX(1.224744871)); /* -c3 */ 01233 01234 tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955)); /* c5 */ 01235 tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525)); /* c7 */ 01236 tmp0 = tmp2 + tmp3 - z2; 01237 tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481)); /* c1 */ 01238 tmp2 += z2 - tmp1; 01239 tmp3 += z2 + tmp1; 01240 tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */ 01241 01242 /* Final output stage */ 01243 01244 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 01245 CONST_BITS+PASS1_BITS+3) 01246 & RANGE_MASK]; 01247 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 01248 CONST_BITS+PASS1_BITS+3) 01249 & RANGE_MASK]; 01250 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 01251 CONST_BITS+PASS1_BITS+3) 01252 & RANGE_MASK]; 01253 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 01254 CONST_BITS+PASS1_BITS+3) 01255 & RANGE_MASK]; 01256 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 01257 CONST_BITS+PASS1_BITS+3) 01258 & RANGE_MASK]; 01259 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 01260 CONST_BITS+PASS1_BITS+3) 01261 & RANGE_MASK]; 01262 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3, 01263 CONST_BITS+PASS1_BITS+3) 01264 & RANGE_MASK]; 01265 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3, 01266 CONST_BITS+PASS1_BITS+3) 01267 & RANGE_MASK]; 01268 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14, 01269 CONST_BITS+PASS1_BITS+3) 01270 & RANGE_MASK]; 01271 01272 wsptr += 8; /* advance pointer to next row */ 01273 } 01274 } 01275 01276 01277 /* 01278 * Perform dequantization and inverse DCT on one block of coefficients, 01279 * producing a 10x10 output block. 01280 * 01281 * Optimized algorithm with 12 multiplications in the 1-D kernel. 01282 * cK represents sqrt(2) * cos(K*pi/20). 01283 */ 01284 01285 GLOBAL(void) 01286 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 01287 JCOEFPTR coef_block, 01288 JSAMPARRAY output_buf, JDIMENSION output_col) 01289 { 01290 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 01291 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; 01292 INT32 z1, z2, z3, z4, z5; 01293 JCOEFPTR inptr; 01294 ISLOW_MULT_TYPE * quantptr; 01295 int * wsptr; 01296 JSAMPROW outptr; 01297 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 01298 int ctr; 01299 int workspace[8*10]; /* buffers data between passes */ 01300 SHIFT_TEMPS 01301 01302 /* Pass 1: process columns from input, store into work array. */ 01303 01304 inptr = coef_block; 01305 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 01306 wsptr = workspace; 01307 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 01308 /* Even part */ 01309 01310 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 01311 z3 <<= CONST_BITS; 01312 /* Add fudge factor here for final descale. */ 01313 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 01314 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 01315 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 01316 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 01317 tmp10 = z3 + z1; 01318 tmp11 = z3 - z2; 01319 01320 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ 01321 CONST_BITS-PASS1_BITS); 01322 01323 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 01324 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 01325 01326 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 01327 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 01328 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 01329 01330 tmp20 = tmp10 + tmp12; 01331 tmp24 = tmp10 - tmp12; 01332 tmp21 = tmp11 + tmp13; 01333 tmp23 = tmp11 - tmp13; 01334 01335 /* Odd part */ 01336 01337 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 01338 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 01339 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 01340 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 01341 01342 tmp11 = z2 + z4; 01343 tmp13 = z2 - z4; 01344 01345 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 01346 z5 = z3 << CONST_BITS; 01347 01348 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 01349 z4 = z5 + tmp12; 01350 01351 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 01352 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 01353 01354 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 01355 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); 01356 01357 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; 01358 01359 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 01360 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 01361 01362 /* Final output stage */ 01363 01364 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 01365 wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 01366 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 01367 wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 01368 wsptr[8*2] = (int) (tmp22 + tmp12); 01369 wsptr[8*7] = (int) (tmp22 - tmp12); 01370 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 01371 wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 01372 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 01373 wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 01374 } 01375 01376 /* Pass 2: process 10 rows from work array, store into output array. */ 01377 01378 wsptr = workspace; 01379 for (ctr = 0; ctr < 10; ctr++) { 01380 outptr = output_buf[ctr] + output_col; 01381 01382 /* Even part */ 01383 01384 /* Add range center and fudge factor for final descale and range-limit. */ 01385 z3 = (INT32) wsptr[0] + 01386 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 01387 (ONE << (PASS1_BITS+2))); 01388 z3 <<= CONST_BITS; 01389 z4 = (INT32) wsptr[4]; 01390 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 01391 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 01392 tmp10 = z3 + z1; 01393 tmp11 = z3 - z2; 01394 01395 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ 01396 01397 z2 = (INT32) wsptr[2]; 01398 z3 = (INT32) wsptr[6]; 01399 01400 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 01401 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 01402 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 01403 01404 tmp20 = tmp10 + tmp12; 01405 tmp24 = tmp10 - tmp12; 01406 tmp21 = tmp11 + tmp13; 01407 tmp23 = tmp11 - tmp13; 01408 01409 /* Odd part */ 01410 01411 z1 = (INT32) wsptr[1]; 01412 z2 = (INT32) wsptr[3]; 01413 z3 = (INT32) wsptr[5]; 01414 z3 <<= CONST_BITS; 01415 z4 = (INT32) wsptr[7]; 01416 01417 tmp11 = z2 + z4; 01418 tmp13 = z2 - z4; 01419 01420 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 01421 01422 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 01423 z4 = z3 + tmp12; 01424 01425 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 01426 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 01427 01428 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 01429 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); 01430 01431 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; 01432 01433 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 01434 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 01435 01436 /* Final output stage */ 01437 01438 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 01439 CONST_BITS+PASS1_BITS+3) 01440 & RANGE_MASK]; 01441 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 01442 CONST_BITS+PASS1_BITS+3) 01443 & RANGE_MASK]; 01444 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 01445 CONST_BITS+PASS1_BITS+3) 01446 & RANGE_MASK]; 01447 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 01448 CONST_BITS+PASS1_BITS+3) 01449 & RANGE_MASK]; 01450 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 01451 CONST_BITS+PASS1_BITS+3) 01452 & RANGE_MASK]; 01453 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 01454 CONST_BITS+PASS1_BITS+3) 01455 & RANGE_MASK]; 01456 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 01457 CONST_BITS+PASS1_BITS+3) 01458 & RANGE_MASK]; 01459 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 01460 CONST_BITS+PASS1_BITS+3) 01461 & RANGE_MASK]; 01462 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 01463 CONST_BITS+PASS1_BITS+3) 01464 & RANGE_MASK]; 01465 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 01466 CONST_BITS+PASS1_BITS+3) 01467 & RANGE_MASK]; 01468 01469 wsptr += 8; /* advance pointer to next row */ 01470 } 01471 } 01472 01473 01474 /* 01475 * Perform dequantization and inverse DCT on one block of coefficients, 01476 * producing a 11x11 output block. 01477 * 01478 * Optimized algorithm with 24 multiplications in the 1-D kernel. 01479 * cK represents sqrt(2) * cos(K*pi/22). 01480 */ 01481 01482 GLOBAL(void) 01483 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 01484 JCOEFPTR coef_block, 01485 JSAMPARRAY output_buf, JDIMENSION output_col) 01486 { 01487 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 01488 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 01489 INT32 z1, z2, z3, z4; 01490 JCOEFPTR inptr; 01491 ISLOW_MULT_TYPE * quantptr; 01492 int * wsptr; 01493 JSAMPROW outptr; 01494 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 01495 int ctr; 01496 int workspace[8*11]; /* buffers data between passes */ 01497 SHIFT_TEMPS 01498 01499 /* Pass 1: process columns from input, store into work array. */ 01500 01501 inptr = coef_block; 01502 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 01503 wsptr = workspace; 01504 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 01505 /* Even part */ 01506 01507 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 01508 tmp10 <<= CONST_BITS; 01509 /* Add fudge factor here for final descale. */ 01510 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); 01511 01512 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 01513 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 01514 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 01515 01516 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ 01517 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ 01518 z4 = z1 + z3; 01519 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ 01520 z4 -= z2; 01521 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ 01522 tmp21 = tmp20 + tmp23 + tmp25 - 01523 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ 01524 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ 01525 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ 01526 tmp24 += tmp25; 01527 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ 01528 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ 01529 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ 01530 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ 01531 01532 /* Odd part */ 01533 01534 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 01535 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 01536 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 01537 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 01538 01539 tmp11 = z1 + z2; 01540 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ 01541 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ 01542 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ 01543 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ 01544 tmp10 = tmp11 + tmp12 + tmp13 - 01545 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ 01546 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ 01547 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ 01548 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ 01549 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 01550 tmp11 += z1; 01551 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ 01552 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ 01553 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ 01554 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ 01555 01556 /* Final output stage */ 01557 01558 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 01559 wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 01560 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 01561 wsptr[8*9] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 01562 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 01563 wsptr[8*8] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 01564 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 01565 wsptr[8*7] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 01566 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 01567 wsptr[8*6] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 01568 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS); 01569 } 01570 01571 /* Pass 2: process 11 rows from work array, store into output array. */ 01572 01573 wsptr = workspace; 01574 for (ctr = 0; ctr < 11; ctr++) { 01575 outptr = output_buf[ctr] + output_col; 01576 01577 /* Even part */ 01578 01579 /* Add range center and fudge factor for final descale and range-limit. */ 01580 tmp10 = (INT32) wsptr[0] + 01581 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 01582 (ONE << (PASS1_BITS+2))); 01583 tmp10 <<= CONST_BITS; 01584 01585 z1 = (INT32) wsptr[2]; 01586 z2 = (INT32) wsptr[4]; 01587 z3 = (INT32) wsptr[6]; 01588 01589 tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132)); /* c2+c4 */ 01590 tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045)); /* c2-c6 */ 01591 z4 = z1 + z3; 01592 tmp24 = MULTIPLY(z4, - FIX(1.155664402)); /* -(c2-c10) */ 01593 z4 -= z2; 01594 tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976)); /* c2 */ 01595 tmp21 = tmp20 + tmp23 + tmp25 - 01596 MULTIPLY(z2, FIX(1.821790775)); /* c2+c4+c10-c6 */ 01597 tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */ 01598 tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */ 01599 tmp24 += tmp25; 01600 tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120)); /* c8+c10 */ 01601 tmp24 += MULTIPLY(z2, FIX(1.944413522)) - /* c2+c8 */ 01602 MULTIPLY(z1, FIX(1.390975730)); /* c4+c10 */ 01603 tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562)); /* c0 */ 01604 01605 /* Odd part */ 01606 01607 z1 = (INT32) wsptr[1]; 01608 z2 = (INT32) wsptr[3]; 01609 z3 = (INT32) wsptr[5]; 01610 z4 = (INT32) wsptr[7]; 01611 01612 tmp11 = z1 + z2; 01613 tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */ 01614 tmp11 = MULTIPLY(tmp11, FIX(0.887983902)); /* c3-c9 */ 01615 tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295)); /* c5-c9 */ 01616 tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */ 01617 tmp10 = tmp11 + tmp12 + tmp13 - 01618 MULTIPLY(z1, FIX(0.923107866)); /* c7+c5+c3-c1-2*c9 */ 01619 z1 = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */ 01620 tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588)); /* c1+c7+3*c9-c3 */ 01621 tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623)); /* c3+c5-c7-c9 */ 01622 z1 = MULTIPLY(z2 + z4, - FIX(1.798248910)); /* -(c1+c9) */ 01623 tmp11 += z1; 01624 tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632)); /* c1+c5+c9-c7 */ 01625 tmp14 += MULTIPLY(z2, - FIX(1.467221301)) + /* -(c5+c9) */ 01626 MULTIPLY(z3, FIX(1.001388905)) - /* c1-c9 */ 01627 MULTIPLY(z4, FIX(1.684843907)); /* c3+c9 */ 01628 01629 /* Final output stage */ 01630 01631 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 01632 CONST_BITS+PASS1_BITS+3) 01633 & RANGE_MASK]; 01634 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 01635 CONST_BITS+PASS1_BITS+3) 01636 & RANGE_MASK]; 01637 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 01638 CONST_BITS+PASS1_BITS+3) 01639 & RANGE_MASK]; 01640 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 01641 CONST_BITS+PASS1_BITS+3) 01642 & RANGE_MASK]; 01643 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 01644 CONST_BITS+PASS1_BITS+3) 01645 & RANGE_MASK]; 01646 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 01647 CONST_BITS+PASS1_BITS+3) 01648 & RANGE_MASK]; 01649 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 01650 CONST_BITS+PASS1_BITS+3) 01651 & RANGE_MASK]; 01652 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 01653 CONST_BITS+PASS1_BITS+3) 01654 & RANGE_MASK]; 01655 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 01656 CONST_BITS+PASS1_BITS+3) 01657 & RANGE_MASK]; 01658 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 01659 CONST_BITS+PASS1_BITS+3) 01660 & RANGE_MASK]; 01661 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25, 01662 CONST_BITS+PASS1_BITS+3) 01663 & RANGE_MASK]; 01664 01665 wsptr += 8; /* advance pointer to next row */ 01666 } 01667 } 01668 01669 01670 /* 01671 * Perform dequantization and inverse DCT on one block of coefficients, 01672 * producing a 12x12 output block. 01673 * 01674 * Optimized algorithm with 15 multiplications in the 1-D kernel. 01675 * cK represents sqrt(2) * cos(K*pi/24). 01676 */ 01677 01678 GLOBAL(void) 01679 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 01680 JCOEFPTR coef_block, 01681 JSAMPARRAY output_buf, JDIMENSION output_col) 01682 { 01683 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 01684 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 01685 INT32 z1, z2, z3, z4; 01686 JCOEFPTR inptr; 01687 ISLOW_MULT_TYPE * quantptr; 01688 int * wsptr; 01689 JSAMPROW outptr; 01690 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 01691 int ctr; 01692 int workspace[8*12]; /* buffers data between passes */ 01693 SHIFT_TEMPS 01694 01695 /* Pass 1: process columns from input, store into work array. */ 01696 01697 inptr = coef_block; 01698 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 01699 wsptr = workspace; 01700 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 01701 /* Even part */ 01702 01703 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 01704 z3 <<= CONST_BITS; 01705 /* Add fudge factor here for final descale. */ 01706 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 01707 01708 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 01709 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 01710 01711 tmp10 = z3 + z4; 01712 tmp11 = z3 - z4; 01713 01714 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 01715 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 01716 z1 <<= CONST_BITS; 01717 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 01718 z2 <<= CONST_BITS; 01719 01720 tmp12 = z1 - z2; 01721 01722 tmp21 = z3 + tmp12; 01723 tmp24 = z3 - tmp12; 01724 01725 tmp12 = z4 + z2; 01726 01727 tmp20 = tmp10 + tmp12; 01728 tmp25 = tmp10 - tmp12; 01729 01730 tmp12 = z4 - z1 - z2; 01731 01732 tmp22 = tmp11 + tmp12; 01733 tmp23 = tmp11 - tmp12; 01734 01735 /* Odd part */ 01736 01737 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 01738 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 01739 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 01740 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 01741 01742 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 01743 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 01744 01745 tmp10 = z1 + z3; 01746 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 01747 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 01748 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 01749 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 01750 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 01751 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 01752 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 01753 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 01754 01755 z1 -= z4; 01756 z2 -= z3; 01757 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 01758 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 01759 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 01760 01761 /* Final output stage */ 01762 01763 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 01764 wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 01765 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 01766 wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 01767 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 01768 wsptr[8*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 01769 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 01770 wsptr[8*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 01771 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 01772 wsptr[8*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 01773 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 01774 wsptr[8*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 01775 } 01776 01777 /* Pass 2: process 12 rows from work array, store into output array. */ 01778 01779 wsptr = workspace; 01780 for (ctr = 0; ctr < 12; ctr++) { 01781 outptr = output_buf[ctr] + output_col; 01782 01783 /* Even part */ 01784 01785 /* Add range center and fudge factor for final descale and range-limit. */ 01786 z3 = (INT32) wsptr[0] + 01787 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 01788 (ONE << (PASS1_BITS+2))); 01789 z3 <<= CONST_BITS; 01790 01791 z4 = (INT32) wsptr[4]; 01792 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 01793 01794 tmp10 = z3 + z4; 01795 tmp11 = z3 - z4; 01796 01797 z1 = (INT32) wsptr[2]; 01798 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 01799 z1 <<= CONST_BITS; 01800 z2 = (INT32) wsptr[6]; 01801 z2 <<= CONST_BITS; 01802 01803 tmp12 = z1 - z2; 01804 01805 tmp21 = z3 + tmp12; 01806 tmp24 = z3 - tmp12; 01807 01808 tmp12 = z4 + z2; 01809 01810 tmp20 = tmp10 + tmp12; 01811 tmp25 = tmp10 - tmp12; 01812 01813 tmp12 = z4 - z1 - z2; 01814 01815 tmp22 = tmp11 + tmp12; 01816 tmp23 = tmp11 - tmp12; 01817 01818 /* Odd part */ 01819 01820 z1 = (INT32) wsptr[1]; 01821 z2 = (INT32) wsptr[3]; 01822 z3 = (INT32) wsptr[5]; 01823 z4 = (INT32) wsptr[7]; 01824 01825 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 01826 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 01827 01828 tmp10 = z1 + z3; 01829 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 01830 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 01831 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 01832 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 01833 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 01834 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 01835 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 01836 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 01837 01838 z1 -= z4; 01839 z2 -= z3; 01840 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 01841 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 01842 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 01843 01844 /* Final output stage */ 01845 01846 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 01847 CONST_BITS+PASS1_BITS+3) 01848 & RANGE_MASK]; 01849 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 01850 CONST_BITS+PASS1_BITS+3) 01851 & RANGE_MASK]; 01852 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 01853 CONST_BITS+PASS1_BITS+3) 01854 & RANGE_MASK]; 01855 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 01856 CONST_BITS+PASS1_BITS+3) 01857 & RANGE_MASK]; 01858 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 01859 CONST_BITS+PASS1_BITS+3) 01860 & RANGE_MASK]; 01861 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 01862 CONST_BITS+PASS1_BITS+3) 01863 & RANGE_MASK]; 01864 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 01865 CONST_BITS+PASS1_BITS+3) 01866 & RANGE_MASK]; 01867 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 01868 CONST_BITS+PASS1_BITS+3) 01869 & RANGE_MASK]; 01870 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 01871 CONST_BITS+PASS1_BITS+3) 01872 & RANGE_MASK]; 01873 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 01874 CONST_BITS+PASS1_BITS+3) 01875 & RANGE_MASK]; 01876 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 01877 CONST_BITS+PASS1_BITS+3) 01878 & RANGE_MASK]; 01879 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 01880 CONST_BITS+PASS1_BITS+3) 01881 & RANGE_MASK]; 01882 01883 wsptr += 8; /* advance pointer to next row */ 01884 } 01885 } 01886 01887 01888 /* 01889 * Perform dequantization and inverse DCT on one block of coefficients, 01890 * producing a 13x13 output block. 01891 * 01892 * Optimized algorithm with 29 multiplications in the 1-D kernel. 01893 * cK represents sqrt(2) * cos(K*pi/26). 01894 */ 01895 01896 GLOBAL(void) 01897 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 01898 JCOEFPTR coef_block, 01899 JSAMPARRAY output_buf, JDIMENSION output_col) 01900 { 01901 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 01902 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 01903 INT32 z1, z2, z3, z4; 01904 JCOEFPTR inptr; 01905 ISLOW_MULT_TYPE * quantptr; 01906 int * wsptr; 01907 JSAMPROW outptr; 01908 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 01909 int ctr; 01910 int workspace[8*13]; /* buffers data between passes */ 01911 SHIFT_TEMPS 01912 01913 /* Pass 1: process columns from input, store into work array. */ 01914 01915 inptr = coef_block; 01916 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 01917 wsptr = workspace; 01918 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 01919 /* Even part */ 01920 01921 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 01922 z1 <<= CONST_BITS; 01923 /* Add fudge factor here for final descale. */ 01924 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 01925 01926 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 01927 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 01928 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 01929 01930 tmp10 = z3 + z4; 01931 tmp11 = z3 - z4; 01932 01933 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ 01934 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ 01935 01936 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ 01937 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ 01938 01939 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ 01940 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ 01941 01942 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ 01943 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ 01944 01945 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ 01946 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ 01947 01948 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ 01949 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ 01950 01951 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ 01952 01953 /* Odd part */ 01954 01955 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 01956 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 01957 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 01958 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 01959 01960 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ 01961 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ 01962 tmp15 = z1 + z4; 01963 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ 01964 tmp10 = tmp11 + tmp12 + tmp13 - 01965 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ 01966 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ 01967 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ 01968 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ 01969 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ 01970 tmp11 += tmp14; 01971 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ 01972 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ 01973 tmp12 += tmp14; 01974 tmp13 += tmp14; 01975 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ 01976 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ 01977 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ 01978 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ 01979 tmp14 += z1; 01980 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ 01981 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ 01982 01983 /* Final output stage */ 01984 01985 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 01986 wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 01987 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 01988 wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 01989 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 01990 wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 01991 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 01992 wsptr[8*9] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 01993 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 01994 wsptr[8*8] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 01995 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 01996 wsptr[8*7] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 01997 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS); 01998 } 01999 02000 /* Pass 2: process 13 rows from work array, store into output array. */ 02001 02002 wsptr = workspace; 02003 for (ctr = 0; ctr < 13; ctr++) { 02004 outptr = output_buf[ctr] + output_col; 02005 02006 /* Even part */ 02007 02008 /* Add range center and fudge factor for final descale and range-limit. */ 02009 z1 = (INT32) wsptr[0] + 02010 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 02011 (ONE << (PASS1_BITS+2))); 02012 z1 <<= CONST_BITS; 02013 02014 z2 = (INT32) wsptr[2]; 02015 z3 = (INT32) wsptr[4]; 02016 z4 = (INT32) wsptr[6]; 02017 02018 tmp10 = z3 + z4; 02019 tmp11 = z3 - z4; 02020 02021 tmp12 = MULTIPLY(tmp10, FIX(1.155388986)); /* (c4+c6)/2 */ 02022 tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1; /* (c4-c6)/2 */ 02023 02024 tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13; /* c2 */ 02025 tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13; /* c10 */ 02026 02027 tmp12 = MULTIPLY(tmp10, FIX(0.316450131)); /* (c8-c12)/2 */ 02028 tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1; /* (c8+c12)/2 */ 02029 02030 tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13; /* c6 */ 02031 tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */ 02032 02033 tmp12 = MULTIPLY(tmp10, FIX(0.435816023)); /* (c2-c10)/2 */ 02034 tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1; /* (c2+c10)/2 */ 02035 02036 tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */ 02037 tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */ 02038 02039 tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1; /* c0 */ 02040 02041 /* Odd part */ 02042 02043 z1 = (INT32) wsptr[1]; 02044 z2 = (INT32) wsptr[3]; 02045 z3 = (INT32) wsptr[5]; 02046 z4 = (INT32) wsptr[7]; 02047 02048 tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651)); /* c3 */ 02049 tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945)); /* c5 */ 02050 tmp15 = z1 + z4; 02051 tmp13 = MULTIPLY(tmp15, FIX(0.937797057)); /* c7 */ 02052 tmp10 = tmp11 + tmp12 + tmp13 - 02053 MULTIPLY(z1, FIX(2.020082300)); /* c7+c5+c3-c1 */ 02054 tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458)); /* -c11 */ 02055 tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */ 02056 tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */ 02057 tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945)); /* -c5 */ 02058 tmp11 += tmp14; 02059 tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */ 02060 tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813)); /* -c9 */ 02061 tmp12 += tmp14; 02062 tmp13 += tmp14; 02063 tmp15 = MULTIPLY(tmp15, FIX(0.338443458)); /* c11 */ 02064 tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */ 02065 MULTIPLY(z2, FIX(0.466105296)); /* c1-c7 */ 02066 z1 = MULTIPLY(z3 - z2, FIX(0.937797057)); /* c7 */ 02067 tmp14 += z1; 02068 tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) - /* c3-c7 */ 02069 MULTIPLY(z4, FIX(1.742345811)); /* c1+c11 */ 02070 02071 /* Final output stage */ 02072 02073 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 02074 CONST_BITS+PASS1_BITS+3) 02075 & RANGE_MASK]; 02076 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 02077 CONST_BITS+PASS1_BITS+3) 02078 & RANGE_MASK]; 02079 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 02080 CONST_BITS+PASS1_BITS+3) 02081 & RANGE_MASK]; 02082 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 02083 CONST_BITS+PASS1_BITS+3) 02084 & RANGE_MASK]; 02085 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 02086 CONST_BITS+PASS1_BITS+3) 02087 & RANGE_MASK]; 02088 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 02089 CONST_BITS+PASS1_BITS+3) 02090 & RANGE_MASK]; 02091 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 02092 CONST_BITS+PASS1_BITS+3) 02093 & RANGE_MASK]; 02094 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 02095 CONST_BITS+PASS1_BITS+3) 02096 & RANGE_MASK]; 02097 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 02098 CONST_BITS+PASS1_BITS+3) 02099 & RANGE_MASK]; 02100 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 02101 CONST_BITS+PASS1_BITS+3) 02102 & RANGE_MASK]; 02103 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 02104 CONST_BITS+PASS1_BITS+3) 02105 & RANGE_MASK]; 02106 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 02107 CONST_BITS+PASS1_BITS+3) 02108 & RANGE_MASK]; 02109 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26, 02110 CONST_BITS+PASS1_BITS+3) 02111 & RANGE_MASK]; 02112 02113 wsptr += 8; /* advance pointer to next row */ 02114 } 02115 } 02116 02117 02118 /* 02119 * Perform dequantization and inverse DCT on one block of coefficients, 02120 * producing a 14x14 output block. 02121 * 02122 * Optimized algorithm with 20 multiplications in the 1-D kernel. 02123 * cK represents sqrt(2) * cos(K*pi/28). 02124 */ 02125 02126 GLOBAL(void) 02127 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 02128 JCOEFPTR coef_block, 02129 JSAMPARRAY output_buf, JDIMENSION output_col) 02130 { 02131 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 02132 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 02133 INT32 z1, z2, z3, z4; 02134 JCOEFPTR inptr; 02135 ISLOW_MULT_TYPE * quantptr; 02136 int * wsptr; 02137 JSAMPROW outptr; 02138 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 02139 int ctr; 02140 int workspace[8*14]; /* buffers data between passes */ 02141 SHIFT_TEMPS 02142 02143 /* Pass 1: process columns from input, store into work array. */ 02144 02145 inptr = coef_block; 02146 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 02147 wsptr = workspace; 02148 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 02149 /* Even part */ 02150 02151 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 02152 z1 <<= CONST_BITS; 02153 /* Add fudge factor here for final descale. */ 02154 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 02155 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 02156 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 02157 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 02158 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 02159 02160 tmp10 = z1 + z2; 02161 tmp11 = z1 + z3; 02162 tmp12 = z1 - z4; 02163 02164 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ 02165 CONST_BITS-PASS1_BITS); 02166 02167 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 02168 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 02169 02170 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 02171 02172 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 02173 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 02174 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 02175 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 02176 02177 tmp20 = tmp10 + tmp13; 02178 tmp26 = tmp10 - tmp13; 02179 tmp21 = tmp11 + tmp14; 02180 tmp25 = tmp11 - tmp14; 02181 tmp22 = tmp12 + tmp15; 02182 tmp24 = tmp12 - tmp15; 02183 02184 /* Odd part */ 02185 02186 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 02187 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 02188 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 02189 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 02190 tmp13 = z4 << CONST_BITS; 02191 02192 tmp14 = z1 + z3; 02193 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 02194 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 02195 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 02196 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 02197 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 02198 z1 -= z2; 02199 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ 02200 tmp16 += tmp15; 02201 z1 += z4; 02202 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ 02203 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 02204 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 02205 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 02206 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 02207 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 02208 02209 tmp13 = (z1 - z3) << PASS1_BITS; 02210 02211 /* Final output stage */ 02212 02213 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 02214 wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 02215 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 02216 wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 02217 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 02218 wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 02219 wsptr[8*3] = (int) (tmp23 + tmp13); 02220 wsptr[8*10] = (int) (tmp23 - tmp13); 02221 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 02222 wsptr[8*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 02223 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 02224 wsptr[8*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 02225 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 02226 wsptr[8*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 02227 } 02228 02229 /* Pass 2: process 14 rows from work array, store into output array. */ 02230 02231 wsptr = workspace; 02232 for (ctr = 0; ctr < 14; ctr++) { 02233 outptr = output_buf[ctr] + output_col; 02234 02235 /* Even part */ 02236 02237 /* Add range center and fudge factor for final descale and range-limit. */ 02238 z1 = (INT32) wsptr[0] + 02239 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 02240 (ONE << (PASS1_BITS+2))); 02241 z1 <<= CONST_BITS; 02242 z4 = (INT32) wsptr[4]; 02243 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 02244 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 02245 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 02246 02247 tmp10 = z1 + z2; 02248 tmp11 = z1 + z3; 02249 tmp12 = z1 - z4; 02250 02251 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ 02252 02253 z1 = (INT32) wsptr[2]; 02254 z2 = (INT32) wsptr[6]; 02255 02256 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 02257 02258 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 02259 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 02260 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 02261 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 02262 02263 tmp20 = tmp10 + tmp13; 02264 tmp26 = tmp10 - tmp13; 02265 tmp21 = tmp11 + tmp14; 02266 tmp25 = tmp11 - tmp14; 02267 tmp22 = tmp12 + tmp15; 02268 tmp24 = tmp12 - tmp15; 02269 02270 /* Odd part */ 02271 02272 z1 = (INT32) wsptr[1]; 02273 z2 = (INT32) wsptr[3]; 02274 z3 = (INT32) wsptr[5]; 02275 z4 = (INT32) wsptr[7]; 02276 z4 <<= CONST_BITS; 02277 02278 tmp14 = z1 + z3; 02279 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 02280 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 02281 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 02282 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 02283 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 02284 z1 -= z2; 02285 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ 02286 tmp16 += tmp15; 02287 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ 02288 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 02289 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 02290 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 02291 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 02292 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 02293 02294 tmp13 = ((z1 - z3) << CONST_BITS) + z4; 02295 02296 /* Final output stage */ 02297 02298 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 02299 CONST_BITS+PASS1_BITS+3) 02300 & RANGE_MASK]; 02301 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 02302 CONST_BITS+PASS1_BITS+3) 02303 & RANGE_MASK]; 02304 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 02305 CONST_BITS+PASS1_BITS+3) 02306 & RANGE_MASK]; 02307 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 02308 CONST_BITS+PASS1_BITS+3) 02309 & RANGE_MASK]; 02310 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 02311 CONST_BITS+PASS1_BITS+3) 02312 & RANGE_MASK]; 02313 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 02314 CONST_BITS+PASS1_BITS+3) 02315 & RANGE_MASK]; 02316 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 02317 CONST_BITS+PASS1_BITS+3) 02318 & RANGE_MASK]; 02319 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 02320 CONST_BITS+PASS1_BITS+3) 02321 & RANGE_MASK]; 02322 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 02323 CONST_BITS+PASS1_BITS+3) 02324 & RANGE_MASK]; 02325 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 02326 CONST_BITS+PASS1_BITS+3) 02327 & RANGE_MASK]; 02328 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 02329 CONST_BITS+PASS1_BITS+3) 02330 & RANGE_MASK]; 02331 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 02332 CONST_BITS+PASS1_BITS+3) 02333 & RANGE_MASK]; 02334 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 02335 CONST_BITS+PASS1_BITS+3) 02336 & RANGE_MASK]; 02337 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 02338 CONST_BITS+PASS1_BITS+3) 02339 & RANGE_MASK]; 02340 02341 wsptr += 8; /* advance pointer to next row */ 02342 } 02343 } 02344 02345 02346 /* 02347 * Perform dequantization and inverse DCT on one block of coefficients, 02348 * producing a 15x15 output block. 02349 * 02350 * Optimized algorithm with 22 multiplications in the 1-D kernel. 02351 * cK represents sqrt(2) * cos(K*pi/30). 02352 */ 02353 02354 GLOBAL(void) 02355 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 02356 JCOEFPTR coef_block, 02357 JSAMPARRAY output_buf, JDIMENSION output_col) 02358 { 02359 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 02360 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 02361 INT32 z1, z2, z3, z4; 02362 JCOEFPTR inptr; 02363 ISLOW_MULT_TYPE * quantptr; 02364 int * wsptr; 02365 JSAMPROW outptr; 02366 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 02367 int ctr; 02368 int workspace[8*15]; /* buffers data between passes */ 02369 SHIFT_TEMPS 02370 02371 /* Pass 1: process columns from input, store into work array. */ 02372 02373 inptr = coef_block; 02374 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 02375 wsptr = workspace; 02376 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 02377 /* Even part */ 02378 02379 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 02380 z1 <<= CONST_BITS; 02381 /* Add fudge factor here for final descale. */ 02382 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 02383 02384 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 02385 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 02386 z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 02387 02388 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ 02389 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ 02390 02391 tmp12 = z1 - tmp10; 02392 tmp13 = z1 + tmp11; 02393 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ 02394 02395 z4 = z2 - z3; 02396 z3 += z2; 02397 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ 02398 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ 02399 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ 02400 02401 tmp20 = tmp13 + tmp10 + tmp11; 02402 tmp23 = tmp12 - tmp10 + tmp11 + z2; 02403 02404 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ 02405 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ 02406 02407 tmp25 = tmp13 - tmp10 - tmp11; 02408 tmp26 = tmp12 + tmp10 - tmp11 - z2; 02409 02410 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ 02411 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ 02412 02413 tmp21 = tmp12 + tmp10 + tmp11; 02414 tmp24 = tmp13 - tmp10 + tmp11; 02415 tmp11 += tmp11; 02416 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ 02417 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ 02418 02419 /* Odd part */ 02420 02421 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 02422 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 02423 z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 02424 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ 02425 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 02426 02427 tmp13 = z2 - z4; 02428 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ 02429 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ 02430 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ 02431 02432 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ 02433 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ 02434 z2 = z1 - z4; 02435 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ 02436 02437 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ 02438 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ 02439 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ 02440 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ 02441 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ 02442 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ 02443 02444 /* Final output stage */ 02445 02446 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 02447 wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 02448 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 02449 wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 02450 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 02451 wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 02452 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 02453 wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 02454 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 02455 wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 02456 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 02457 wsptr[8*9] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 02458 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 02459 wsptr[8*8] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 02460 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS); 02461 } 02462 02463 /* Pass 2: process 15 rows from work array, store into output array. */ 02464 02465 wsptr = workspace; 02466 for (ctr = 0; ctr < 15; ctr++) { 02467 outptr = output_buf[ctr] + output_col; 02468 02469 /* Even part */ 02470 02471 /* Add range center and fudge factor for final descale and range-limit. */ 02472 z1 = (INT32) wsptr[0] + 02473 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 02474 (ONE << (PASS1_BITS+2))); 02475 z1 <<= CONST_BITS; 02476 02477 z2 = (INT32) wsptr[2]; 02478 z3 = (INT32) wsptr[4]; 02479 z4 = (INT32) wsptr[6]; 02480 02481 tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */ 02482 tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */ 02483 02484 tmp12 = z1 - tmp10; 02485 tmp13 = z1 + tmp11; 02486 z1 -= (tmp11 - tmp10) << 1; /* c0 = (c6-c12)*2 */ 02487 02488 z4 = z2 - z3; 02489 z3 += z2; 02490 tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */ 02491 tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */ 02492 z2 = MULTIPLY(z2, FIX(1.439773946)); /* c4+c14 */ 02493 02494 tmp20 = tmp13 + tmp10 + tmp11; 02495 tmp23 = tmp12 - tmp10 + tmp11 + z2; 02496 02497 tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */ 02498 tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */ 02499 02500 tmp25 = tmp13 - tmp10 - tmp11; 02501 tmp26 = tmp12 + tmp10 - tmp11 - z2; 02502 02503 tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */ 02504 tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */ 02505 02506 tmp21 = tmp12 + tmp10 + tmp11; 02507 tmp24 = tmp13 - tmp10 + tmp11; 02508 tmp11 += tmp11; 02509 tmp22 = z1 + tmp11; /* c10 = c6-c12 */ 02510 tmp27 = z1 - tmp11 - tmp11; /* c0 = (c6-c12)*2 */ 02511 02512 /* Odd part */ 02513 02514 z1 = (INT32) wsptr[1]; 02515 z2 = (INT32) wsptr[3]; 02516 z4 = (INT32) wsptr[5]; 02517 z3 = MULTIPLY(z4, FIX(1.224744871)); /* c5 */ 02518 z4 = (INT32) wsptr[7]; 02519 02520 tmp13 = z2 - z4; 02521 tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876)); /* c9 */ 02522 tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148)); /* c3-c9 */ 02523 tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899)); /* c3+c9 */ 02524 02525 tmp13 = MULTIPLY(z2, - FIX(0.831253876)); /* -c9 */ 02526 tmp15 = MULTIPLY(z2, - FIX(1.344997024)); /* -c3 */ 02527 z2 = z1 - z4; 02528 tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353)); /* c1 */ 02529 02530 tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */ 02531 tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */ 02532 tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3; /* c5 */ 02533 z2 = MULTIPLY(z1 + z4, FIX(0.575212477)); /* c11 */ 02534 tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3; /* c7-c11 */ 02535 tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3; /* c11+c13 */ 02536 02537 /* Final output stage */ 02538 02539 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 02540 CONST_BITS+PASS1_BITS+3) 02541 & RANGE_MASK]; 02542 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 02543 CONST_BITS+PASS1_BITS+3) 02544 & RANGE_MASK]; 02545 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 02546 CONST_BITS+PASS1_BITS+3) 02547 & RANGE_MASK]; 02548 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 02549 CONST_BITS+PASS1_BITS+3) 02550 & RANGE_MASK]; 02551 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 02552 CONST_BITS+PASS1_BITS+3) 02553 & RANGE_MASK]; 02554 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 02555 CONST_BITS+PASS1_BITS+3) 02556 & RANGE_MASK]; 02557 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 02558 CONST_BITS+PASS1_BITS+3) 02559 & RANGE_MASK]; 02560 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 02561 CONST_BITS+PASS1_BITS+3) 02562 & RANGE_MASK]; 02563 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 02564 CONST_BITS+PASS1_BITS+3) 02565 & RANGE_MASK]; 02566 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 02567 CONST_BITS+PASS1_BITS+3) 02568 & RANGE_MASK]; 02569 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 02570 CONST_BITS+PASS1_BITS+3) 02571 & RANGE_MASK]; 02572 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 02573 CONST_BITS+PASS1_BITS+3) 02574 & RANGE_MASK]; 02575 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 02576 CONST_BITS+PASS1_BITS+3) 02577 & RANGE_MASK]; 02578 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 02579 CONST_BITS+PASS1_BITS+3) 02580 & RANGE_MASK]; 02581 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27, 02582 CONST_BITS+PASS1_BITS+3) 02583 & RANGE_MASK]; 02584 02585 wsptr += 8; /* advance pointer to next row */ 02586 } 02587 } 02588 02589 02590 /* 02591 * Perform dequantization and inverse DCT on one block of coefficients, 02592 * producing a 16x16 output block. 02593 * 02594 * Optimized algorithm with 28 multiplications in the 1-D kernel. 02595 * cK represents sqrt(2) * cos(K*pi/32). 02596 */ 02597 02598 GLOBAL(void) 02599 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 02600 JCOEFPTR coef_block, 02601 JSAMPARRAY output_buf, JDIMENSION output_col) 02602 { 02603 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; 02604 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 02605 INT32 z1, z2, z3, z4; 02606 JCOEFPTR inptr; 02607 ISLOW_MULT_TYPE * quantptr; 02608 int * wsptr; 02609 JSAMPROW outptr; 02610 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 02611 int ctr; 02612 int workspace[8*16]; /* buffers data between passes */ 02613 SHIFT_TEMPS 02614 02615 /* Pass 1: process columns from input, store into work array. */ 02616 02617 inptr = coef_block; 02618 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 02619 wsptr = workspace; 02620 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 02621 /* Even part */ 02622 02623 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 02624 tmp0 <<= CONST_BITS; 02625 /* Add fudge factor here for final descale. */ 02626 tmp0 += 1 << (CONST_BITS-PASS1_BITS-1); 02627 02628 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 02629 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 02630 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 02631 02632 tmp10 = tmp0 + tmp1; 02633 tmp11 = tmp0 - tmp1; 02634 tmp12 = tmp0 + tmp2; 02635 tmp13 = tmp0 - tmp2; 02636 02637 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 02638 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 02639 z3 = z1 - z2; 02640 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 02641 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 02642 02643 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 02644 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 02645 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 02646 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 02647 02648 tmp20 = tmp10 + tmp0; 02649 tmp27 = tmp10 - tmp0; 02650 tmp21 = tmp12 + tmp1; 02651 tmp26 = tmp12 - tmp1; 02652 tmp22 = tmp13 + tmp2; 02653 tmp25 = tmp13 - tmp2; 02654 tmp23 = tmp11 + tmp3; 02655 tmp24 = tmp11 - tmp3; 02656 02657 /* Odd part */ 02658 02659 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 02660 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 02661 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 02662 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 02663 02664 tmp11 = z1 + z3; 02665 02666 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 02667 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 02668 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 02669 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 02670 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 02671 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 02672 tmp0 = tmp1 + tmp2 + tmp3 - 02673 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 02674 tmp13 = tmp10 + tmp11 + tmp12 - 02675 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 02676 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 02677 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 02678 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 02679 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 02680 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 02681 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 02682 z2 += z4; 02683 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 02684 tmp1 += z1; 02685 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 02686 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 02687 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 02688 tmp12 += z2; 02689 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 02690 tmp2 += z2; 02691 tmp3 += z2; 02692 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 02693 tmp10 += z2; 02694 tmp11 += z2; 02695 02696 /* Final output stage */ 02697 02698 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); 02699 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); 02700 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); 02701 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); 02702 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); 02703 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); 02704 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); 02705 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); 02706 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); 02707 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); 02708 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); 02709 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); 02710 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); 02711 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); 02712 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); 02713 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); 02714 } 02715 02716 /* Pass 2: process 16 rows from work array, store into output array. */ 02717 02718 wsptr = workspace; 02719 for (ctr = 0; ctr < 16; ctr++) { 02720 outptr = output_buf[ctr] + output_col; 02721 02722 /* Even part */ 02723 02724 /* Add range center and fudge factor for final descale and range-limit. */ 02725 tmp0 = (INT32) wsptr[0] + 02726 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 02727 (ONE << (PASS1_BITS+2))); 02728 tmp0 <<= CONST_BITS; 02729 02730 z1 = (INT32) wsptr[4]; 02731 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 02732 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 02733 02734 tmp10 = tmp0 + tmp1; 02735 tmp11 = tmp0 - tmp1; 02736 tmp12 = tmp0 + tmp2; 02737 tmp13 = tmp0 - tmp2; 02738 02739 z1 = (INT32) wsptr[2]; 02740 z2 = (INT32) wsptr[6]; 02741 z3 = z1 - z2; 02742 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 02743 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 02744 02745 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 02746 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 02747 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 02748 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 02749 02750 tmp20 = tmp10 + tmp0; 02751 tmp27 = tmp10 - tmp0; 02752 tmp21 = tmp12 + tmp1; 02753 tmp26 = tmp12 - tmp1; 02754 tmp22 = tmp13 + tmp2; 02755 tmp25 = tmp13 - tmp2; 02756 tmp23 = tmp11 + tmp3; 02757 tmp24 = tmp11 - tmp3; 02758 02759 /* Odd part */ 02760 02761 z1 = (INT32) wsptr[1]; 02762 z2 = (INT32) wsptr[3]; 02763 z3 = (INT32) wsptr[5]; 02764 z4 = (INT32) wsptr[7]; 02765 02766 tmp11 = z1 + z3; 02767 02768 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 02769 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 02770 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 02771 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 02772 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 02773 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 02774 tmp0 = tmp1 + tmp2 + tmp3 - 02775 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 02776 tmp13 = tmp10 + tmp11 + tmp12 - 02777 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 02778 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 02779 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 02780 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 02781 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 02782 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 02783 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 02784 z2 += z4; 02785 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 02786 tmp1 += z1; 02787 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 02788 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 02789 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 02790 tmp12 += z2; 02791 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 02792 tmp2 += z2; 02793 tmp3 += z2; 02794 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 02795 tmp10 += z2; 02796 tmp11 += z2; 02797 02798 /* Final output stage */ 02799 02800 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, 02801 CONST_BITS+PASS1_BITS+3) 02802 & RANGE_MASK]; 02803 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, 02804 CONST_BITS+PASS1_BITS+3) 02805 & RANGE_MASK]; 02806 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, 02807 CONST_BITS+PASS1_BITS+3) 02808 & RANGE_MASK]; 02809 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, 02810 CONST_BITS+PASS1_BITS+3) 02811 & RANGE_MASK]; 02812 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, 02813 CONST_BITS+PASS1_BITS+3) 02814 & RANGE_MASK]; 02815 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, 02816 CONST_BITS+PASS1_BITS+3) 02817 & RANGE_MASK]; 02818 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, 02819 CONST_BITS+PASS1_BITS+3) 02820 & RANGE_MASK]; 02821 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, 02822 CONST_BITS+PASS1_BITS+3) 02823 & RANGE_MASK]; 02824 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, 02825 CONST_BITS+PASS1_BITS+3) 02826 & RANGE_MASK]; 02827 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, 02828 CONST_BITS+PASS1_BITS+3) 02829 & RANGE_MASK]; 02830 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, 02831 CONST_BITS+PASS1_BITS+3) 02832 & RANGE_MASK]; 02833 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, 02834 CONST_BITS+PASS1_BITS+3) 02835 & RANGE_MASK]; 02836 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, 02837 CONST_BITS+PASS1_BITS+3) 02838 & RANGE_MASK]; 02839 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, 02840 CONST_BITS+PASS1_BITS+3) 02841 & RANGE_MASK]; 02842 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, 02843 CONST_BITS+PASS1_BITS+3) 02844 & RANGE_MASK]; 02845 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, 02846 CONST_BITS+PASS1_BITS+3) 02847 & RANGE_MASK]; 02848 02849 wsptr += 8; /* advance pointer to next row */ 02850 } 02851 } 02852 02853 02854 /* 02855 * Perform dequantization and inverse DCT on one block of coefficients, 02856 * producing a 16x8 output block. 02857 * 02858 * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows). 02859 */ 02860 02861 GLOBAL(void) 02862 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 02863 JCOEFPTR coef_block, 02864 JSAMPARRAY output_buf, JDIMENSION output_col) 02865 { 02866 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; 02867 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 02868 INT32 z1, z2, z3, z4; 02869 JCOEFPTR inptr; 02870 ISLOW_MULT_TYPE * quantptr; 02871 int * wsptr; 02872 JSAMPROW outptr; 02873 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 02874 int ctr; 02875 int workspace[8*8]; /* buffers data between passes */ 02876 SHIFT_TEMPS 02877 02878 /* Pass 1: process columns from input, store into work array. 02879 * Note results are scaled up by sqrt(8) compared to a true IDCT; 02880 * furthermore, we scale the results by 2**PASS1_BITS. 02881 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 02882 */ 02883 02884 inptr = coef_block; 02885 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 02886 wsptr = workspace; 02887 for (ctr = DCTSIZE; ctr > 0; ctr--) { 02888 /* Due to quantization, we will usually find that many of the input 02889 * coefficients are zero, especially the AC terms. We can exploit this 02890 * by short-circuiting the IDCT calculation for any column in which all 02891 * the AC terms are zero. In that case each output is equal to the 02892 * DC coefficient (with scale factor as needed). 02893 * With typical images and quantization tables, half or more of the 02894 * column DCT calculations can be simplified this way. 02895 */ 02896 02897 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 02898 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 02899 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 02900 inptr[DCTSIZE*7] == 0) { 02901 /* AC terms all zero */ 02902 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 02903 02904 wsptr[DCTSIZE*0] = dcval; 02905 wsptr[DCTSIZE*1] = dcval; 02906 wsptr[DCTSIZE*2] = dcval; 02907 wsptr[DCTSIZE*3] = dcval; 02908 wsptr[DCTSIZE*4] = dcval; 02909 wsptr[DCTSIZE*5] = dcval; 02910 wsptr[DCTSIZE*6] = dcval; 02911 wsptr[DCTSIZE*7] = dcval; 02912 02913 inptr++; /* advance pointers to next column */ 02914 quantptr++; 02915 wsptr++; 02916 continue; 02917 } 02918 02919 /* Even part: reverse the even part of the forward DCT. 02920 * The rotator is c(-6). 02921 */ 02922 02923 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 02924 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 02925 02926 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 02927 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 02928 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 02929 02930 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 02931 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 02932 z2 <<= CONST_BITS; 02933 z3 <<= CONST_BITS; 02934 /* Add fudge factor here for final descale. */ 02935 z2 += ONE << (CONST_BITS-PASS1_BITS-1); 02936 02937 tmp0 = z2 + z3; 02938 tmp1 = z2 - z3; 02939 02940 tmp10 = tmp0 + tmp2; 02941 tmp13 = tmp0 - tmp2; 02942 tmp11 = tmp1 + tmp3; 02943 tmp12 = tmp1 - tmp3; 02944 02945 /* Odd part per figure 8; the matrix is unitary and hence its 02946 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 02947 */ 02948 02949 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 02950 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 02951 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 02952 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 02953 02954 z2 = tmp0 + tmp2; 02955 z3 = tmp1 + tmp3; 02956 02957 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 02958 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 02959 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 02960 z2 += z1; 02961 z3 += z1; 02962 02963 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 02964 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 02965 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 02966 tmp0 += z1 + z2; 02967 tmp3 += z1 + z3; 02968 02969 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 02970 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 02971 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 02972 tmp1 += z1 + z3; 02973 tmp2 += z1 + z2; 02974 02975 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 02976 02977 wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 02978 wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 02979 wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 02980 wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 02981 wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 02982 wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 02983 wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 02984 wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 02985 02986 inptr++; /* advance pointers to next column */ 02987 quantptr++; 02988 wsptr++; 02989 } 02990 02991 /* Pass 2: process 8 rows from work array, store into output array. 02992 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). 02993 */ 02994 02995 wsptr = workspace; 02996 for (ctr = 0; ctr < 8; ctr++) { 02997 outptr = output_buf[ctr] + output_col; 02998 02999 /* Even part */ 03000 03001 /* Add range center and fudge factor for final descale and range-limit. */ 03002 tmp0 = (INT32) wsptr[0] + 03003 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 03004 (ONE << (PASS1_BITS+2))); 03005 tmp0 <<= CONST_BITS; 03006 03007 z1 = (INT32) wsptr[4]; 03008 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 03009 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 03010 03011 tmp10 = tmp0 + tmp1; 03012 tmp11 = tmp0 - tmp1; 03013 tmp12 = tmp0 + tmp2; 03014 tmp13 = tmp0 - tmp2; 03015 03016 z1 = (INT32) wsptr[2]; 03017 z2 = (INT32) wsptr[6]; 03018 z3 = z1 - z2; 03019 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 03020 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 03021 03022 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 03023 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 03024 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 03025 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 03026 03027 tmp20 = tmp10 + tmp0; 03028 tmp27 = tmp10 - tmp0; 03029 tmp21 = tmp12 + tmp1; 03030 tmp26 = tmp12 - tmp1; 03031 tmp22 = tmp13 + tmp2; 03032 tmp25 = tmp13 - tmp2; 03033 tmp23 = tmp11 + tmp3; 03034 tmp24 = tmp11 - tmp3; 03035 03036 /* Odd part */ 03037 03038 z1 = (INT32) wsptr[1]; 03039 z2 = (INT32) wsptr[3]; 03040 z3 = (INT32) wsptr[5]; 03041 z4 = (INT32) wsptr[7]; 03042 03043 tmp11 = z1 + z3; 03044 03045 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 03046 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 03047 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 03048 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 03049 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 03050 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 03051 tmp0 = tmp1 + tmp2 + tmp3 - 03052 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 03053 tmp13 = tmp10 + tmp11 + tmp12 - 03054 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 03055 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 03056 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 03057 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 03058 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 03059 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 03060 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 03061 z2 += z4; 03062 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 03063 tmp1 += z1; 03064 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 03065 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 03066 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 03067 tmp12 += z2; 03068 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 03069 tmp2 += z2; 03070 tmp3 += z2; 03071 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 03072 tmp10 += z2; 03073 tmp11 += z2; 03074 03075 /* Final output stage */ 03076 03077 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0, 03078 CONST_BITS+PASS1_BITS+3) 03079 & RANGE_MASK]; 03080 outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0, 03081 CONST_BITS+PASS1_BITS+3) 03082 & RANGE_MASK]; 03083 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1, 03084 CONST_BITS+PASS1_BITS+3) 03085 & RANGE_MASK]; 03086 outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1, 03087 CONST_BITS+PASS1_BITS+3) 03088 & RANGE_MASK]; 03089 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2, 03090 CONST_BITS+PASS1_BITS+3) 03091 & RANGE_MASK]; 03092 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2, 03093 CONST_BITS+PASS1_BITS+3) 03094 & RANGE_MASK]; 03095 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3, 03096 CONST_BITS+PASS1_BITS+3) 03097 & RANGE_MASK]; 03098 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3, 03099 CONST_BITS+PASS1_BITS+3) 03100 & RANGE_MASK]; 03101 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10, 03102 CONST_BITS+PASS1_BITS+3) 03103 & RANGE_MASK]; 03104 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10, 03105 CONST_BITS+PASS1_BITS+3) 03106 & RANGE_MASK]; 03107 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11, 03108 CONST_BITS+PASS1_BITS+3) 03109 & RANGE_MASK]; 03110 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11, 03111 CONST_BITS+PASS1_BITS+3) 03112 & RANGE_MASK]; 03113 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12, 03114 CONST_BITS+PASS1_BITS+3) 03115 & RANGE_MASK]; 03116 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12, 03117 CONST_BITS+PASS1_BITS+3) 03118 & RANGE_MASK]; 03119 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13, 03120 CONST_BITS+PASS1_BITS+3) 03121 & RANGE_MASK]; 03122 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13, 03123 CONST_BITS+PASS1_BITS+3) 03124 & RANGE_MASK]; 03125 03126 wsptr += 8; /* advance pointer to next row */ 03127 } 03128 } 03129 03130 03131 /* 03132 * Perform dequantization and inverse DCT on one block of coefficients, 03133 * producing a 14x7 output block. 03134 * 03135 * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows). 03136 */ 03137 03138 GLOBAL(void) 03139 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 03140 JCOEFPTR coef_block, 03141 JSAMPARRAY output_buf, JDIMENSION output_col) 03142 { 03143 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 03144 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 03145 INT32 z1, z2, z3, z4; 03146 JCOEFPTR inptr; 03147 ISLOW_MULT_TYPE * quantptr; 03148 int * wsptr; 03149 JSAMPROW outptr; 03150 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 03151 int ctr; 03152 int workspace[8*7]; /* buffers data between passes */ 03153 SHIFT_TEMPS 03154 03155 /* Pass 1: process columns from input, store into work array. 03156 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). 03157 */ 03158 03159 inptr = coef_block; 03160 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 03161 wsptr = workspace; 03162 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 03163 /* Even part */ 03164 03165 tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 03166 tmp23 <<= CONST_BITS; 03167 /* Add fudge factor here for final descale. */ 03168 tmp23 += ONE << (CONST_BITS-PASS1_BITS-1); 03169 03170 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 03171 z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 03172 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 03173 03174 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 03175 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 03176 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 03177 tmp10 = z1 + z3; 03178 z2 -= tmp10; 03179 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ 03180 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 03181 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 03182 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 03183 03184 /* Odd part */ 03185 03186 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 03187 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 03188 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 03189 03190 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 03191 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 03192 tmp10 = tmp11 - tmp12; 03193 tmp11 += tmp12; 03194 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 03195 tmp11 += tmp12; 03196 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 03197 tmp10 += z2; 03198 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 03199 03200 /* Final output stage */ 03201 03202 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 03203 wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 03204 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 03205 wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 03206 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 03207 wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 03208 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS); 03209 } 03210 03211 /* Pass 2: process 7 rows from work array, store into output array. 03212 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). 03213 */ 03214 03215 wsptr = workspace; 03216 for (ctr = 0; ctr < 7; ctr++) { 03217 outptr = output_buf[ctr] + output_col; 03218 03219 /* Even part */ 03220 03221 /* Add range center and fudge factor for final descale and range-limit. */ 03222 z1 = (INT32) wsptr[0] + 03223 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 03224 (ONE << (PASS1_BITS+2))); 03225 z1 <<= CONST_BITS; 03226 z4 = (INT32) wsptr[4]; 03227 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 03228 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 03229 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 03230 03231 tmp10 = z1 + z2; 03232 tmp11 = z1 + z3; 03233 tmp12 = z1 - z4; 03234 03235 tmp23 = z1 - ((z2 + z3 - z4) << 1); /* c0 = (c4+c12-c8)*2 */ 03236 03237 z1 = (INT32) wsptr[2]; 03238 z2 = (INT32) wsptr[6]; 03239 03240 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 03241 03242 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 03243 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 03244 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 03245 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 03246 03247 tmp20 = tmp10 + tmp13; 03248 tmp26 = tmp10 - tmp13; 03249 tmp21 = tmp11 + tmp14; 03250 tmp25 = tmp11 - tmp14; 03251 tmp22 = tmp12 + tmp15; 03252 tmp24 = tmp12 - tmp15; 03253 03254 /* Odd part */ 03255 03256 z1 = (INT32) wsptr[1]; 03257 z2 = (INT32) wsptr[3]; 03258 z3 = (INT32) wsptr[5]; 03259 z4 = (INT32) wsptr[7]; 03260 z4 <<= CONST_BITS; 03261 03262 tmp14 = z1 + z3; 03263 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 03264 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 03265 tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 03266 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 03267 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 03268 z1 -= z2; 03269 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4; /* c11 */ 03270 tmp16 += tmp15; 03271 tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4; /* -c13 */ 03272 tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 03273 tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 03274 tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 03275 tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 03276 tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 03277 03278 tmp13 = ((z1 - z3) << CONST_BITS) + z4; 03279 03280 /* Final output stage */ 03281 03282 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 03283 CONST_BITS+PASS1_BITS+3) 03284 & RANGE_MASK]; 03285 outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 03286 CONST_BITS+PASS1_BITS+3) 03287 & RANGE_MASK]; 03288 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 03289 CONST_BITS+PASS1_BITS+3) 03290 & RANGE_MASK]; 03291 outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 03292 CONST_BITS+PASS1_BITS+3) 03293 & RANGE_MASK]; 03294 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 03295 CONST_BITS+PASS1_BITS+3) 03296 & RANGE_MASK]; 03297 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 03298 CONST_BITS+PASS1_BITS+3) 03299 & RANGE_MASK]; 03300 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 03301 CONST_BITS+PASS1_BITS+3) 03302 & RANGE_MASK]; 03303 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 03304 CONST_BITS+PASS1_BITS+3) 03305 & RANGE_MASK]; 03306 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 03307 CONST_BITS+PASS1_BITS+3) 03308 & RANGE_MASK]; 03309 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 03310 CONST_BITS+PASS1_BITS+3) 03311 & RANGE_MASK]; 03312 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 03313 CONST_BITS+PASS1_BITS+3) 03314 & RANGE_MASK]; 03315 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 03316 CONST_BITS+PASS1_BITS+3) 03317 & RANGE_MASK]; 03318 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16, 03319 CONST_BITS+PASS1_BITS+3) 03320 & RANGE_MASK]; 03321 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16, 03322 CONST_BITS+PASS1_BITS+3) 03323 & RANGE_MASK]; 03324 03325 wsptr += 8; /* advance pointer to next row */ 03326 } 03327 } 03328 03329 03330 /* 03331 * Perform dequantization and inverse DCT on one block of coefficients, 03332 * producing a 12x6 output block. 03333 * 03334 * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows). 03335 */ 03336 03337 GLOBAL(void) 03338 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 03339 JCOEFPTR coef_block, 03340 JSAMPARRAY output_buf, JDIMENSION output_col) 03341 { 03342 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 03343 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 03344 INT32 z1, z2, z3, z4; 03345 JCOEFPTR inptr; 03346 ISLOW_MULT_TYPE * quantptr; 03347 int * wsptr; 03348 JSAMPROW outptr; 03349 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 03350 int ctr; 03351 int workspace[8*6]; /* buffers data between passes */ 03352 SHIFT_TEMPS 03353 03354 /* Pass 1: process columns from input, store into work array. 03355 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 03356 */ 03357 03358 inptr = coef_block; 03359 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 03360 wsptr = workspace; 03361 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 03362 /* Even part */ 03363 03364 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 03365 tmp10 <<= CONST_BITS; 03366 /* Add fudge factor here for final descale. */ 03367 tmp10 += ONE << (CONST_BITS-PASS1_BITS-1); 03368 tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 03369 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ 03370 tmp11 = tmp10 + tmp20; 03371 tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS); 03372 tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 03373 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ 03374 tmp20 = tmp11 + tmp10; 03375 tmp22 = tmp11 - tmp10; 03376 03377 /* Odd part */ 03378 03379 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 03380 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 03381 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 03382 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 03383 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); 03384 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); 03385 tmp11 = (z1 - z2 - z3) << PASS1_BITS; 03386 03387 /* Final output stage */ 03388 03389 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 03390 wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 03391 wsptr[8*1] = (int) (tmp21 + tmp11); 03392 wsptr[8*4] = (int) (tmp21 - tmp11); 03393 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 03394 wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 03395 } 03396 03397 /* Pass 2: process 6 rows from work array, store into output array. 03398 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). 03399 */ 03400 03401 wsptr = workspace; 03402 for (ctr = 0; ctr < 6; ctr++) { 03403 outptr = output_buf[ctr] + output_col; 03404 03405 /* Even part */ 03406 03407 /* Add range center and fudge factor for final descale and range-limit. */ 03408 z3 = (INT32) wsptr[0] + 03409 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 03410 (ONE << (PASS1_BITS+2))); 03411 z3 <<= CONST_BITS; 03412 03413 z4 = (INT32) wsptr[4]; 03414 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 03415 03416 tmp10 = z3 + z4; 03417 tmp11 = z3 - z4; 03418 03419 z1 = (INT32) wsptr[2]; 03420 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 03421 z1 <<= CONST_BITS; 03422 z2 = (INT32) wsptr[6]; 03423 z2 <<= CONST_BITS; 03424 03425 tmp12 = z1 - z2; 03426 03427 tmp21 = z3 + tmp12; 03428 tmp24 = z3 - tmp12; 03429 03430 tmp12 = z4 + z2; 03431 03432 tmp20 = tmp10 + tmp12; 03433 tmp25 = tmp10 - tmp12; 03434 03435 tmp12 = z4 - z1 - z2; 03436 03437 tmp22 = tmp11 + tmp12; 03438 tmp23 = tmp11 - tmp12; 03439 03440 /* Odd part */ 03441 03442 z1 = (INT32) wsptr[1]; 03443 z2 = (INT32) wsptr[3]; 03444 z3 = (INT32) wsptr[5]; 03445 z4 = (INT32) wsptr[7]; 03446 03447 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 03448 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 03449 03450 tmp10 = z1 + z3; 03451 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 03452 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 03453 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 03454 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 03455 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 03456 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 03457 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 03458 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 03459 03460 z1 -= z4; 03461 z2 -= z3; 03462 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 03463 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 03464 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 03465 03466 /* Final output stage */ 03467 03468 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 03469 CONST_BITS+PASS1_BITS+3) 03470 & RANGE_MASK]; 03471 outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 03472 CONST_BITS+PASS1_BITS+3) 03473 & RANGE_MASK]; 03474 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 03475 CONST_BITS+PASS1_BITS+3) 03476 & RANGE_MASK]; 03477 outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 03478 CONST_BITS+PASS1_BITS+3) 03479 & RANGE_MASK]; 03480 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 03481 CONST_BITS+PASS1_BITS+3) 03482 & RANGE_MASK]; 03483 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 03484 CONST_BITS+PASS1_BITS+3) 03485 & RANGE_MASK]; 03486 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 03487 CONST_BITS+PASS1_BITS+3) 03488 & RANGE_MASK]; 03489 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 03490 CONST_BITS+PASS1_BITS+3) 03491 & RANGE_MASK]; 03492 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 03493 CONST_BITS+PASS1_BITS+3) 03494 & RANGE_MASK]; 03495 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 03496 CONST_BITS+PASS1_BITS+3) 03497 & RANGE_MASK]; 03498 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15, 03499 CONST_BITS+PASS1_BITS+3) 03500 & RANGE_MASK]; 03501 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15, 03502 CONST_BITS+PASS1_BITS+3) 03503 & RANGE_MASK]; 03504 03505 wsptr += 8; /* advance pointer to next row */ 03506 } 03507 } 03508 03509 03510 /* 03511 * Perform dequantization and inverse DCT on one block of coefficients, 03512 * producing a 10x5 output block. 03513 * 03514 * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows). 03515 */ 03516 03517 GLOBAL(void) 03518 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 03519 JCOEFPTR coef_block, 03520 JSAMPARRAY output_buf, JDIMENSION output_col) 03521 { 03522 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 03523 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; 03524 INT32 z1, z2, z3, z4; 03525 JCOEFPTR inptr; 03526 ISLOW_MULT_TYPE * quantptr; 03527 int * wsptr; 03528 JSAMPROW outptr; 03529 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 03530 int ctr; 03531 int workspace[8*5]; /* buffers data between passes */ 03532 SHIFT_TEMPS 03533 03534 /* Pass 1: process columns from input, store into work array. 03535 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). 03536 */ 03537 03538 inptr = coef_block; 03539 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 03540 wsptr = workspace; 03541 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 03542 /* Even part */ 03543 03544 tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 03545 tmp12 <<= CONST_BITS; 03546 /* Add fudge factor here for final descale. */ 03547 tmp12 += ONE << (CONST_BITS-PASS1_BITS-1); 03548 tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 03549 tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 03550 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ 03551 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ 03552 z3 = tmp12 + z2; 03553 tmp10 = z3 + z1; 03554 tmp11 = z3 - z1; 03555 tmp12 -= z2 << 2; 03556 03557 /* Odd part */ 03558 03559 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 03560 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 03561 03562 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 03563 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 03564 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 03565 03566 /* Final output stage */ 03567 03568 wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS); 03569 wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS); 03570 wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS); 03571 wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS); 03572 wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS); 03573 } 03574 03575 /* Pass 2: process 5 rows from work array, store into output array. 03576 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). 03577 */ 03578 03579 wsptr = workspace; 03580 for (ctr = 0; ctr < 5; ctr++) { 03581 outptr = output_buf[ctr] + output_col; 03582 03583 /* Even part */ 03584 03585 /* Add range center and fudge factor for final descale and range-limit. */ 03586 z3 = (INT32) wsptr[0] + 03587 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 03588 (ONE << (PASS1_BITS+2))); 03589 z3 <<= CONST_BITS; 03590 z4 = (INT32) wsptr[4]; 03591 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 03592 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 03593 tmp10 = z3 + z1; 03594 tmp11 = z3 - z2; 03595 03596 tmp22 = z3 - ((z1 - z2) << 1); /* c0 = (c4-c8)*2 */ 03597 03598 z2 = (INT32) wsptr[2]; 03599 z3 = (INT32) wsptr[6]; 03600 03601 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 03602 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 03603 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 03604 03605 tmp20 = tmp10 + tmp12; 03606 tmp24 = tmp10 - tmp12; 03607 tmp21 = tmp11 + tmp13; 03608 tmp23 = tmp11 - tmp13; 03609 03610 /* Odd part */ 03611 03612 z1 = (INT32) wsptr[1]; 03613 z2 = (INT32) wsptr[3]; 03614 z3 = (INT32) wsptr[5]; 03615 z3 <<= CONST_BITS; 03616 z4 = (INT32) wsptr[7]; 03617 03618 tmp11 = z2 + z4; 03619 tmp13 = z2 - z4; 03620 03621 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 03622 03623 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 03624 z4 = z3 + tmp12; 03625 03626 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 03627 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 03628 03629 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 03630 z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1)); 03631 03632 tmp12 = ((z1 - tmp13) << CONST_BITS) - z3; 03633 03634 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 03635 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 03636 03637 /* Final output stage */ 03638 03639 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 03640 CONST_BITS+PASS1_BITS+3) 03641 & RANGE_MASK]; 03642 outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 03643 CONST_BITS+PASS1_BITS+3) 03644 & RANGE_MASK]; 03645 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 03646 CONST_BITS+PASS1_BITS+3) 03647 & RANGE_MASK]; 03648 outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 03649 CONST_BITS+PASS1_BITS+3) 03650 & RANGE_MASK]; 03651 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 03652 CONST_BITS+PASS1_BITS+3) 03653 & RANGE_MASK]; 03654 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 03655 CONST_BITS+PASS1_BITS+3) 03656 & RANGE_MASK]; 03657 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13, 03658 CONST_BITS+PASS1_BITS+3) 03659 & RANGE_MASK]; 03660 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13, 03661 CONST_BITS+PASS1_BITS+3) 03662 & RANGE_MASK]; 03663 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14, 03664 CONST_BITS+PASS1_BITS+3) 03665 & RANGE_MASK]; 03666 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14, 03667 CONST_BITS+PASS1_BITS+3) 03668 & RANGE_MASK]; 03669 03670 wsptr += 8; /* advance pointer to next row */ 03671 } 03672 } 03673 03674 03675 /* 03676 * Perform dequantization and inverse DCT on one block of coefficients, 03677 * producing a 8x4 output block. 03678 * 03679 * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). 03680 */ 03681 03682 GLOBAL(void) 03683 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 03684 JCOEFPTR coef_block, 03685 JSAMPARRAY output_buf, JDIMENSION output_col) 03686 { 03687 INT32 tmp0, tmp1, tmp2, tmp3; 03688 INT32 tmp10, tmp11, tmp12, tmp13; 03689 INT32 z1, z2, z3; 03690 JCOEFPTR inptr; 03691 ISLOW_MULT_TYPE * quantptr; 03692 int * wsptr; 03693 JSAMPROW outptr; 03694 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 03695 int ctr; 03696 int workspace[8*4]; /* buffers data between passes */ 03697 SHIFT_TEMPS 03698 03699 /* Pass 1: process columns from input, store into work array. 03700 * 4-point IDCT kernel, 03701 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 03702 */ 03703 03704 inptr = coef_block; 03705 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 03706 wsptr = workspace; 03707 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 03708 /* Even part */ 03709 03710 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 03711 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 03712 03713 tmp10 = (tmp0 + tmp2) << PASS1_BITS; 03714 tmp12 = (tmp0 - tmp2) << PASS1_BITS; 03715 03716 /* Odd part */ 03717 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 03718 03719 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 03720 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 03721 03722 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 03723 /* Add fudge factor here for final descale. */ 03724 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 03725 tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */ 03726 CONST_BITS-PASS1_BITS); 03727 tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */ 03728 CONST_BITS-PASS1_BITS); 03729 03730 /* Final output stage */ 03731 03732 wsptr[8*0] = (int) (tmp10 + tmp0); 03733 wsptr[8*3] = (int) (tmp10 - tmp0); 03734 wsptr[8*1] = (int) (tmp12 + tmp2); 03735 wsptr[8*2] = (int) (tmp12 - tmp2); 03736 } 03737 03738 /* Pass 2: process rows from work array, store into output array. 03739 * Note that we must descale the results by a factor of 8 == 2**3, 03740 * and also undo the PASS1_BITS scaling. 03741 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 03742 */ 03743 03744 wsptr = workspace; 03745 for (ctr = 0; ctr < 4; ctr++) { 03746 outptr = output_buf[ctr] + output_col; 03747 03748 /* Even part: reverse the even part of the forward DCT. 03749 * The rotator is c(-6). 03750 */ 03751 03752 /* Add range center and fudge factor for final descale and range-limit. */ 03753 z2 = (INT32) wsptr[0] + 03754 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 03755 (ONE << (PASS1_BITS+2))); 03756 z3 = (INT32) wsptr[4]; 03757 03758 tmp0 = (z2 + z3) << CONST_BITS; 03759 tmp1 = (z2 - z3) << CONST_BITS; 03760 03761 z2 = (INT32) wsptr[2]; 03762 z3 = (INT32) wsptr[6]; 03763 03764 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 03765 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 03766 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 03767 03768 tmp10 = tmp0 + tmp2; 03769 tmp13 = tmp0 - tmp2; 03770 tmp11 = tmp1 + tmp3; 03771 tmp12 = tmp1 - tmp3; 03772 03773 /* Odd part per figure 8; the matrix is unitary and hence its 03774 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 03775 */ 03776 03777 tmp0 = (INT32) wsptr[7]; 03778 tmp1 = (INT32) wsptr[5]; 03779 tmp2 = (INT32) wsptr[3]; 03780 tmp3 = (INT32) wsptr[1]; 03781 03782 z2 = tmp0 + tmp2; 03783 z3 = tmp1 + tmp3; 03784 03785 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 03786 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 03787 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 03788 z2 += z1; 03789 z3 += z1; 03790 03791 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 03792 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 03793 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 03794 tmp0 += z1 + z2; 03795 tmp3 += z1 + z3; 03796 03797 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 03798 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 03799 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 03800 tmp1 += z1 + z3; 03801 tmp2 += z1 + z2; 03802 03803 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 03804 03805 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, 03806 CONST_BITS+PASS1_BITS+3) 03807 & RANGE_MASK]; 03808 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, 03809 CONST_BITS+PASS1_BITS+3) 03810 & RANGE_MASK]; 03811 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, 03812 CONST_BITS+PASS1_BITS+3) 03813 & RANGE_MASK]; 03814 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, 03815 CONST_BITS+PASS1_BITS+3) 03816 & RANGE_MASK]; 03817 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, 03818 CONST_BITS+PASS1_BITS+3) 03819 & RANGE_MASK]; 03820 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, 03821 CONST_BITS+PASS1_BITS+3) 03822 & RANGE_MASK]; 03823 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, 03824 CONST_BITS+PASS1_BITS+3) 03825 & RANGE_MASK]; 03826 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, 03827 CONST_BITS+PASS1_BITS+3) 03828 & RANGE_MASK]; 03829 03830 wsptr += DCTSIZE; /* advance pointer to next row */ 03831 } 03832 } 03833 03834 03835 /* 03836 * Perform dequantization and inverse DCT on one block of coefficients, 03837 * producing a reduced-size 6x3 output block. 03838 * 03839 * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). 03840 */ 03841 03842 GLOBAL(void) 03843 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 03844 JCOEFPTR coef_block, 03845 JSAMPARRAY output_buf, JDIMENSION output_col) 03846 { 03847 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; 03848 INT32 z1, z2, z3; 03849 JCOEFPTR inptr; 03850 ISLOW_MULT_TYPE * quantptr; 03851 int * wsptr; 03852 JSAMPROW outptr; 03853 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 03854 int ctr; 03855 int workspace[6*3]; /* buffers data between passes */ 03856 SHIFT_TEMPS 03857 03858 /* Pass 1: process columns from input, store into work array. 03859 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). 03860 */ 03861 03862 inptr = coef_block; 03863 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 03864 wsptr = workspace; 03865 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { 03866 /* Even part */ 03867 03868 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 03869 tmp0 <<= CONST_BITS; 03870 /* Add fudge factor here for final descale. */ 03871 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 03872 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 03873 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 03874 tmp10 = tmp0 + tmp12; 03875 tmp2 = tmp0 - tmp12 - tmp12; 03876 03877 /* Odd part */ 03878 03879 tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 03880 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 03881 03882 /* Final output stage */ 03883 03884 wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 03885 wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 03886 wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS); 03887 } 03888 03889 /* Pass 2: process 3 rows from work array, store into output array. 03890 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 03891 */ 03892 03893 wsptr = workspace; 03894 for (ctr = 0; ctr < 3; ctr++) { 03895 outptr = output_buf[ctr] + output_col; 03896 03897 /* Even part */ 03898 03899 /* Add range center and fudge factor for final descale and range-limit. */ 03900 tmp0 = (INT32) wsptr[0] + 03901 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 03902 (ONE << (PASS1_BITS+2))); 03903 tmp0 <<= CONST_BITS; 03904 tmp2 = (INT32) wsptr[4]; 03905 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 03906 tmp1 = tmp0 + tmp10; 03907 tmp11 = tmp0 - tmp10 - tmp10; 03908 tmp10 = (INT32) wsptr[2]; 03909 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 03910 tmp10 = tmp1 + tmp0; 03911 tmp12 = tmp1 - tmp0; 03912 03913 /* Odd part */ 03914 03915 z1 = (INT32) wsptr[1]; 03916 z2 = (INT32) wsptr[3]; 03917 z3 = (INT32) wsptr[5]; 03918 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 03919 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 03920 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 03921 tmp1 = (z1 - z2 - z3) << CONST_BITS; 03922 03923 /* Final output stage */ 03924 03925 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 03926 CONST_BITS+PASS1_BITS+3) 03927 & RANGE_MASK]; 03928 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 03929 CONST_BITS+PASS1_BITS+3) 03930 & RANGE_MASK]; 03931 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1, 03932 CONST_BITS+PASS1_BITS+3) 03933 & RANGE_MASK]; 03934 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1, 03935 CONST_BITS+PASS1_BITS+3) 03936 & RANGE_MASK]; 03937 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 03938 CONST_BITS+PASS1_BITS+3) 03939 & RANGE_MASK]; 03940 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 03941 CONST_BITS+PASS1_BITS+3) 03942 & RANGE_MASK]; 03943 03944 wsptr += 6; /* advance pointer to next row */ 03945 } 03946 } 03947 03948 03949 /* 03950 * Perform dequantization and inverse DCT on one block of coefficients, 03951 * producing a 4x2 output block. 03952 * 03953 * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). 03954 */ 03955 03956 GLOBAL(void) 03957 jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 03958 JCOEFPTR coef_block, 03959 JSAMPARRAY output_buf, JDIMENSION output_col) 03960 { 03961 INT32 tmp0, tmp2, tmp10, tmp12; 03962 INT32 z1, z2, z3; 03963 JCOEFPTR inptr; 03964 ISLOW_MULT_TYPE * quantptr; 03965 INT32 * wsptr; 03966 JSAMPROW outptr; 03967 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 03968 int ctr; 03969 INT32 workspace[4*2]; /* buffers data between passes */ 03970 SHIFT_TEMPS 03971 03972 /* Pass 1: process columns from input, store into work array. */ 03973 03974 inptr = coef_block; 03975 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 03976 wsptr = workspace; 03977 for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) { 03978 /* Even part */ 03979 03980 tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 03981 03982 /* Odd part */ 03983 03984 tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 03985 03986 /* Final output stage */ 03987 03988 wsptr[4*0] = tmp10 + tmp0; 03989 wsptr[4*1] = tmp10 - tmp0; 03990 } 03991 03992 /* Pass 2: process 2 rows from work array, store into output array. 03993 * 4-point IDCT kernel, 03994 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 03995 */ 03996 03997 wsptr = workspace; 03998 for (ctr = 0; ctr < 2; ctr++) { 03999 outptr = output_buf[ctr] + output_col; 04000 04001 /* Even part */ 04002 04003 /* Add range center and fudge factor for final descale and range-limit. */ 04004 tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2)); 04005 tmp2 = wsptr[2]; 04006 04007 tmp10 = (tmp0 + tmp2) << CONST_BITS; 04008 tmp12 = (tmp0 - tmp2) << CONST_BITS; 04009 04010 /* Odd part */ 04011 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 04012 04013 z2 = wsptr[1]; 04014 z3 = wsptr[3]; 04015 04016 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 04017 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 04018 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 04019 04020 /* Final output stage */ 04021 04022 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 04023 CONST_BITS+3) 04024 & RANGE_MASK]; 04025 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 04026 CONST_BITS+3) 04027 & RANGE_MASK]; 04028 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 04029 CONST_BITS+3) 04030 & RANGE_MASK]; 04031 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 04032 CONST_BITS+3) 04033 & RANGE_MASK]; 04034 04035 wsptr += 4; /* advance pointer to next row */ 04036 } 04037 } 04038 04039 04040 /* 04041 * Perform dequantization and inverse DCT on one block of coefficients, 04042 * producing a 2x1 output block. 04043 * 04044 * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). 04045 */ 04046 04047 GLOBAL(void) 04048 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 04049 JCOEFPTR coef_block, 04050 JSAMPARRAY output_buf, JDIMENSION output_col) 04051 { 04052 DCTELEM tmp0, tmp1; 04053 ISLOW_MULT_TYPE * quantptr; 04054 JSAMPROW outptr; 04055 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 04056 ISHIFT_TEMPS 04057 04058 /* Pass 1: empty. */ 04059 04060 /* Pass 2: process 1 row from input, store into output array. */ 04061 04062 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 04063 outptr = output_buf[0] + output_col; 04064 04065 /* Even part */ 04066 04067 tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]); 04068 /* Add range center and fudge factor for final descale and range-limit. */ 04069 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); 04070 04071 /* Odd part */ 04072 04073 tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]); 04074 04075 /* Final output stage */ 04076 04077 outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; 04078 outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; 04079 } 04080 04081 04082 /* 04083 * Perform dequantization and inverse DCT on one block of coefficients, 04084 * producing a 8x16 output block. 04085 * 04086 * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows). 04087 */ 04088 04089 GLOBAL(void) 04090 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 04091 JCOEFPTR coef_block, 04092 JSAMPARRAY output_buf, JDIMENSION output_col) 04093 { 04094 INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13; 04095 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27; 04096 INT32 z1, z2, z3, z4; 04097 JCOEFPTR inptr; 04098 ISLOW_MULT_TYPE * quantptr; 04099 int * wsptr; 04100 JSAMPROW outptr; 04101 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 04102 int ctr; 04103 int workspace[8*16]; /* buffers data between passes */ 04104 SHIFT_TEMPS 04105 04106 /* Pass 1: process columns from input, store into work array. 04107 * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32). 04108 */ 04109 04110 inptr = coef_block; 04111 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 04112 wsptr = workspace; 04113 for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) { 04114 /* Even part */ 04115 04116 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 04117 tmp0 <<= CONST_BITS; 04118 /* Add fudge factor here for final descale. */ 04119 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 04120 04121 z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 04122 tmp1 = MULTIPLY(z1, FIX(1.306562965)); /* c4[16] = c2[8] */ 04123 tmp2 = MULTIPLY(z1, FIX_0_541196100); /* c12[16] = c6[8] */ 04124 04125 tmp10 = tmp0 + tmp1; 04126 tmp11 = tmp0 - tmp1; 04127 tmp12 = tmp0 + tmp2; 04128 tmp13 = tmp0 - tmp2; 04129 04130 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 04131 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 04132 z3 = z1 - z2; 04133 z4 = MULTIPLY(z3, FIX(0.275899379)); /* c14[16] = c7[8] */ 04134 z3 = MULTIPLY(z3, FIX(1.387039845)); /* c2[16] = c1[8] */ 04135 04136 tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447); /* (c6+c2)[16] = (c3+c1)[8] */ 04137 tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223); /* (c6-c14)[16] = (c3-c7)[8] */ 04138 tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */ 04139 tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */ 04140 04141 tmp20 = tmp10 + tmp0; 04142 tmp27 = tmp10 - tmp0; 04143 tmp21 = tmp12 + tmp1; 04144 tmp26 = tmp12 - tmp1; 04145 tmp22 = tmp13 + tmp2; 04146 tmp25 = tmp13 - tmp2; 04147 tmp23 = tmp11 + tmp3; 04148 tmp24 = tmp11 - tmp3; 04149 04150 /* Odd part */ 04151 04152 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 04153 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 04154 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 04155 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 04156 04157 tmp11 = z1 + z3; 04158 04159 tmp1 = MULTIPLY(z1 + z2, FIX(1.353318001)); /* c3 */ 04160 tmp2 = MULTIPLY(tmp11, FIX(1.247225013)); /* c5 */ 04161 tmp3 = MULTIPLY(z1 + z4, FIX(1.093201867)); /* c7 */ 04162 tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586)); /* c9 */ 04163 tmp11 = MULTIPLY(tmp11, FIX(0.666655658)); /* c11 */ 04164 tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528)); /* c13 */ 04165 tmp0 = tmp1 + tmp2 + tmp3 - 04166 MULTIPLY(z1, FIX(2.286341144)); /* c7+c5+c3-c1 */ 04167 tmp13 = tmp10 + tmp11 + tmp12 - 04168 MULTIPLY(z1, FIX(1.835730603)); /* c9+c11+c13-c15 */ 04169 z1 = MULTIPLY(z2 + z3, FIX(0.138617169)); /* c15 */ 04170 tmp1 += z1 + MULTIPLY(z2, FIX(0.071888074)); /* c9+c11-c3-c15 */ 04171 tmp2 += z1 - MULTIPLY(z3, FIX(1.125726048)); /* c5+c7+c15-c3 */ 04172 z1 = MULTIPLY(z3 - z2, FIX(1.407403738)); /* c1 */ 04173 tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282)); /* c1+c11-c9-c13 */ 04174 tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411)); /* c1+c5+c13-c7 */ 04175 z2 += z4; 04176 z1 = MULTIPLY(z2, - FIX(0.666655658)); /* -c11 */ 04177 tmp1 += z1; 04178 tmp3 += z1 + MULTIPLY(z4, FIX(1.065388962)); /* c3+c11+c15-c7 */ 04179 z2 = MULTIPLY(z2, - FIX(1.247225013)); /* -c5 */ 04180 tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809)); /* c1+c5+c9-c13 */ 04181 tmp12 += z2; 04182 z2 = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */ 04183 tmp2 += z2; 04184 tmp3 += z2; 04185 z2 = MULTIPLY(z4 - z3, FIX(0.410524528)); /* c13 */ 04186 tmp10 += z2; 04187 tmp11 += z2; 04188 04189 /* Final output stage */ 04190 04191 wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp0, CONST_BITS-PASS1_BITS); 04192 wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0, CONST_BITS-PASS1_BITS); 04193 wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp1, CONST_BITS-PASS1_BITS); 04194 wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1, CONST_BITS-PASS1_BITS); 04195 wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp2, CONST_BITS-PASS1_BITS); 04196 wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2, CONST_BITS-PASS1_BITS); 04197 wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp3, CONST_BITS-PASS1_BITS); 04198 wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3, CONST_BITS-PASS1_BITS); 04199 wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS); 04200 wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS); 04201 wsptr[8*5] = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS); 04202 wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS); 04203 wsptr[8*6] = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS); 04204 wsptr[8*9] = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS); 04205 wsptr[8*7] = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS); 04206 wsptr[8*8] = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS); 04207 } 04208 04209 /* Pass 2: process rows from work array, store into output array. 04210 * Note that we must descale the results by a factor of 8 == 2**3, 04211 * and also undo the PASS1_BITS scaling. 04212 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 04213 */ 04214 04215 wsptr = workspace; 04216 for (ctr = 0; ctr < 16; ctr++) { 04217 outptr = output_buf[ctr] + output_col; 04218 04219 /* Even part: reverse the even part of the forward DCT. 04220 * The rotator is c(-6). 04221 */ 04222 04223 /* Add range center and fudge factor for final descale and range-limit. */ 04224 z2 = (INT32) wsptr[0] + 04225 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 04226 (ONE << (PASS1_BITS+2))); 04227 z3 = (INT32) wsptr[4]; 04228 04229 tmp0 = (z2 + z3) << CONST_BITS; 04230 tmp1 = (z2 - z3) << CONST_BITS; 04231 04232 z2 = (INT32) wsptr[2]; 04233 z3 = (INT32) wsptr[6]; 04234 04235 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 04236 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 04237 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 04238 04239 tmp10 = tmp0 + tmp2; 04240 tmp13 = tmp0 - tmp2; 04241 tmp11 = tmp1 + tmp3; 04242 tmp12 = tmp1 - tmp3; 04243 04244 /* Odd part per figure 8; the matrix is unitary and hence its 04245 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 04246 */ 04247 04248 tmp0 = (INT32) wsptr[7]; 04249 tmp1 = (INT32) wsptr[5]; 04250 tmp2 = (INT32) wsptr[3]; 04251 tmp3 = (INT32) wsptr[1]; 04252 04253 z2 = tmp0 + tmp2; 04254 z3 = tmp1 + tmp3; 04255 04256 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 04257 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 04258 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 04259 z2 += z1; 04260 z3 += z1; 04261 04262 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 04263 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 04264 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 04265 tmp0 += z1 + z2; 04266 tmp3 += z1 + z3; 04267 04268 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 04269 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 04270 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 04271 tmp1 += z1 + z3; 04272 tmp2 += z1 + z2; 04273 04274 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 04275 04276 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3, 04277 CONST_BITS+PASS1_BITS+3) 04278 & RANGE_MASK]; 04279 outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3, 04280 CONST_BITS+PASS1_BITS+3) 04281 & RANGE_MASK]; 04282 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2, 04283 CONST_BITS+PASS1_BITS+3) 04284 & RANGE_MASK]; 04285 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2, 04286 CONST_BITS+PASS1_BITS+3) 04287 & RANGE_MASK]; 04288 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1, 04289 CONST_BITS+PASS1_BITS+3) 04290 & RANGE_MASK]; 04291 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1, 04292 CONST_BITS+PASS1_BITS+3) 04293 & RANGE_MASK]; 04294 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0, 04295 CONST_BITS+PASS1_BITS+3) 04296 & RANGE_MASK]; 04297 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0, 04298 CONST_BITS+PASS1_BITS+3) 04299 & RANGE_MASK]; 04300 04301 wsptr += DCTSIZE; /* advance pointer to next row */ 04302 } 04303 } 04304 04305 04306 /* 04307 * Perform dequantization and inverse DCT on one block of coefficients, 04308 * producing a 7x14 output block. 04309 * 04310 * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows). 04311 */ 04312 04313 GLOBAL(void) 04314 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 04315 JCOEFPTR coef_block, 04316 JSAMPARRAY output_buf, JDIMENSION output_col) 04317 { 04318 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; 04319 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26; 04320 INT32 z1, z2, z3, z4; 04321 JCOEFPTR inptr; 04322 ISLOW_MULT_TYPE * quantptr; 04323 int * wsptr; 04324 JSAMPROW outptr; 04325 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 04326 int ctr; 04327 int workspace[7*14]; /* buffers data between passes */ 04328 SHIFT_TEMPS 04329 04330 /* Pass 1: process columns from input, store into work array. 04331 * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28). 04332 */ 04333 04334 inptr = coef_block; 04335 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 04336 wsptr = workspace; 04337 for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) { 04338 /* Even part */ 04339 04340 z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 04341 z1 <<= CONST_BITS; 04342 /* Add fudge factor here for final descale. */ 04343 z1 += ONE << (CONST_BITS-PASS1_BITS-1); 04344 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 04345 z2 = MULTIPLY(z4, FIX(1.274162392)); /* c4 */ 04346 z3 = MULTIPLY(z4, FIX(0.314692123)); /* c12 */ 04347 z4 = MULTIPLY(z4, FIX(0.881747734)); /* c8 */ 04348 04349 tmp10 = z1 + z2; 04350 tmp11 = z1 + z3; 04351 tmp12 = z1 - z4; 04352 04353 tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */ 04354 CONST_BITS-PASS1_BITS); 04355 04356 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 04357 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 04358 04359 z3 = MULTIPLY(z1 + z2, FIX(1.105676686)); /* c6 */ 04360 04361 tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */ 04362 tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */ 04363 tmp15 = MULTIPLY(z1, FIX(0.613604268)) - /* c10 */ 04364 MULTIPLY(z2, FIX(1.378756276)); /* c2 */ 04365 04366 tmp20 = tmp10 + tmp13; 04367 tmp26 = tmp10 - tmp13; 04368 tmp21 = tmp11 + tmp14; 04369 tmp25 = tmp11 - tmp14; 04370 tmp22 = tmp12 + tmp15; 04371 tmp24 = tmp12 - tmp15; 04372 04373 /* Odd part */ 04374 04375 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 04376 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 04377 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 04378 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 04379 tmp13 = z4 << CONST_BITS; 04380 04381 tmp14 = z1 + z3; 04382 tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607)); /* c3 */ 04383 tmp12 = MULTIPLY(tmp14, FIX(1.197448846)); /* c5 */ 04384 tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */ 04385 tmp14 = MULTIPLY(tmp14, FIX(0.752406978)); /* c9 */ 04386 tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426)); /* c9+c11-c13 */ 04387 z1 -= z2; 04388 tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13; /* c11 */ 04389 tmp16 += tmp15; 04390 z1 += z4; 04391 z4 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */ 04392 tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948)); /* c3-c9-c13 */ 04393 tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773)); /* c3+c5-c13 */ 04394 z4 = MULTIPLY(z3 - z2, FIX(1.405321284)); /* c1 */ 04395 tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */ 04396 tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567)); /* c1+c11-c5 */ 04397 04398 tmp13 = (z1 - z3) << PASS1_BITS; 04399 04400 /* Final output stage */ 04401 04402 wsptr[7*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 04403 wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 04404 wsptr[7*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 04405 wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 04406 wsptr[7*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 04407 wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 04408 wsptr[7*3] = (int) (tmp23 + tmp13); 04409 wsptr[7*10] = (int) (tmp23 - tmp13); 04410 wsptr[7*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 04411 wsptr[7*9] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 04412 wsptr[7*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 04413 wsptr[7*8] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 04414 wsptr[7*6] = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS); 04415 wsptr[7*7] = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS); 04416 } 04417 04418 /* Pass 2: process 14 rows from work array, store into output array. 04419 * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14). 04420 */ 04421 04422 wsptr = workspace; 04423 for (ctr = 0; ctr < 14; ctr++) { 04424 outptr = output_buf[ctr] + output_col; 04425 04426 /* Even part */ 04427 04428 /* Add range center and fudge factor for final descale and range-limit. */ 04429 tmp23 = (INT32) wsptr[0] + 04430 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 04431 (ONE << (PASS1_BITS+2))); 04432 tmp23 <<= CONST_BITS; 04433 04434 z1 = (INT32) wsptr[2]; 04435 z2 = (INT32) wsptr[4]; 04436 z3 = (INT32) wsptr[6]; 04437 04438 tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734)); /* c4 */ 04439 tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123)); /* c6 */ 04440 tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */ 04441 tmp10 = z1 + z3; 04442 z2 -= tmp10; 04443 tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */ 04444 tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536)); /* c2-c4-c6 */ 04445 tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249)); /* c2+c4+c6 */ 04446 tmp23 += MULTIPLY(z2, FIX(1.414213562)); /* c0 */ 04447 04448 /* Odd part */ 04449 04450 z1 = (INT32) wsptr[1]; 04451 z2 = (INT32) wsptr[3]; 04452 z3 = (INT32) wsptr[5]; 04453 04454 tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347)); /* (c3+c1-c5)/2 */ 04455 tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339)); /* (c3+c5-c1)/2 */ 04456 tmp10 = tmp11 - tmp12; 04457 tmp11 += tmp12; 04458 tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276)); /* -c1 */ 04459 tmp11 += tmp12; 04460 z2 = MULTIPLY(z1 + z3, FIX(0.613604268)); /* c5 */ 04461 tmp10 += z2; 04462 tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693)); /* c3+c1-c5 */ 04463 04464 /* Final output stage */ 04465 04466 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 04467 CONST_BITS+PASS1_BITS+3) 04468 & RANGE_MASK]; 04469 outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 04470 CONST_BITS+PASS1_BITS+3) 04471 & RANGE_MASK]; 04472 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 04473 CONST_BITS+PASS1_BITS+3) 04474 & RANGE_MASK]; 04475 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 04476 CONST_BITS+PASS1_BITS+3) 04477 & RANGE_MASK]; 04478 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 04479 CONST_BITS+PASS1_BITS+3) 04480 & RANGE_MASK]; 04481 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 04482 CONST_BITS+PASS1_BITS+3) 04483 & RANGE_MASK]; 04484 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23, 04485 CONST_BITS+PASS1_BITS+3) 04486 & RANGE_MASK]; 04487 04488 wsptr += 7; /* advance pointer to next row */ 04489 } 04490 } 04491 04492 04493 /* 04494 * Perform dequantization and inverse DCT on one block of coefficients, 04495 * producing a 6x12 output block. 04496 * 04497 * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows). 04498 */ 04499 04500 GLOBAL(void) 04501 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 04502 JCOEFPTR coef_block, 04503 JSAMPARRAY output_buf, JDIMENSION output_col) 04504 { 04505 INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; 04506 INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25; 04507 INT32 z1, z2, z3, z4; 04508 JCOEFPTR inptr; 04509 ISLOW_MULT_TYPE * quantptr; 04510 int * wsptr; 04511 JSAMPROW outptr; 04512 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 04513 int ctr; 04514 int workspace[6*12]; /* buffers data between passes */ 04515 SHIFT_TEMPS 04516 04517 /* Pass 1: process columns from input, store into work array. 04518 * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24). 04519 */ 04520 04521 inptr = coef_block; 04522 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 04523 wsptr = workspace; 04524 for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) { 04525 /* Even part */ 04526 04527 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 04528 z3 <<= CONST_BITS; 04529 /* Add fudge factor here for final descale. */ 04530 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 04531 04532 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 04533 z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */ 04534 04535 tmp10 = z3 + z4; 04536 tmp11 = z3 - z4; 04537 04538 z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 04539 z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */ 04540 z1 <<= CONST_BITS; 04541 z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 04542 z2 <<= CONST_BITS; 04543 04544 tmp12 = z1 - z2; 04545 04546 tmp21 = z3 + tmp12; 04547 tmp24 = z3 - tmp12; 04548 04549 tmp12 = z4 + z2; 04550 04551 tmp20 = tmp10 + tmp12; 04552 tmp25 = tmp10 - tmp12; 04553 04554 tmp12 = z4 - z1 - z2; 04555 04556 tmp22 = tmp11 + tmp12; 04557 tmp23 = tmp11 - tmp12; 04558 04559 /* Odd part */ 04560 04561 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 04562 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 04563 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 04564 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 04565 04566 tmp11 = MULTIPLY(z2, FIX(1.306562965)); /* c3 */ 04567 tmp14 = MULTIPLY(z2, - FIX_0_541196100); /* -c9 */ 04568 04569 tmp10 = z1 + z3; 04570 tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669)); /* c7 */ 04571 tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384)); /* c5-c7 */ 04572 tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716)); /* c1-c5 */ 04573 tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580)); /* -(c7+c11) */ 04574 tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */ 04575 tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */ 04576 tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) - /* c7-c11 */ 04577 MULTIPLY(z4, FIX(1.982889723)); /* c5+c7 */ 04578 04579 z1 -= z4; 04580 z2 -= z3; 04581 z3 = MULTIPLY(z1 + z2, FIX_0_541196100); /* c9 */ 04582 tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865); /* c3-c9 */ 04583 tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065); /* c3+c9 */ 04584 04585 /* Final output stage */ 04586 04587 wsptr[6*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 04588 wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 04589 wsptr[6*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 04590 wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 04591 wsptr[6*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS); 04592 wsptr[6*9] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS); 04593 wsptr[6*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 04594 wsptr[6*8] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 04595 wsptr[6*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 04596 wsptr[6*7] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 04597 wsptr[6*5] = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS); 04598 wsptr[6*6] = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS); 04599 } 04600 04601 /* Pass 2: process 12 rows from work array, store into output array. 04602 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 04603 */ 04604 04605 wsptr = workspace; 04606 for (ctr = 0; ctr < 12; ctr++) { 04607 outptr = output_buf[ctr] + output_col; 04608 04609 /* Even part */ 04610 04611 /* Add range center and fudge factor for final descale and range-limit. */ 04612 tmp10 = (INT32) wsptr[0] + 04613 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 04614 (ONE << (PASS1_BITS+2))); 04615 tmp10 <<= CONST_BITS; 04616 tmp12 = (INT32) wsptr[4]; 04617 tmp20 = MULTIPLY(tmp12, FIX(0.707106781)); /* c4 */ 04618 tmp11 = tmp10 + tmp20; 04619 tmp21 = tmp10 - tmp20 - tmp20; 04620 tmp20 = (INT32) wsptr[2]; 04621 tmp10 = MULTIPLY(tmp20, FIX(1.224744871)); /* c2 */ 04622 tmp20 = tmp11 + tmp10; 04623 tmp22 = tmp11 - tmp10; 04624 04625 /* Odd part */ 04626 04627 z1 = (INT32) wsptr[1]; 04628 z2 = (INT32) wsptr[3]; 04629 z3 = (INT32) wsptr[5]; 04630 tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 04631 tmp10 = tmp11 + ((z1 + z2) << CONST_BITS); 04632 tmp12 = tmp11 + ((z3 - z2) << CONST_BITS); 04633 tmp11 = (z1 - z2 - z3) << CONST_BITS; 04634 04635 /* Final output stage */ 04636 04637 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10, 04638 CONST_BITS+PASS1_BITS+3) 04639 & RANGE_MASK]; 04640 outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10, 04641 CONST_BITS+PASS1_BITS+3) 04642 & RANGE_MASK]; 04643 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11, 04644 CONST_BITS+PASS1_BITS+3) 04645 & RANGE_MASK]; 04646 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11, 04647 CONST_BITS+PASS1_BITS+3) 04648 & RANGE_MASK]; 04649 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12, 04650 CONST_BITS+PASS1_BITS+3) 04651 & RANGE_MASK]; 04652 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12, 04653 CONST_BITS+PASS1_BITS+3) 04654 & RANGE_MASK]; 04655 04656 wsptr += 6; /* advance pointer to next row */ 04657 } 04658 } 04659 04660 04661 /* 04662 * Perform dequantization and inverse DCT on one block of coefficients, 04663 * producing a 5x10 output block. 04664 * 04665 * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows). 04666 */ 04667 04668 GLOBAL(void) 04669 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 04670 JCOEFPTR coef_block, 04671 JSAMPARRAY output_buf, JDIMENSION output_col) 04672 { 04673 INT32 tmp10, tmp11, tmp12, tmp13, tmp14; 04674 INT32 tmp20, tmp21, tmp22, tmp23, tmp24; 04675 INT32 z1, z2, z3, z4, z5; 04676 JCOEFPTR inptr; 04677 ISLOW_MULT_TYPE * quantptr; 04678 int * wsptr; 04679 JSAMPROW outptr; 04680 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 04681 int ctr; 04682 int workspace[5*10]; /* buffers data between passes */ 04683 SHIFT_TEMPS 04684 04685 /* Pass 1: process columns from input, store into work array. 04686 * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20). 04687 */ 04688 04689 inptr = coef_block; 04690 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 04691 wsptr = workspace; 04692 for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) { 04693 /* Even part */ 04694 04695 z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 04696 z3 <<= CONST_BITS; 04697 /* Add fudge factor here for final descale. */ 04698 z3 += ONE << (CONST_BITS-PASS1_BITS-1); 04699 z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 04700 z1 = MULTIPLY(z4, FIX(1.144122806)); /* c4 */ 04701 z2 = MULTIPLY(z4, FIX(0.437016024)); /* c8 */ 04702 tmp10 = z3 + z1; 04703 tmp11 = z3 - z2; 04704 04705 tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1), /* c0 = (c4-c8)*2 */ 04706 CONST_BITS-PASS1_BITS); 04707 04708 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 04709 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 04710 04711 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c6 */ 04712 tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */ 04713 tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */ 04714 04715 tmp20 = tmp10 + tmp12; 04716 tmp24 = tmp10 - tmp12; 04717 tmp21 = tmp11 + tmp13; 04718 tmp23 = tmp11 - tmp13; 04719 04720 /* Odd part */ 04721 04722 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 04723 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 04724 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 04725 z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 04726 04727 tmp11 = z2 + z4; 04728 tmp13 = z2 - z4; 04729 04730 tmp12 = MULTIPLY(tmp13, FIX(0.309016994)); /* (c3-c7)/2 */ 04731 z5 = z3 << CONST_BITS; 04732 04733 z2 = MULTIPLY(tmp11, FIX(0.951056516)); /* (c3+c7)/2 */ 04734 z4 = z5 + tmp12; 04735 04736 tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */ 04737 tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */ 04738 04739 z2 = MULTIPLY(tmp11, FIX(0.587785252)); /* (c1-c9)/2 */ 04740 z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1)); 04741 04742 tmp12 = (z1 - tmp13 - z3) << PASS1_BITS; 04743 04744 tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */ 04745 tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */ 04746 04747 /* Final output stage */ 04748 04749 wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS); 04750 wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS); 04751 wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS); 04752 wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS); 04753 wsptr[5*2] = (int) (tmp22 + tmp12); 04754 wsptr[5*7] = (int) (tmp22 - tmp12); 04755 wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS); 04756 wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS); 04757 wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS); 04758 wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS); 04759 } 04760 04761 /* Pass 2: process 10 rows from work array, store into output array. 04762 * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10). 04763 */ 04764 04765 wsptr = workspace; 04766 for (ctr = 0; ctr < 10; ctr++) { 04767 outptr = output_buf[ctr] + output_col; 04768 04769 /* Even part */ 04770 04771 /* Add range center and fudge factor for final descale and range-limit. */ 04772 tmp12 = (INT32) wsptr[0] + 04773 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 04774 (ONE << (PASS1_BITS+2))); 04775 tmp12 <<= CONST_BITS; 04776 tmp13 = (INT32) wsptr[2]; 04777 tmp14 = (INT32) wsptr[4]; 04778 z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */ 04779 z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */ 04780 z3 = tmp12 + z2; 04781 tmp10 = z3 + z1; 04782 tmp11 = z3 - z1; 04783 tmp12 -= z2 << 2; 04784 04785 /* Odd part */ 04786 04787 z2 = (INT32) wsptr[1]; 04788 z3 = (INT32) wsptr[3]; 04789 04790 z1 = MULTIPLY(z2 + z3, FIX(0.831253876)); /* c3 */ 04791 tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c1-c3 */ 04792 tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c1+c3 */ 04793 04794 /* Final output stage */ 04795 04796 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13, 04797 CONST_BITS+PASS1_BITS+3) 04798 & RANGE_MASK]; 04799 outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13, 04800 CONST_BITS+PASS1_BITS+3) 04801 & RANGE_MASK]; 04802 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14, 04803 CONST_BITS+PASS1_BITS+3) 04804 & RANGE_MASK]; 04805 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14, 04806 CONST_BITS+PASS1_BITS+3) 04807 & RANGE_MASK]; 04808 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12, 04809 CONST_BITS+PASS1_BITS+3) 04810 & RANGE_MASK]; 04811 04812 wsptr += 5; /* advance pointer to next row */ 04813 } 04814 } 04815 04816 04817 /* 04818 * Perform dequantization and inverse DCT on one block of coefficients, 04819 * producing a 4x8 output block. 04820 * 04821 * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows). 04822 */ 04823 04824 GLOBAL(void) 04825 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 04826 JCOEFPTR coef_block, 04827 JSAMPARRAY output_buf, JDIMENSION output_col) 04828 { 04829 INT32 tmp0, tmp1, tmp2, tmp3; 04830 INT32 tmp10, tmp11, tmp12, tmp13; 04831 INT32 z1, z2, z3; 04832 JCOEFPTR inptr; 04833 ISLOW_MULT_TYPE * quantptr; 04834 int * wsptr; 04835 JSAMPROW outptr; 04836 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 04837 int ctr; 04838 int workspace[4*8]; /* buffers data between passes */ 04839 SHIFT_TEMPS 04840 04841 /* Pass 1: process columns from input, store into work array. 04842 * Note results are scaled up by sqrt(8) compared to a true IDCT; 04843 * furthermore, we scale the results by 2**PASS1_BITS. 04844 * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16). 04845 */ 04846 04847 inptr = coef_block; 04848 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 04849 wsptr = workspace; 04850 for (ctr = 4; ctr > 0; ctr--) { 04851 /* Due to quantization, we will usually find that many of the input 04852 * coefficients are zero, especially the AC terms. We can exploit this 04853 * by short-circuiting the IDCT calculation for any column in which all 04854 * the AC terms are zero. In that case each output is equal to the 04855 * DC coefficient (with scale factor as needed). 04856 * With typical images and quantization tables, half or more of the 04857 * column DCT calculations can be simplified this way. 04858 */ 04859 04860 if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 && 04861 inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 && 04862 inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 && 04863 inptr[DCTSIZE*7] == 0) { 04864 /* AC terms all zero */ 04865 int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS; 04866 04867 wsptr[4*0] = dcval; 04868 wsptr[4*1] = dcval; 04869 wsptr[4*2] = dcval; 04870 wsptr[4*3] = dcval; 04871 wsptr[4*4] = dcval; 04872 wsptr[4*5] = dcval; 04873 wsptr[4*6] = dcval; 04874 wsptr[4*7] = dcval; 04875 04876 inptr++; /* advance pointers to next column */ 04877 quantptr++; 04878 wsptr++; 04879 continue; 04880 } 04881 04882 /* Even part: reverse the even part of the forward DCT. 04883 * The rotator is c(-6). 04884 */ 04885 04886 z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 04887 z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]); 04888 04889 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 04890 tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 04891 tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 04892 04893 z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 04894 z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 04895 z2 <<= CONST_BITS; 04896 z3 <<= CONST_BITS; 04897 /* Add fudge factor here for final descale. */ 04898 z2 += ONE << (CONST_BITS-PASS1_BITS-1); 04899 04900 tmp0 = z2 + z3; 04901 tmp1 = z2 - z3; 04902 04903 tmp10 = tmp0 + tmp2; 04904 tmp13 = tmp0 - tmp2; 04905 tmp11 = tmp1 + tmp3; 04906 tmp12 = tmp1 - tmp3; 04907 04908 /* Odd part per figure 8; the matrix is unitary and hence its 04909 * transpose is its inverse. i0..i3 are y7,y5,y3,y1 respectively. 04910 */ 04911 04912 tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]); 04913 tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 04914 tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 04915 tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 04916 04917 z2 = tmp0 + tmp2; 04918 z3 = tmp1 + tmp3; 04919 04920 z1 = MULTIPLY(z2 + z3, FIX_1_175875602); /* c3 */ 04921 z2 = MULTIPLY(z2, - FIX_1_961570560); /* -c3-c5 */ 04922 z3 = MULTIPLY(z3, - FIX_0_390180644); /* -c3+c5 */ 04923 z2 += z1; 04924 z3 += z1; 04925 04926 z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */ 04927 tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* -c1+c3+c5-c7 */ 04928 tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* c1+c3-c5-c7 */ 04929 tmp0 += z1 + z2; 04930 tmp3 += z1 + z3; 04931 04932 z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */ 04933 tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* c1+c3-c5+c7 */ 04934 tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* c1+c3+c5-c7 */ 04935 tmp1 += z1 + z3; 04936 tmp2 += z1 + z2; 04937 04938 /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */ 04939 04940 wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS); 04941 wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS); 04942 wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS); 04943 wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS); 04944 wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS); 04945 wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS); 04946 wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS); 04947 wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS); 04948 04949 inptr++; /* advance pointers to next column */ 04950 quantptr++; 04951 wsptr++; 04952 } 04953 04954 /* Pass 2: process 8 rows from work array, store into output array. 04955 * 4-point IDCT kernel, 04956 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 04957 */ 04958 04959 wsptr = workspace; 04960 for (ctr = 0; ctr < 8; ctr++) { 04961 outptr = output_buf[ctr] + output_col; 04962 04963 /* Even part */ 04964 04965 /* Add range center and fudge factor for final descale and range-limit. */ 04966 tmp0 = (INT32) wsptr[0] + 04967 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 04968 (ONE << (PASS1_BITS+2))); 04969 tmp2 = (INT32) wsptr[2]; 04970 04971 tmp10 = (tmp0 + tmp2) << CONST_BITS; 04972 tmp12 = (tmp0 - tmp2) << CONST_BITS; 04973 04974 /* Odd part */ 04975 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 04976 04977 z2 = (INT32) wsptr[1]; 04978 z3 = (INT32) wsptr[3]; 04979 04980 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 04981 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 04982 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 04983 04984 /* Final output stage */ 04985 04986 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 04987 CONST_BITS+PASS1_BITS+3) 04988 & RANGE_MASK]; 04989 outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 04990 CONST_BITS+PASS1_BITS+3) 04991 & RANGE_MASK]; 04992 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2, 04993 CONST_BITS+PASS1_BITS+3) 04994 & RANGE_MASK]; 04995 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2, 04996 CONST_BITS+PASS1_BITS+3) 04997 & RANGE_MASK]; 04998 04999 wsptr += 4; /* advance pointer to next row */ 05000 } 05001 } 05002 05003 05004 /* 05005 * Perform dequantization and inverse DCT on one block of coefficients, 05006 * producing a reduced-size 3x6 output block. 05007 * 05008 * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows). 05009 */ 05010 05011 GLOBAL(void) 05012 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 05013 JCOEFPTR coef_block, 05014 JSAMPARRAY output_buf, JDIMENSION output_col) 05015 { 05016 INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12; 05017 INT32 z1, z2, z3; 05018 JCOEFPTR inptr; 05019 ISLOW_MULT_TYPE * quantptr; 05020 int * wsptr; 05021 JSAMPROW outptr; 05022 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 05023 int ctr; 05024 int workspace[3*6]; /* buffers data between passes */ 05025 SHIFT_TEMPS 05026 05027 /* Pass 1: process columns from input, store into work array. 05028 * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12). 05029 */ 05030 05031 inptr = coef_block; 05032 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 05033 wsptr = workspace; 05034 for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) { 05035 /* Even part */ 05036 05037 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 05038 tmp0 <<= CONST_BITS; 05039 /* Add fudge factor here for final descale. */ 05040 tmp0 += ONE << (CONST_BITS-PASS1_BITS-1); 05041 tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]); 05042 tmp10 = MULTIPLY(tmp2, FIX(0.707106781)); /* c4 */ 05043 tmp1 = tmp0 + tmp10; 05044 tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS); 05045 tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 05046 tmp0 = MULTIPLY(tmp10, FIX(1.224744871)); /* c2 */ 05047 tmp10 = tmp1 + tmp0; 05048 tmp12 = tmp1 - tmp0; 05049 05050 /* Odd part */ 05051 05052 z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 05053 z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 05054 z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]); 05055 tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */ 05056 tmp0 = tmp1 + ((z1 + z2) << CONST_BITS); 05057 tmp2 = tmp1 + ((z3 - z2) << CONST_BITS); 05058 tmp1 = (z1 - z2 - z3) << PASS1_BITS; 05059 05060 /* Final output stage */ 05061 05062 wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS); 05063 wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS); 05064 wsptr[3*1] = (int) (tmp11 + tmp1); 05065 wsptr[3*4] = (int) (tmp11 - tmp1); 05066 wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS); 05067 wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS); 05068 } 05069 05070 /* Pass 2: process 6 rows from work array, store into output array. 05071 * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6). 05072 */ 05073 05074 wsptr = workspace; 05075 for (ctr = 0; ctr < 6; ctr++) { 05076 outptr = output_buf[ctr] + output_col; 05077 05078 /* Even part */ 05079 05080 /* Add range center and fudge factor for final descale and range-limit. */ 05081 tmp0 = (INT32) wsptr[0] + 05082 ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) + 05083 (ONE << (PASS1_BITS+2))); 05084 tmp0 <<= CONST_BITS; 05085 tmp2 = (INT32) wsptr[2]; 05086 tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */ 05087 tmp10 = tmp0 + tmp12; 05088 tmp2 = tmp0 - tmp12 - tmp12; 05089 05090 /* Odd part */ 05091 05092 tmp12 = (INT32) wsptr[1]; 05093 tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */ 05094 05095 /* Final output stage */ 05096 05097 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, 05098 CONST_BITS+PASS1_BITS+3) 05099 & RANGE_MASK]; 05100 outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, 05101 CONST_BITS+PASS1_BITS+3) 05102 & RANGE_MASK]; 05103 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2, 05104 CONST_BITS+PASS1_BITS+3) 05105 & RANGE_MASK]; 05106 05107 wsptr += 3; /* advance pointer to next row */ 05108 } 05109 } 05110 05111 05112 /* 05113 * Perform dequantization and inverse DCT on one block of coefficients, 05114 * producing a 2x4 output block. 05115 * 05116 * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows). 05117 */ 05118 05119 GLOBAL(void) 05120 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 05121 JCOEFPTR coef_block, 05122 JSAMPARRAY output_buf, JDIMENSION output_col) 05123 { 05124 INT32 tmp0, tmp2, tmp10, tmp12; 05125 INT32 z1, z2, z3; 05126 JCOEFPTR inptr; 05127 ISLOW_MULT_TYPE * quantptr; 05128 INT32 * wsptr; 05129 JSAMPROW outptr; 05130 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 05131 int ctr; 05132 INT32 workspace[2*4]; /* buffers data between passes */ 05133 SHIFT_TEMPS 05134 05135 /* Pass 1: process columns from input, store into work array. 05136 * 4-point IDCT kernel, 05137 * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT]. 05138 */ 05139 05140 inptr = coef_block; 05141 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 05142 wsptr = workspace; 05143 for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) { 05144 /* Even part */ 05145 05146 tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]); 05147 tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]); 05148 05149 tmp10 = (tmp0 + tmp2) << CONST_BITS; 05150 tmp12 = (tmp0 - tmp2) << CONST_BITS; 05151 05152 /* Odd part */ 05153 /* Same rotation as in the even part of the 8x8 LL&M IDCT */ 05154 05155 z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]); 05156 z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]); 05157 05158 z1 = MULTIPLY(z2 + z3, FIX_0_541196100); /* c6 */ 05159 tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */ 05160 tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */ 05161 05162 /* Final output stage */ 05163 05164 wsptr[2*0] = tmp10 + tmp0; 05165 wsptr[2*3] = tmp10 - tmp0; 05166 wsptr[2*1] = tmp12 + tmp2; 05167 wsptr[2*2] = tmp12 - tmp2; 05168 } 05169 05170 /* Pass 2: process 4 rows from work array, store into output array. */ 05171 05172 wsptr = workspace; 05173 for (ctr = 0; ctr < 4; ctr++) { 05174 outptr = output_buf[ctr] + output_col; 05175 05176 /* Even part */ 05177 05178 /* Add range center and fudge factor for final descale and range-limit. */ 05179 tmp10 = wsptr[0] + 05180 ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) + 05181 (ONE << (CONST_BITS+2))); 05182 05183 /* Odd part */ 05184 05185 tmp0 = wsptr[1]; 05186 05187 /* Final output stage */ 05188 05189 outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3) 05190 & RANGE_MASK]; 05191 outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3) 05192 & RANGE_MASK]; 05193 05194 wsptr += 2; /* advance pointer to next row */ 05195 } 05196 } 05197 05198 05199 /* 05200 * Perform dequantization and inverse DCT on one block of coefficients, 05201 * producing a 1x2 output block. 05202 * 05203 * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows). 05204 */ 05205 05206 GLOBAL(void) 05207 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr, 05208 JCOEFPTR coef_block, 05209 JSAMPARRAY output_buf, JDIMENSION output_col) 05210 { 05211 DCTELEM tmp0, tmp1; 05212 ISLOW_MULT_TYPE * quantptr; 05213 JSAMPLE *range_limit = IDCT_range_limit(cinfo); 05214 ISHIFT_TEMPS 05215 05216 /* Process 1 column from input, store into output array. */ 05217 05218 quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table; 05219 05220 /* Even part */ 05221 05222 tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]); 05223 /* Add range center and fudge factor for final descale and range-limit. */ 05224 tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2); 05225 05226 /* Odd part */ 05227 05228 tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]); 05229 05230 /* Final output stage */ 05231 05232 output_buf[0][output_col] = 05233 range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK]; 05234 output_buf[1][output_col] = 05235 range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK]; 05236 } 05237 05238 #endif /* IDCT_SCALING_SUPPORTED */ 05239 #endif /* DCT_ISLOW_SUPPORTED */
Generated on Wed Jul 13 2022 18:56:09 by 1.7.2