Final 350 project

Dependencies:   uzair Camera_LS_Y201 F7_Ethernet LCD_DISCO_F746NG NetworkAPI SDFileSystem mbed

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers jidctint.c Source File

jidctint.c

00001 /*
00002  * jidctint.c
00003  *
00004  * Copyright (C) 1991-1998, Thomas G. Lane.
00005  * Modification developed 2002-2015 by Guido Vollbeding.
00006  * This file is part of the Independent JPEG Group's software.
00007  * For conditions of distribution and use, see the accompanying README file.
00008  *
00009  * This file contains a slow-but-accurate integer implementation of the
00010  * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
00011  * must also perform dequantization of the input coefficients.
00012  *
00013  * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
00014  * on each row (or vice versa, but it's more convenient to emit a row at
00015  * a time).  Direct algorithms are also available, but they are much more
00016  * complex and seem not to be any faster when reduced to code.
00017  *
00018  * This implementation is based on an algorithm described in
00019  *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
00020  *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
00021  *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
00022  * The primary algorithm described there uses 11 multiplies and 29 adds.
00023  * We use their alternate method with 12 multiplies and 32 adds.
00024  * The advantage of this method is that no data path contains more than one
00025  * multiplication; this allows a very simple and accurate implementation in
00026  * scaled fixed-point arithmetic, with a minimal number of shifts.
00027  *
00028  * We also provide IDCT routines with various output sample block sizes for
00029  * direct resolution reduction or enlargement and for directly resolving the
00030  * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
00031  * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 input DCT block.
00032  *
00033  * For N<8 we simply take the corresponding low-frequency coefficients of
00034  * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
00035  * to yield the downscaled outputs.
00036  * This can be seen as direct low-pass downsampling from the DCT domain
00037  * point of view rather than the usual spatial domain point of view,
00038  * yielding significant computational savings and results at least
00039  * as good as common bilinear (averaging) spatial downsampling.
00040  *
00041  * For N>8 we apply a partial NxN IDCT, treating the 8 input coefficients
00042  * as the lower frequencies and assuming the higher frequencies are zero.
00043  * It turns out that the computational effort is similar to the 8x8 IDCT
00044  * regarding the output size.
00045  * Furthermore, the scaling and descaling is the same for all IDCT sizes.
00046  *
00047  * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
00048  * since there would be too many additional constants to pre-calculate.
00049  */
00050 
00051 #define JPEG_INTERNALS
00052 #include "jinclude.h"
00053 #include "jpeglib.h"
00054 #include "jdct.h"       /* Private declarations for DCT subsystem */
00055 
00056 #ifdef DCT_ISLOW_SUPPORTED
00057 
00058 
00059 /*
00060  * This module is specialized to the case DCTSIZE = 8.
00061  */
00062 
00063 #if DCTSIZE != 8
00064   Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
00065 #endif
00066 
00067 
00068 /*
00069  * The scaling used in this module works as follows:
00070  *
00071  * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
00072  * larger than the true IDCT outputs.  The final outputs are therefore
00073  * a factor of N larger than desired; since N=8 this can be cured by
00074  * a simple right shift at the end of the algorithm.  The advantage of
00075  * this arrangement is that we save two multiplications per 1-D IDCT,
00076  * because the y0 and y4 inputs need not be divided by sqrt(N).
00077  *
00078  * We have to do addition and subtraction of the integer inputs, which
00079  * is no problem, and multiplication by fractional constants, which is
00080  * a problem to do in integer arithmetic.  We multiply all the constants
00081  * by CONST_SCALE and convert them to integer constants (thus retaining
00082  * CONST_BITS bits of precision in the constants).  After doing a
00083  * multiplication we have to divide the product by CONST_SCALE, with proper
00084  * rounding, to produce the correct output.  This division can be done
00085  * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
00086  * as long as possible so that partial sums can be added together with
00087  * full fractional precision.
00088  *
00089  * The outputs of the first pass are scaled up by PASS1_BITS bits so that
00090  * they are represented to better-than-integral precision.  These outputs
00091  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
00092  * with the recommended scaling.  (To scale up 12-bit sample data further, an
00093  * intermediate INT32 array would be needed.)
00094  *
00095  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
00096  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
00097  * shows that the values given below are the most effective.
00098  */
00099 
00100 #if BITS_IN_JSAMPLE == 8
00101 #define CONST_BITS  13
00102 #define PASS1_BITS  2
00103 #else
00104 #define CONST_BITS  13
00105 #define PASS1_BITS  1       /* lose a little precision to avoid overflow */
00106 #endif
00107 
00108 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
00109  * causing a lot of useless floating-point operations at run time.
00110  * To get around this we use the following pre-calculated constants.
00111  * If you change CONST_BITS you may want to add appropriate values.
00112  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
00113  */
00114 
00115 #if CONST_BITS == 13
00116 #define FIX_0_298631336  ((INT32)  2446)    /* FIX(0.298631336) */
00117 #define FIX_0_390180644  ((INT32)  3196)    /* FIX(0.390180644) */
00118 #define FIX_0_541196100  ((INT32)  4433)    /* FIX(0.541196100) */
00119 #define FIX_0_765366865  ((INT32)  6270)    /* FIX(0.765366865) */
00120 #define FIX_0_899976223  ((INT32)  7373)    /* FIX(0.899976223) */
00121 #define FIX_1_175875602  ((INT32)  9633)    /* FIX(1.175875602) */
00122 #define FIX_1_501321110  ((INT32)  12299)   /* FIX(1.501321110) */
00123 #define FIX_1_847759065  ((INT32)  15137)   /* FIX(1.847759065) */
00124 #define FIX_1_961570560  ((INT32)  16069)   /* FIX(1.961570560) */
00125 #define FIX_2_053119869  ((INT32)  16819)   /* FIX(2.053119869) */
00126 #define FIX_2_562915447  ((INT32)  20995)   /* FIX(2.562915447) */
00127 #define FIX_3_072711026  ((INT32)  25172)   /* FIX(3.072711026) */
00128 #else
00129 #define FIX_0_298631336  FIX(0.298631336)
00130 #define FIX_0_390180644  FIX(0.390180644)
00131 #define FIX_0_541196100  FIX(0.541196100)
00132 #define FIX_0_765366865  FIX(0.765366865)
00133 #define FIX_0_899976223  FIX(0.899976223)
00134 #define FIX_1_175875602  FIX(1.175875602)
00135 #define FIX_1_501321110  FIX(1.501321110)
00136 #define FIX_1_847759065  FIX(1.847759065)
00137 #define FIX_1_961570560  FIX(1.961570560)
00138 #define FIX_2_053119869  FIX(2.053119869)
00139 #define FIX_2_562915447  FIX(2.562915447)
00140 #define FIX_3_072711026  FIX(3.072711026)
00141 #endif
00142 
00143 
00144 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
00145  * For 8-bit samples with the recommended scaling, all the variable
00146  * and constant values involved are no more than 16 bits wide, so a
00147  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
00148  * For 12-bit samples, a full 32-bit multiplication will be needed.
00149  */
00150 
00151 #if BITS_IN_JSAMPLE == 8
00152 #define MULTIPLY(var,const)  MULTIPLY16C16(var,const)
00153 #else
00154 #define MULTIPLY(var,const)  ((var) * (const))
00155 #endif
00156 
00157 
00158 /* Dequantize a coefficient by multiplying it by the multiplier-table
00159  * entry; produce an int result.  In this module, both inputs and result
00160  * are 16 bits or less, so either int or short multiply will work.
00161  */
00162 
00163 #define DEQUANTIZE(coef,quantval)  (((ISLOW_MULT_TYPE) (coef)) * (quantval))
00164 
00165 
00166 /*
00167  * Perform dequantization and inverse DCT on one block of coefficients.
00168  *
00169  * cK represents sqrt(2) * cos(K*pi/16).
00170  */
00171 
00172 GLOBAL(void)
00173 jpeg_idct_islow (j_decompress_ptr cinfo, jpeg_component_info * compptr,
00174          JCOEFPTR coef_block,
00175          JSAMPARRAY output_buf, JDIMENSION output_col)
00176 {
00177   INT32 tmp0, tmp1, tmp2, tmp3;
00178   INT32 tmp10, tmp11, tmp12, tmp13;
00179   INT32 z1, z2, z3;
00180   JCOEFPTR inptr;
00181   ISLOW_MULT_TYPE * quantptr;
00182   int * wsptr;
00183   JSAMPROW outptr;
00184   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
00185   int ctr;
00186   int workspace[DCTSIZE2];  /* buffers data between passes */
00187   SHIFT_TEMPS
00188 
00189   /* Pass 1: process columns from input, store into work array.
00190    * Note results are scaled up by sqrt(8) compared to a true IDCT;
00191    * furthermore, we scale the results by 2**PASS1_BITS.
00192    */
00193 
00194   inptr = coef_block;
00195   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
00196   wsptr = workspace;
00197   for (ctr = DCTSIZE; ctr > 0; ctr--) {
00198     /* Due to quantization, we will usually find that many of the input
00199      * coefficients are zero, especially the AC terms.  We can exploit this
00200      * by short-circuiting the IDCT calculation for any column in which all
00201      * the AC terms are zero.  In that case each output is equal to the
00202      * DC coefficient (with scale factor as needed).
00203      * With typical images and quantization tables, half or more of the
00204      * column DCT calculations can be simplified this way.
00205      */
00206 
00207     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
00208     inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
00209     inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
00210     inptr[DCTSIZE*7] == 0) {
00211       /* AC terms all zero */
00212       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
00213 
00214       wsptr[DCTSIZE*0] = dcval;
00215       wsptr[DCTSIZE*1] = dcval;
00216       wsptr[DCTSIZE*2] = dcval;
00217       wsptr[DCTSIZE*3] = dcval;
00218       wsptr[DCTSIZE*4] = dcval;
00219       wsptr[DCTSIZE*5] = dcval;
00220       wsptr[DCTSIZE*6] = dcval;
00221       wsptr[DCTSIZE*7] = dcval;
00222 
00223       inptr++;          /* advance pointers to next column */
00224       quantptr++;
00225       wsptr++;
00226       continue;
00227     }
00228 
00229     /* Even part: reverse the even part of the forward DCT.
00230      * The rotator is c(-6).
00231      */
00232 
00233     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
00234     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
00235     z2 <<= CONST_BITS;
00236     z3 <<= CONST_BITS;
00237     /* Add fudge factor here for final descale. */
00238     z2 += ONE << (CONST_BITS-PASS1_BITS-1);
00239 
00240     tmp0 = z2 + z3;
00241     tmp1 = z2 - z3;
00242 
00243     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
00244     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
00245 
00246     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
00247     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
00248     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
00249 
00250     tmp10 = tmp0 + tmp2;
00251     tmp13 = tmp0 - tmp2;
00252     tmp11 = tmp1 + tmp3;
00253     tmp12 = tmp1 - tmp3;
00254 
00255     /* Odd part per figure 8; the matrix is unitary and hence its
00256      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
00257      */
00258 
00259     tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
00260     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
00261     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
00262     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
00263 
00264     z2 = tmp0 + tmp2;
00265     z3 = tmp1 + tmp3;
00266 
00267     z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
00268     z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
00269     z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
00270     z2 += z1;
00271     z3 += z1;
00272 
00273     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
00274     tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
00275     tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
00276     tmp0 += z1 + z2;
00277     tmp3 += z1 + z3;
00278 
00279     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
00280     tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
00281     tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
00282     tmp1 += z1 + z3;
00283     tmp2 += z1 + z2;
00284 
00285     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
00286 
00287     wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
00288     wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
00289     wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
00290     wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
00291     wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
00292     wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
00293     wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
00294     wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
00295 
00296     inptr++;            /* advance pointers to next column */
00297     quantptr++;
00298     wsptr++;
00299   }
00300 
00301   /* Pass 2: process rows from work array, store into output array.
00302    * Note that we must descale the results by a factor of 8 == 2**3,
00303    * and also undo the PASS1_BITS scaling.
00304    */
00305 
00306   wsptr = workspace;
00307   for (ctr = 0; ctr < DCTSIZE; ctr++) {
00308     outptr = output_buf[ctr] + output_col;
00309 
00310     /* Add range center and fudge factor for final descale and range-limit. */
00311     z2 = (INT32) wsptr[0] +
00312        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
00313         (ONE << (PASS1_BITS+2)));
00314 
00315     /* Rows of zeroes can be exploited in the same way as we did with columns.
00316      * However, the column calculation has created many nonzero AC terms, so
00317      * the simplification applies less often (typically 5% to 10% of the time).
00318      * On machines with very fast multiplication, it's possible that the
00319      * test takes more time than it's worth.  In that case this section
00320      * may be commented out.
00321      */
00322 
00323 #ifndef NO_ZERO_ROW_TEST
00324     if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
00325     wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
00326       /* AC terms all zero */
00327       JSAMPLE dcval = range_limit[(int) RIGHT_SHIFT(z2, PASS1_BITS+3)
00328                   & RANGE_MASK];
00329 
00330       outptr[0] = dcval;
00331       outptr[1] = dcval;
00332       outptr[2] = dcval;
00333       outptr[3] = dcval;
00334       outptr[4] = dcval;
00335       outptr[5] = dcval;
00336       outptr[6] = dcval;
00337       outptr[7] = dcval;
00338 
00339       wsptr += DCTSIZE;     /* advance pointer to next row */
00340       continue;
00341     }
00342 #endif
00343 
00344     /* Even part: reverse the even part of the forward DCT.
00345      * The rotator is c(-6).
00346      */
00347 
00348     z3 = (INT32) wsptr[4];
00349 
00350     tmp0 = (z2 + z3) << CONST_BITS;
00351     tmp1 = (z2 - z3) << CONST_BITS;
00352 
00353     z2 = (INT32) wsptr[2];
00354     z3 = (INT32) wsptr[6];
00355 
00356     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
00357     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
00358     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
00359 
00360     tmp10 = tmp0 + tmp2;
00361     tmp13 = tmp0 - tmp2;
00362     tmp11 = tmp1 + tmp3;
00363     tmp12 = tmp1 - tmp3;
00364 
00365     /* Odd part per figure 8; the matrix is unitary and hence its
00366      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
00367      */
00368 
00369     tmp0 = (INT32) wsptr[7];
00370     tmp1 = (INT32) wsptr[5];
00371     tmp2 = (INT32) wsptr[3];
00372     tmp3 = (INT32) wsptr[1];
00373 
00374     z2 = tmp0 + tmp2;
00375     z3 = tmp1 + tmp3;
00376 
00377     z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
00378     z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
00379     z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
00380     z2 += z1;
00381     z3 += z1;
00382 
00383     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
00384     tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
00385     tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
00386     tmp0 += z1 + z2;
00387     tmp3 += z1 + z3;
00388 
00389     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
00390     tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
00391     tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
00392     tmp1 += z1 + z3;
00393     tmp2 += z1 + z2;
00394 
00395     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
00396 
00397     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
00398                           CONST_BITS+PASS1_BITS+3)
00399                 & RANGE_MASK];
00400     outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
00401                           CONST_BITS+PASS1_BITS+3)
00402                 & RANGE_MASK];
00403     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
00404                           CONST_BITS+PASS1_BITS+3)
00405                 & RANGE_MASK];
00406     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
00407                           CONST_BITS+PASS1_BITS+3)
00408                 & RANGE_MASK];
00409     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
00410                           CONST_BITS+PASS1_BITS+3)
00411                 & RANGE_MASK];
00412     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
00413                           CONST_BITS+PASS1_BITS+3)
00414                 & RANGE_MASK];
00415     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
00416                           CONST_BITS+PASS1_BITS+3)
00417                 & RANGE_MASK];
00418     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
00419                           CONST_BITS+PASS1_BITS+3)
00420                 & RANGE_MASK];
00421 
00422     wsptr += DCTSIZE;       /* advance pointer to next row */
00423   }
00424 }
00425 
00426 #ifdef IDCT_SCALING_SUPPORTED
00427 
00428 
00429 /*
00430  * Perform dequantization and inverse DCT on one block of coefficients,
00431  * producing a 7x7 output block.
00432  *
00433  * Optimized algorithm with 12 multiplications in the 1-D kernel.
00434  * cK represents sqrt(2) * cos(K*pi/14).
00435  */
00436 
00437 GLOBAL(void)
00438 jpeg_idct_7x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
00439            JCOEFPTR coef_block,
00440            JSAMPARRAY output_buf, JDIMENSION output_col)
00441 {
00442   INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
00443   INT32 z1, z2, z3;
00444   JCOEFPTR inptr;
00445   ISLOW_MULT_TYPE * quantptr;
00446   int * wsptr;
00447   JSAMPROW outptr;
00448   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
00449   int ctr;
00450   int workspace[7*7];   /* buffers data between passes */
00451   SHIFT_TEMPS
00452 
00453   /* Pass 1: process columns from input, store into work array. */
00454 
00455   inptr = coef_block;
00456   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
00457   wsptr = workspace;
00458   for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
00459     /* Even part */
00460 
00461     tmp13 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
00462     tmp13 <<= CONST_BITS;
00463     /* Add fudge factor here for final descale. */
00464     tmp13 += ONE << (CONST_BITS-PASS1_BITS-1);
00465 
00466     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
00467     z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
00468     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
00469 
00470     tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
00471     tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
00472     tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
00473     tmp0 = z1 + z3;
00474     z2 -= tmp0;
00475     tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
00476     tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
00477     tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
00478     tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
00479 
00480     /* Odd part */
00481 
00482     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
00483     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
00484     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
00485 
00486     tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
00487     tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
00488     tmp0 = tmp1 - tmp2;
00489     tmp1 += tmp2;
00490     tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
00491     tmp1 += tmp2;
00492     z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
00493     tmp0 += z2;
00494     tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
00495 
00496     /* Final output stage */
00497 
00498     wsptr[7*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
00499     wsptr[7*6] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
00500     wsptr[7*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
00501     wsptr[7*5] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
00502     wsptr[7*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
00503     wsptr[7*4] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
00504     wsptr[7*3] = (int) RIGHT_SHIFT(tmp13, CONST_BITS-PASS1_BITS);
00505   }
00506 
00507   /* Pass 2: process 7 rows from work array, store into output array. */
00508 
00509   wsptr = workspace;
00510   for (ctr = 0; ctr < 7; ctr++) {
00511     outptr = output_buf[ctr] + output_col;
00512 
00513     /* Even part */
00514 
00515     /* Add range center and fudge factor for final descale and range-limit. */
00516     tmp13 = (INT32) wsptr[0] +
00517           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
00518            (ONE << (PASS1_BITS+2)));
00519     tmp13 <<= CONST_BITS;
00520 
00521     z1 = (INT32) wsptr[2];
00522     z2 = (INT32) wsptr[4];
00523     z3 = (INT32) wsptr[6];
00524 
00525     tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
00526     tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
00527     tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
00528     tmp0 = z1 + z3;
00529     z2 -= tmp0;
00530     tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
00531     tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
00532     tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
00533     tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
00534 
00535     /* Odd part */
00536 
00537     z1 = (INT32) wsptr[1];
00538     z2 = (INT32) wsptr[3];
00539     z3 = (INT32) wsptr[5];
00540 
00541     tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
00542     tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
00543     tmp0 = tmp1 - tmp2;
00544     tmp1 += tmp2;
00545     tmp2 = MULTIPLY(z2 + z3, - FIX(1.378756276));    /* -c1 */
00546     tmp1 += tmp2;
00547     z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
00548     tmp0 += z2;
00549     tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
00550 
00551     /* Final output stage */
00552 
00553     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
00554                           CONST_BITS+PASS1_BITS+3)
00555                 & RANGE_MASK];
00556     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
00557                           CONST_BITS+PASS1_BITS+3)
00558                 & RANGE_MASK];
00559     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
00560                           CONST_BITS+PASS1_BITS+3)
00561                 & RANGE_MASK];
00562     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
00563                           CONST_BITS+PASS1_BITS+3)
00564                 & RANGE_MASK];
00565     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
00566                           CONST_BITS+PASS1_BITS+3)
00567                 & RANGE_MASK];
00568     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
00569                           CONST_BITS+PASS1_BITS+3)
00570                 & RANGE_MASK];
00571     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13,
00572                           CONST_BITS+PASS1_BITS+3)
00573                 & RANGE_MASK];
00574 
00575     wsptr += 7;     /* advance pointer to next row */
00576   }
00577 }
00578 
00579 
00580 /*
00581  * Perform dequantization and inverse DCT on one block of coefficients,
00582  * producing a reduced-size 6x6 output block.
00583  *
00584  * Optimized algorithm with 3 multiplications in the 1-D kernel.
00585  * cK represents sqrt(2) * cos(K*pi/12).
00586  */
00587 
00588 GLOBAL(void)
00589 jpeg_idct_6x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
00590            JCOEFPTR coef_block,
00591            JSAMPARRAY output_buf, JDIMENSION output_col)
00592 {
00593   INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
00594   INT32 z1, z2, z3;
00595   JCOEFPTR inptr;
00596   ISLOW_MULT_TYPE * quantptr;
00597   int * wsptr;
00598   JSAMPROW outptr;
00599   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
00600   int ctr;
00601   int workspace[6*6];   /* buffers data between passes */
00602   SHIFT_TEMPS
00603 
00604   /* Pass 1: process columns from input, store into work array. */
00605 
00606   inptr = coef_block;
00607   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
00608   wsptr = workspace;
00609   for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
00610     /* Even part */
00611 
00612     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
00613     tmp0 <<= CONST_BITS;
00614     /* Add fudge factor here for final descale. */
00615     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
00616     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
00617     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
00618     tmp1 = tmp0 + tmp10;
00619     tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
00620     tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
00621     tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
00622     tmp10 = tmp1 + tmp0;
00623     tmp12 = tmp1 - tmp0;
00624 
00625     /* Odd part */
00626 
00627     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
00628     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
00629     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
00630     tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
00631     tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
00632     tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
00633     tmp1 = (z1 - z2 - z3) << PASS1_BITS;
00634 
00635     /* Final output stage */
00636 
00637     wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
00638     wsptr[6*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
00639     wsptr[6*1] = (int) (tmp11 + tmp1);
00640     wsptr[6*4] = (int) (tmp11 - tmp1);
00641     wsptr[6*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
00642     wsptr[6*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
00643   }
00644 
00645   /* Pass 2: process 6 rows from work array, store into output array. */
00646 
00647   wsptr = workspace;
00648   for (ctr = 0; ctr < 6; ctr++) {
00649     outptr = output_buf[ctr] + output_col;
00650 
00651     /* Even part */
00652 
00653     /* Add range center and fudge factor for final descale and range-limit. */
00654     tmp0 = (INT32) wsptr[0] +
00655          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
00656           (ONE << (PASS1_BITS+2)));
00657     tmp0 <<= CONST_BITS;
00658     tmp2 = (INT32) wsptr[4];
00659     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
00660     tmp1 = tmp0 + tmp10;
00661     tmp11 = tmp0 - tmp10 - tmp10;
00662     tmp10 = (INT32) wsptr[2];
00663     tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
00664     tmp10 = tmp1 + tmp0;
00665     tmp12 = tmp1 - tmp0;
00666 
00667     /* Odd part */
00668 
00669     z1 = (INT32) wsptr[1];
00670     z2 = (INT32) wsptr[3];
00671     z3 = (INT32) wsptr[5];
00672     tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
00673     tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
00674     tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
00675     tmp1 = (z1 - z2 - z3) << CONST_BITS;
00676 
00677     /* Final output stage */
00678 
00679     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
00680                           CONST_BITS+PASS1_BITS+3)
00681                 & RANGE_MASK];
00682     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
00683                           CONST_BITS+PASS1_BITS+3)
00684                 & RANGE_MASK];
00685     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
00686                           CONST_BITS+PASS1_BITS+3)
00687                 & RANGE_MASK];
00688     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
00689                           CONST_BITS+PASS1_BITS+3)
00690                 & RANGE_MASK];
00691     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
00692                           CONST_BITS+PASS1_BITS+3)
00693                 & RANGE_MASK];
00694     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
00695                           CONST_BITS+PASS1_BITS+3)
00696                 & RANGE_MASK];
00697 
00698     wsptr += 6;     /* advance pointer to next row */
00699   }
00700 }
00701 
00702 
00703 /*
00704  * Perform dequantization and inverse DCT on one block of coefficients,
00705  * producing a reduced-size 5x5 output block.
00706  *
00707  * Optimized algorithm with 5 multiplications in the 1-D kernel.
00708  * cK represents sqrt(2) * cos(K*pi/10).
00709  */
00710 
00711 GLOBAL(void)
00712 jpeg_idct_5x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
00713            JCOEFPTR coef_block,
00714            JSAMPARRAY output_buf, JDIMENSION output_col)
00715 {
00716   INT32 tmp0, tmp1, tmp10, tmp11, tmp12;
00717   INT32 z1, z2, z3;
00718   JCOEFPTR inptr;
00719   ISLOW_MULT_TYPE * quantptr;
00720   int * wsptr;
00721   JSAMPROW outptr;
00722   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
00723   int ctr;
00724   int workspace[5*5];   /* buffers data between passes */
00725   SHIFT_TEMPS
00726 
00727   /* Pass 1: process columns from input, store into work array. */
00728 
00729   inptr = coef_block;
00730   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
00731   wsptr = workspace;
00732   for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
00733     /* Even part */
00734 
00735     tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
00736     tmp12 <<= CONST_BITS;
00737     /* Add fudge factor here for final descale. */
00738     tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
00739     tmp0 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
00740     tmp1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
00741     z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
00742     z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
00743     z3 = tmp12 + z2;
00744     tmp10 = z3 + z1;
00745     tmp11 = z3 - z1;
00746     tmp12 -= z2 << 2;
00747 
00748     /* Odd part */
00749 
00750     z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
00751     z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
00752 
00753     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
00754     tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
00755     tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
00756 
00757     /* Final output stage */
00758 
00759     wsptr[5*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
00760     wsptr[5*4] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
00761     wsptr[5*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
00762     wsptr[5*3] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
00763     wsptr[5*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
00764   }
00765 
00766   /* Pass 2: process 5 rows from work array, store into output array. */
00767 
00768   wsptr = workspace;
00769   for (ctr = 0; ctr < 5; ctr++) {
00770     outptr = output_buf[ctr] + output_col;
00771 
00772     /* Even part */
00773 
00774     /* Add range center and fudge factor for final descale and range-limit. */
00775     tmp12 = (INT32) wsptr[0] +
00776           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
00777            (ONE << (PASS1_BITS+2)));
00778     tmp12 <<= CONST_BITS;
00779     tmp0 = (INT32) wsptr[2];
00780     tmp1 = (INT32) wsptr[4];
00781     z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
00782     z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
00783     z3 = tmp12 + z2;
00784     tmp10 = z3 + z1;
00785     tmp11 = z3 - z1;
00786     tmp12 -= z2 << 2;
00787 
00788     /* Odd part */
00789 
00790     z2 = (INT32) wsptr[1];
00791     z3 = (INT32) wsptr[3];
00792 
00793     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
00794     tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
00795     tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
00796 
00797     /* Final output stage */
00798 
00799     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
00800                           CONST_BITS+PASS1_BITS+3)
00801                 & RANGE_MASK];
00802     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
00803                           CONST_BITS+PASS1_BITS+3)
00804                 & RANGE_MASK];
00805     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
00806                           CONST_BITS+PASS1_BITS+3)
00807                 & RANGE_MASK];
00808     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
00809                           CONST_BITS+PASS1_BITS+3)
00810                 & RANGE_MASK];
00811     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
00812                           CONST_BITS+PASS1_BITS+3)
00813                 & RANGE_MASK];
00814 
00815     wsptr += 5;     /* advance pointer to next row */
00816   }
00817 }
00818 
00819 
00820 /*
00821  * Perform dequantization and inverse DCT on one block of coefficients,
00822  * producing a reduced-size 4x4 output block.
00823  *
00824  * Optimized algorithm with 3 multiplications in the 1-D kernel.
00825  * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
00826  */
00827 
00828 GLOBAL(void)
00829 jpeg_idct_4x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
00830            JCOEFPTR coef_block,
00831            JSAMPARRAY output_buf, JDIMENSION output_col)
00832 {
00833   INT32 tmp0, tmp2, tmp10, tmp12;
00834   INT32 z1, z2, z3;
00835   JCOEFPTR inptr;
00836   ISLOW_MULT_TYPE * quantptr;
00837   int * wsptr;
00838   JSAMPROW outptr;
00839   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
00840   int ctr;
00841   int workspace[4*4];   /* buffers data between passes */
00842   SHIFT_TEMPS
00843 
00844   /* Pass 1: process columns from input, store into work array. */
00845 
00846   inptr = coef_block;
00847   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
00848   wsptr = workspace;
00849   for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
00850     /* Even part */
00851 
00852     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
00853     tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
00854     
00855     tmp10 = (tmp0 + tmp2) << PASS1_BITS;
00856     tmp12 = (tmp0 - tmp2) << PASS1_BITS;
00857 
00858     /* Odd part */
00859     /* Same rotation as in the even part of the 8x8 LL&M IDCT */
00860 
00861     z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
00862     z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
00863 
00864     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
00865     /* Add fudge factor here for final descale. */
00866     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
00867     tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
00868                CONST_BITS-PASS1_BITS);
00869     tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
00870                CONST_BITS-PASS1_BITS);
00871 
00872     /* Final output stage */
00873 
00874     wsptr[4*0] = (int) (tmp10 + tmp0);
00875     wsptr[4*3] = (int) (tmp10 - tmp0);
00876     wsptr[4*1] = (int) (tmp12 + tmp2);
00877     wsptr[4*2] = (int) (tmp12 - tmp2);
00878   }
00879 
00880   /* Pass 2: process 4 rows from work array, store into output array. */
00881 
00882   wsptr = workspace;
00883   for (ctr = 0; ctr < 4; ctr++) {
00884     outptr = output_buf[ctr] + output_col;
00885 
00886     /* Even part */
00887 
00888     /* Add range center and fudge factor for final descale and range-limit. */
00889     tmp0 = (INT32) wsptr[0] +
00890          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
00891           (ONE << (PASS1_BITS+2)));
00892     tmp2 = (INT32) wsptr[2];
00893 
00894     tmp10 = (tmp0 + tmp2) << CONST_BITS;
00895     tmp12 = (tmp0 - tmp2) << CONST_BITS;
00896 
00897     /* Odd part */
00898     /* Same rotation as in the even part of the 8x8 LL&M IDCT */
00899 
00900     z2 = (INT32) wsptr[1];
00901     z3 = (INT32) wsptr[3];
00902 
00903     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
00904     tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
00905     tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
00906 
00907     /* Final output stage */
00908 
00909     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
00910                           CONST_BITS+PASS1_BITS+3)
00911                 & RANGE_MASK];
00912     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
00913                           CONST_BITS+PASS1_BITS+3)
00914                 & RANGE_MASK];
00915     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
00916                           CONST_BITS+PASS1_BITS+3)
00917                 & RANGE_MASK];
00918     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
00919                           CONST_BITS+PASS1_BITS+3)
00920                 & RANGE_MASK];
00921 
00922     wsptr += 4;     /* advance pointer to next row */
00923   }
00924 }
00925 
00926 
00927 /*
00928  * Perform dequantization and inverse DCT on one block of coefficients,
00929  * producing a reduced-size 3x3 output block.
00930  *
00931  * Optimized algorithm with 2 multiplications in the 1-D kernel.
00932  * cK represents sqrt(2) * cos(K*pi/6).
00933  */
00934 
00935 GLOBAL(void)
00936 jpeg_idct_3x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
00937            JCOEFPTR coef_block,
00938            JSAMPARRAY output_buf, JDIMENSION output_col)
00939 {
00940   INT32 tmp0, tmp2, tmp10, tmp12;
00941   JCOEFPTR inptr;
00942   ISLOW_MULT_TYPE * quantptr;
00943   int * wsptr;
00944   JSAMPROW outptr;
00945   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
00946   int ctr;
00947   int workspace[3*3];   /* buffers data between passes */
00948   SHIFT_TEMPS
00949 
00950   /* Pass 1: process columns from input, store into work array. */
00951 
00952   inptr = coef_block;
00953   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
00954   wsptr = workspace;
00955   for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
00956     /* Even part */
00957 
00958     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
00959     tmp0 <<= CONST_BITS;
00960     /* Add fudge factor here for final descale. */
00961     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
00962     tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
00963     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
00964     tmp10 = tmp0 + tmp12;
00965     tmp2 = tmp0 - tmp12 - tmp12;
00966 
00967     /* Odd part */
00968 
00969     tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
00970     tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
00971 
00972     /* Final output stage */
00973 
00974     wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
00975     wsptr[3*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
00976     wsptr[3*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
00977   }
00978 
00979   /* Pass 2: process 3 rows from work array, store into output array. */
00980 
00981   wsptr = workspace;
00982   for (ctr = 0; ctr < 3; ctr++) {
00983     outptr = output_buf[ctr] + output_col;
00984 
00985     /* Even part */
00986 
00987     /* Add range center and fudge factor for final descale and range-limit. */
00988     tmp0 = (INT32) wsptr[0] +
00989          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
00990           (ONE << (PASS1_BITS+2)));
00991     tmp0 <<= CONST_BITS;
00992     tmp2 = (INT32) wsptr[2];
00993     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
00994     tmp10 = tmp0 + tmp12;
00995     tmp2 = tmp0 - tmp12 - tmp12;
00996 
00997     /* Odd part */
00998 
00999     tmp12 = (INT32) wsptr[1];
01000     tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
01001 
01002     /* Final output stage */
01003 
01004     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
01005                           CONST_BITS+PASS1_BITS+3)
01006                 & RANGE_MASK];
01007     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
01008                           CONST_BITS+PASS1_BITS+3)
01009                 & RANGE_MASK];
01010     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
01011                           CONST_BITS+PASS1_BITS+3)
01012                 & RANGE_MASK];
01013 
01014     wsptr += 3;     /* advance pointer to next row */
01015   }
01016 }
01017 
01018 
01019 /*
01020  * Perform dequantization and inverse DCT on one block of coefficients,
01021  * producing a reduced-size 2x2 output block.
01022  *
01023  * Multiplication-less algorithm.
01024  */
01025 
01026 GLOBAL(void)
01027 jpeg_idct_2x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
01028            JCOEFPTR coef_block,
01029            JSAMPARRAY output_buf, JDIMENSION output_col)
01030 {
01031   DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
01032   ISLOW_MULT_TYPE * quantptr;
01033   JSAMPROW outptr;
01034   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
01035   ISHIFT_TEMPS
01036 
01037   /* Pass 1: process columns from input. */
01038 
01039   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
01040 
01041   /* Column 0 */
01042   tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
01043   tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
01044   /* Add range center and fudge factor for final descale and range-limit. */
01045   tmp4 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
01046 
01047   tmp0 = tmp4 + tmp5;
01048   tmp2 = tmp4 - tmp5;
01049 
01050   /* Column 1 */
01051   tmp4 = DEQUANTIZE(coef_block[DCTSIZE*0+1], quantptr[DCTSIZE*0+1]);
01052   tmp5 = DEQUANTIZE(coef_block[DCTSIZE*1+1], quantptr[DCTSIZE*1+1]);
01053 
01054   tmp1 = tmp4 + tmp5;
01055   tmp3 = tmp4 - tmp5;
01056 
01057   /* Pass 2: process 2 rows, store into output array. */
01058 
01059   /* Row 0 */
01060   outptr = output_buf[0] + output_col;
01061 
01062   outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
01063   outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
01064 
01065   /* Row 1 */
01066   outptr = output_buf[1] + output_col;
01067 
01068   outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp2 + tmp3, 3) & RANGE_MASK];
01069   outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp2 - tmp3, 3) & RANGE_MASK];
01070 }
01071 
01072 
01073 /*
01074  * Perform dequantization and inverse DCT on one block of coefficients,
01075  * producing a reduced-size 1x1 output block.
01076  *
01077  * We hardly need an inverse DCT routine for this: just take the
01078  * average pixel value, which is one-eighth of the DC coefficient.
01079  */
01080 
01081 GLOBAL(void)
01082 jpeg_idct_1x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
01083            JCOEFPTR coef_block,
01084            JSAMPARRAY output_buf, JDIMENSION output_col)
01085 {
01086   DCTELEM dcval;
01087   ISLOW_MULT_TYPE * quantptr;
01088   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
01089   ISHIFT_TEMPS
01090 
01091   /* 1x1 is trivial: just take the DC coefficient divided by 8. */
01092 
01093   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
01094 
01095   dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
01096   /* Add range center and fudge factor for descale and range-limit. */
01097   dcval += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
01098 
01099   output_buf[0][output_col] =
01100     range_limit[(int) IRIGHT_SHIFT(dcval, 3) & RANGE_MASK];
01101 }
01102 
01103 
01104 /*
01105  * Perform dequantization and inverse DCT on one block of coefficients,
01106  * producing a 9x9 output block.
01107  *
01108  * Optimized algorithm with 10 multiplications in the 1-D kernel.
01109  * cK represents sqrt(2) * cos(K*pi/18).
       *
       * Pass 1 applies the 1-D kernel to each of the 8 input columns,
       * dequantizing as it reads, and stores 9 results per column in the
       * workspace (row stride 8).  Pass 2 applies the same kernel to each of
       * the 9 workspace rows and writes 9 range-limited samples per row into
       * output_buf[row], starting at sample index output_col.
       *
       * cinfo is used only to look up the range-limit table, and
       * compptr->dct_table supplies the dequantization multipliers.
01110  */
01111 
01112 GLOBAL(void)
01113 jpeg_idct_9x9 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
01114            JCOEFPTR coef_block,
01115            JSAMPARRAY output_buf, JDIMENSION output_col)
01116 {
01117   INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
01118   INT32 z1, z2, z3, z4;
01119   JCOEFPTR inptr;               /* current input column */
01120   ISLOW_MULT_TYPE * quantptr;   /* multipliers for the current column */
01121   int * wsptr;                  /* pass 1: current column; pass 2: current row */
01122   JSAMPROW outptr;              /* first sample to write in the current output row */
01123   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
01124   int ctr;
01125   int workspace[8*9];   /* buffers data between passes */
01126   SHIFT_TEMPS
01127 
01128   /* Pass 1: process columns from input, store into work array. */
01129 
01130   inptr = coef_block;
01131   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
01132   wsptr = workspace;
        /* One column per iteration; the three pointers advance together. */
01133   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
01134     /* Even part */
          /* Inputs are rows 0, 2, 4 and 6 of the current column. */
01135 
01136     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
01137     tmp0 <<= CONST_BITS;
01138     /* Add fudge factor here for final descale. */
01139     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
01140 
01141     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
01142     z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
01143     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
01144 
01145     tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
01146     tmp1 = tmp0 + tmp3;
01147     tmp2 = tmp0 - tmp3 - tmp3;
01148 
          /* tmp0 is reused as a scratch product from here on. */
01149     tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
01150     tmp11 = tmp2 + tmp0;
          /* tmp14 is built from even-row inputs only; it becomes output 4. */
01151     tmp14 = tmp2 - tmp0 - tmp0;
01152 
01153     tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
01154     tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
01155     tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
01156 
01157     tmp10 = tmp1 + tmp0 - tmp3;
01158     tmp12 = tmp1 - tmp0 + tmp2;
01159     tmp13 = tmp1 - tmp2 + tmp3;
01160 
01161     /* Odd part */
          /* Inputs are rows 1, 3, 5 and 7; tmp0..tmp3 are overwritten with
           * the odd-part terms while tmp10..tmp14 keep the even-part results.
           */
01162 
01163     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
01164     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
01165     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
01166     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
01167 
          /* z2 is replaced in place by its product with -c3. */
01168     z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
01169 
01170     tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
01171     tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
01172     tmp0 = tmp2 + tmp3 - z2;
01173     tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
01174     tmp2 += z2 - tmp1;
01175     tmp3 += z2 + tmp1;
01176     tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
01177 
01178     /* Final output stage */
          /* Results k and 8-k are the sum and difference of one even-part
           * term and one odd-part term; result 4 has no odd-part term.
           */
01179 
01180     wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
01181     wsptr[8*8] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
01182     wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS-PASS1_BITS);
01183     wsptr[8*7] = (int) RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS-PASS1_BITS);
01184     wsptr[8*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
01185     wsptr[8*6] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
01186     wsptr[8*3] = (int) RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS-PASS1_BITS);
01187     wsptr[8*5] = (int) RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS-PASS1_BITS);
01188     wsptr[8*4] = (int) RIGHT_SHIFT(tmp14, CONST_BITS-PASS1_BITS);
01189   }
01190 
01191   /* Pass 2: process 9 rows from work array, store into output array. */
01192 
01193   wsptr = workspace;
01194   for (ctr = 0; ctr < 9; ctr++) {
01195     outptr = output_buf[ctr] + output_col;
01196 
01197     /* Even part */
          /* Inputs are entries 0, 2, 4 and 6 of the current workspace row. */
01198 
01199     /* Add range center and fudge factor for final descale and range-limit. */
01200     tmp0 = (INT32) wsptr[0] +
01201          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
01202           (ONE << (PASS1_BITS+2)));
01203     tmp0 <<= CONST_BITS;
01204 
01205     z1 = (INT32) wsptr[2];
01206     z2 = (INT32) wsptr[4];
01207     z3 = (INT32) wsptr[6];
01208 
01209     tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
01210     tmp1 = tmp0 + tmp3;
01211     tmp2 = tmp0 - tmp3 - tmp3;
01212 
01213     tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
01214     tmp11 = tmp2 + tmp0;
01215     tmp14 = tmp2 - tmp0 - tmp0;
01216 
01217     tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
01218     tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
01219     tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
01220 
01221     tmp10 = tmp1 + tmp0 - tmp3;
01222     tmp12 = tmp1 - tmp0 + tmp2;
01223     tmp13 = tmp1 - tmp2 + tmp3;
01224 
01225     /* Odd part */
          /* Inputs are entries 1, 3, 5 and 7 of the current workspace row. */
01226 
01227     z1 = (INT32) wsptr[1];
01228     z2 = (INT32) wsptr[3];
01229     z3 = (INT32) wsptr[5];
01230     z4 = (INT32) wsptr[7];
01231 
01232     z2 = MULTIPLY(z2, - FIX(1.224744871));           /* -c3 */
01233 
01234     tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
01235     tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
01236     tmp0 = tmp2 + tmp3 - z2;
01237     tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
01238     tmp2 += z2 - tmp1;
01239     tmp3 += z2 + tmp1;
01240     tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
01241 
01242     /* Final output stage */
          /* Each result is shifted down by CONST_BITS+PASS1_BITS+3, masked
           * with RANGE_MASK and used as an index into range_limit.
           */
01243 
01244     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
01245                           CONST_BITS+PASS1_BITS+3)
01246                 & RANGE_MASK];
01247     outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
01248                           CONST_BITS+PASS1_BITS+3)
01249                 & RANGE_MASK];
01250     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
01251                           CONST_BITS+PASS1_BITS+3)
01252                 & RANGE_MASK];
01253     outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
01254                           CONST_BITS+PASS1_BITS+3)
01255                 & RANGE_MASK];
01256     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
01257                           CONST_BITS+PASS1_BITS+3)
01258                 & RANGE_MASK];
01259     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
01260                           CONST_BITS+PASS1_BITS+3)
01261                 & RANGE_MASK];
01262     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp3,
01263                           CONST_BITS+PASS1_BITS+3)
01264                 & RANGE_MASK];
01265     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp3,
01266                           CONST_BITS+PASS1_BITS+3)
01267                 & RANGE_MASK];
01268     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp14,
01269                           CONST_BITS+PASS1_BITS+3)
01270                 & RANGE_MASK];
01271 
01272     wsptr += 8;     /* advance pointer to next row */
01273   }
01274 }
01275 
01276 
01277 /*
01278  * Perform dequantization and inverse DCT on one block of coefficients,
01279  * producing a 10x10 output block.
01280  *
01281  * Optimized algorithm with 12 multiplications in the 1-D kernel.
01282  * cK represents sqrt(2) * cos(K*pi/20).
       *
       * Pass 1 applies the 1-D kernel to each of the 8 input columns,
       * dequantizing as it reads, and stores 10 results per column in the
       * workspace (row stride 8).  Pass 2 applies the kernel to each of the
       * 10 workspace rows and writes 10 range-limited samples per row into
       * output_buf[row], starting at sample index output_col.
       *
       * cinfo is used only to look up the range-limit table, and
       * compptr->dct_table supplies the dequantization multipliers.
01283  */
01284 
01285 GLOBAL(void)
01286 jpeg_idct_10x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
01287          JCOEFPTR coef_block,
01288          JSAMPARRAY output_buf, JDIMENSION output_col)
01289 {
01290   INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
01291   INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
01292   INT32 z1, z2, z3, z4, z5;
01293   JCOEFPTR inptr;               /* current input column */
01294   ISLOW_MULT_TYPE * quantptr;   /* multipliers for the current column */
01295   int * wsptr;                  /* pass 1: current column; pass 2: current row */
01296   JSAMPROW outptr;              /* first sample to write in the current output row */
01297   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
01298   int ctr;
01299   int workspace[8*10];  /* buffers data between passes */
01300   SHIFT_TEMPS
01301 
01302   /* Pass 1: process columns from input, store into work array. */
01303 
01304   inptr = coef_block;
01305   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
01306   wsptr = workspace;
        /* One column per iteration; the three pointers advance together. */
01307   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
01308     /* Even part */
          /* Inputs are rows 0, 2, 4 and 6; results land in tmp20..tmp24. */
01309 
01310     z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
01311     z3 <<= CONST_BITS;
01312     /* Add fudge factor here for final descale. */
01313     z3 += ONE << (CONST_BITS-PASS1_BITS-1);
01314     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
01315     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
01316     z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
01317     tmp10 = z3 + z1;
01318     tmp11 = z3 - z2;
01319 
          /* tmp22 is descaled here rather than in the final output stage:
           * the odd-part term it is combined with (tmp12) is formed with a
           * plain left shift by PASS1_BITS, so neither needs a later shift.
           */
01320     tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
01321             CONST_BITS-PASS1_BITS);
01322 
01323     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
01324     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
01325 
01326     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
01327     tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
01328     tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
01329 
01330     tmp20 = tmp10 + tmp12;
01331     tmp24 = tmp10 - tmp12;
01332     tmp21 = tmp11 + tmp13;
01333     tmp23 = tmp11 - tmp13;
01334 
01335     /* Odd part */
          /* Inputs are rows 1, 3, 5 and 7; tmp10..tmp14 are overwritten with
           * the odd-part terms.
           */
01336 
01337     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
01338     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
01339     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
01340     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
01341 
01342     tmp11 = z2 + z4;
01343     tmp13 = z2 - z4;
01344 
01345     tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
          /* z5 is the row-5 input shifted up by CONST_BITS; z3 stays unshifted
           * because tmp12 below needs it at input scale.
           */
01346     z5 = z3 << CONST_BITS;
01347 
01348     z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
01349     z4 = z5 + tmp12;
01350 
01351     tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
01352     tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
01353 
01354     z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
01355     z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
01356 
          /* No constant multiply here, so only a shift by PASS1_BITS is used. */
01357     tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
01358 
01359     tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
01360     tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
01361 
01362     /* Final output stage */
          /* Results k and 9-k are the sum and difference of one even-part
           * term and one odd-part term.  Results 2 and 7 are stored without
           * a shift because tmp22 and tmp12 were scaled above.
           */
01363 
01364     wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
01365     wsptr[8*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
01366     wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
01367     wsptr[8*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
01368     wsptr[8*2] = (int) (tmp22 + tmp12);
01369     wsptr[8*7] = (int) (tmp22 - tmp12);
01370     wsptr[8*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
01371     wsptr[8*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
01372     wsptr[8*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
01373     wsptr[8*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
01374   }
01375 
01376   /* Pass 2: process 10 rows from work array, store into output array. */
01377 
01378   wsptr = workspace;
01379   for (ctr = 0; ctr < 10; ctr++) {
01380     outptr = output_buf[ctr] + output_col;
01381 
01382     /* Even part */
          /* Inputs are entries 0, 2, 4 and 6 of the current workspace row. */
01383 
01384     /* Add range center and fudge factor for final descale and range-limit. */
01385     z3 = (INT32) wsptr[0] +
01386        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
01387         (ONE << (PASS1_BITS+2)));
01388     z3 <<= CONST_BITS;
01389     z4 = (INT32) wsptr[4];
01390     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
01391     z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
01392     tmp10 = z3 + z1;
01393     tmp11 = z3 - z2;
01394 
          /* Unlike pass 1, tmp22 is not descaled early in this pass. */
01395     tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
01396 
01397     z2 = (INT32) wsptr[2];
01398     z3 = (INT32) wsptr[6];
01399 
01400     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
01401     tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
01402     tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
01403 
01404     tmp20 = tmp10 + tmp12;
01405     tmp24 = tmp10 - tmp12;
01406     tmp21 = tmp11 + tmp13;
01407     tmp23 = tmp11 - tmp13;
01408 
01409     /* Odd part */
          /* Inputs are entries 1, 3, 5 and 7 of the current workspace row. */
01410 
01411     z1 = (INT32) wsptr[1];
01412     z2 = (INT32) wsptr[3];
01413     z3 = (INT32) wsptr[5];
          /* z3 is shifted up in place here (pass 1 used a separate z5). */
01414     z3 <<= CONST_BITS;
01415     z4 = (INT32) wsptr[7];
01416 
01417     tmp11 = z2 + z4;
01418     tmp13 = z2 - z4;
01419 
01420     tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
01421 
01422     z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
01423     z4 = z3 + tmp12;
01424 
01425     tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
01426     tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
01427 
01428     z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
01429     z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
01430 
          /* z3 already carries the CONST_BITS shift, so only the other two
           * terms are shifted here.
           */
01431     tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
01432 
01433     tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
01434     tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
01435 
01436     /* Final output stage */
          /* Each result is shifted down by CONST_BITS+PASS1_BITS+3, masked
           * with RANGE_MASK and used as an index into range_limit.
           */
01437 
01438     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
01439                           CONST_BITS+PASS1_BITS+3)
01440                 & RANGE_MASK];
01441     outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
01442                           CONST_BITS+PASS1_BITS+3)
01443                 & RANGE_MASK];
01444     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
01445                           CONST_BITS+PASS1_BITS+3)
01446                 & RANGE_MASK];
01447     outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
01448                           CONST_BITS+PASS1_BITS+3)
01449                 & RANGE_MASK];
01450     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
01451                           CONST_BITS+PASS1_BITS+3)
01452                 & RANGE_MASK];
01453     outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
01454                           CONST_BITS+PASS1_BITS+3)
01455                 & RANGE_MASK];
01456     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
01457                           CONST_BITS+PASS1_BITS+3)
01458                 & RANGE_MASK];
01459     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
01460                           CONST_BITS+PASS1_BITS+3)
01461                 & RANGE_MASK];
01462     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
01463                           CONST_BITS+PASS1_BITS+3)
01464                 & RANGE_MASK];
01465     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
01466                           CONST_BITS+PASS1_BITS+3)
01467                 & RANGE_MASK];
01468 
01469     wsptr += 8;     /* advance pointer to next row */
01470   }
01471 }
01472 
01473 
01474 /*
01475  * Perform dequantization and inverse DCT on one block of coefficients,
01476  * producing a 11x11 output block.
01477  *
01478  * Optimized algorithm with 24 multiplications in the 1-D kernel.
01479  * cK represents sqrt(2) * cos(K*pi/22).
01480  */
01481 
01482 GLOBAL(void)
01483 jpeg_idct_11x11 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
01484          JCOEFPTR coef_block,
01485          JSAMPARRAY output_buf, JDIMENSION output_col)
01486 {
01487   INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
01488   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
01489   INT32 z1, z2, z3, z4;
01490   JCOEFPTR inptr;
01491   ISLOW_MULT_TYPE * quantptr;
01492   int * wsptr;
01493   JSAMPROW outptr;
01494   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
01495   int ctr;
01496   int workspace[8*11];  /* buffers data between passes */
01497   SHIFT_TEMPS
01498 
01499   /* Pass 1: process columns from input, store into work array. */
01500 
01501   inptr = coef_block;
01502   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
01503   wsptr = workspace;
01504   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
01505     /* Even part */
01506 
01507     tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
01508     tmp10 <<= CONST_BITS;
01509     /* Add fudge factor here for final descale. */
01510     tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
01511 
01512     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
01513     z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
01514     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
01515 
01516     tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
01517     tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
01518     z4 = z1 + z3;
01519     tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
01520     z4 -= z2;
01521     tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
01522     tmp21 = tmp20 + tmp23 + tmp25 -
01523         MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
01524     tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
01525     tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
01526     tmp24 += tmp25;
01527     tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
01528     tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
01529          MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
01530     tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
01531 
01532     /* Odd part */
01533 
01534     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
01535     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
01536     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
01537     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
01538 
01539     tmp11 = z1 + z2;
01540     tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
01541     tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
01542     tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
01543     tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
01544     tmp10 = tmp11 + tmp12 + tmp13 -
01545         MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
01546     z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
01547     tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
01548     tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
01549     z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
01550     tmp11 += z1;
01551     tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
01552     tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
01553          MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
01554          MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
01555 
01556     /* Final output stage */
01557 
01558     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
01559     wsptr[8*10] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
01560     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
01561     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
01562     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
01563     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
01564     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
01565     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
01566     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
01567     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
01568     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25, CONST_BITS-PASS1_BITS);
01569   }
01570 
01571   /* Pass 2: process 11 rows from work array, store into output array. */
01572 
01573   wsptr = workspace;
01574   for (ctr = 0; ctr < 11; ctr++) {
01575     outptr = output_buf[ctr] + output_col;
01576 
01577     /* Even part */
01578 
01579     /* Add range center and fudge factor for final descale and range-limit. */
01580     tmp10 = (INT32) wsptr[0] +
01581           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
01582            (ONE << (PASS1_BITS+2)));
01583     tmp10 <<= CONST_BITS;
01584 
01585     z1 = (INT32) wsptr[2];
01586     z2 = (INT32) wsptr[4];
01587     z3 = (INT32) wsptr[6];
01588 
01589     tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
01590     tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
01591     z4 = z1 + z3;
01592     tmp24 = MULTIPLY(z4, - FIX(1.155664402));        /* -(c2-c10) */
01593     z4 -= z2;
01594     tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
01595     tmp21 = tmp20 + tmp23 + tmp25 -
01596         MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
01597     tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
01598     tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
01599     tmp24 += tmp25;
01600     tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
01601     tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
01602          MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
01603     tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
01604 
01605     /* Odd part */
01606 
01607     z1 = (INT32) wsptr[1];
01608     z2 = (INT32) wsptr[3];
01609     z3 = (INT32) wsptr[5];
01610     z4 = (INT32) wsptr[7];
01611 
01612     tmp11 = z1 + z2;
01613     tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
01614     tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
01615     tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
01616     tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
01617     tmp10 = tmp11 + tmp12 + tmp13 -
01618         MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
01619     z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
01620     tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
01621     tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
01622     z1    = MULTIPLY(z2 + z4, - FIX(1.798248910));       /* -(c1+c9) */
01623     tmp11 += z1;
01624     tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
01625     tmp14 += MULTIPLY(z2, - FIX(1.467221301)) +          /* -(c5+c9) */
01626          MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
01627          MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
01628 
01629     /* Final output stage */
01630 
01631     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
01632                            CONST_BITS+PASS1_BITS+3)
01633                  & RANGE_MASK];
01634     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
01635                            CONST_BITS+PASS1_BITS+3)
01636                  & RANGE_MASK];
01637     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
01638                            CONST_BITS+PASS1_BITS+3)
01639                  & RANGE_MASK];
01640     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
01641                            CONST_BITS+PASS1_BITS+3)
01642                  & RANGE_MASK];
01643     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
01644                            CONST_BITS+PASS1_BITS+3)
01645                  & RANGE_MASK];
01646     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
01647                            CONST_BITS+PASS1_BITS+3)
01648                  & RANGE_MASK];
01649     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
01650                            CONST_BITS+PASS1_BITS+3)
01651                  & RANGE_MASK];
01652     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
01653                            CONST_BITS+PASS1_BITS+3)
01654                  & RANGE_MASK];
01655     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
01656                            CONST_BITS+PASS1_BITS+3)
01657                  & RANGE_MASK];
01658     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
01659                            CONST_BITS+PASS1_BITS+3)
01660                  & RANGE_MASK];
01661     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25,
01662                            CONST_BITS+PASS1_BITS+3)
01663                  & RANGE_MASK];
01664 
01665     wsptr += 8;     /* advance pointer to next row */
01666   }
01667 }
01668 
01669 
01670 /*
01671  * Perform dequantization and inverse DCT on one block of coefficients,
01672  * producing a 12x12 output block.
01673  * Writes 12 samples, starting at column output_col, into each of output_buf[0..11].
01674  * Optimized algorithm with 15 multiplications in the 1-D kernel.
01675  * cK represents sqrt(2) * cos(K*pi/24).
01676  */
01677 
01678 GLOBAL(void)
01679 jpeg_idct_12x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
01680          JCOEFPTR coef_block,
01681          JSAMPARRAY output_buf, JDIMENSION output_col)
01682 {
01683   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;  /* odd-part terms at the output stage; some double as scratch earlier */
01684   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;  /* even-part terms at the output stage */
01685   INT32 z1, z2, z3, z4;                            /* input terms and scratch values */
01686   JCOEFPTR inptr;                                  /* steps through the columns of coef_block */
01687   ISLOW_MULT_TYPE * quantptr;                      /* steps through compptr->dct_table alongside inptr */
01688   int * wsptr;                                     /* cursor into workspace */
01689   JSAMPROW outptr;                                 /* output row for the current pass-2 iteration */
01690   JSAMPLE *range_limit = IDCT_range_limit(cinfo);  /* table that every final output value is looked up in */
01691   int ctr;
01692   int workspace[8*12];  /* buffers data between passes */
01693   SHIFT_TEMPS
01694   /* workspace is used as 12 rows of 8 ints: pass 1 fills one column per iteration, pass 2 reads one row per iteration. */
01695   /* Pass 1: process columns from input, store into work array. */
01696   /* Each of the 8 input columns produces 12 values, stored at wsptr[8*0] .. wsptr[8*11]. */
01697   inptr = coef_block;
01698   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
01699   wsptr = workspace;
01700   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
01701     /* Even part */
01702     /* Combines coefficients 0, 2, 4 and 6 of the column into tmp20 .. tmp25. */
01703     z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
01704     z3 <<= CONST_BITS;  /* NOTE(review): if INT32 is signed and z3 can be negative here, this left shift is undefined in ISO C - confirm */
01705     /* Add fudge factor here for final descale. */
01706     z3 += ONE << (CONST_BITS-PASS1_BITS-1);
01707     /* The value added is half of 1 << (CONST_BITS-PASS1_BITS), the amount shifted out in the output stage below. */
01708     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
01709     z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
01710 
01711     tmp10 = z3 + z4;
01712     tmp11 = z3 - z4;
01713     /* z4 is free again from here and is reused for the c2 product of coefficient 2. */
01714     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
01715     z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
01716     z1 <<= CONST_BITS;  /* NOTE(review): presumably brings z1 to the same scale as a MULTIPLY(x, FIX(...)) result - confirm against FIX */
01717     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
01718     z2 <<= CONST_BITS;
01719     /* tmp12 is a scratch value that is reassigned three times below; the statement order matters. */
01720     tmp12 = z1 - z2;
01721 
01722     tmp21 = z3 + tmp12;
01723     tmp24 = z3 - tmp12;
01724 
01725     tmp12 = z4 + z2;
01726 
01727     tmp20 = tmp10 + tmp12;
01728     tmp25 = tmp10 - tmp12;
01729 
01730     tmp12 = z4 - z1 - z2;
01731 
01732     tmp22 = tmp11 + tmp12;
01733     tmp23 = tmp11 - tmp12;
01734 
01735     /* Odd part */
01736     /* Combines coefficients 1, 3, 5 and 7 of the column into tmp10 .. tmp15 (overwriting the even-part scratch values). */
01737     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
01738     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
01739     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
01740     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
01741 
01742     tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
01743     tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
01744     /* tmp10 temporarily holds z1 + z3 before it receives its final value. */
01745     tmp10 = z1 + z3;
01746     tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
01747     tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
01748     tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
01749     tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
01750     tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
01751     tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
01752     tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
01753          MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
01754     /* The earlier tmp11/tmp14 values are already folded into tmp10, tmp12, tmp13, tmp15; both are recomputed below, with z3 as scratch. */
01755     z1 -= z4;
01756     z2 -= z3;
01757     z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
01758     tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
01759     tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
01760 
01761     /* Final output stage */
01762     /* Row k gets (even + odd) and row 11-k gets (even - odd), each shifted right by CONST_BITS-PASS1_BITS. */
01763     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
01764     wsptr[8*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
01765     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
01766     wsptr[8*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
01767     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
01768     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
01769     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
01770     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
01771     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
01772     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
01773     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
01774     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
01775   }
01776 
01777   /* Pass 2: process 12 rows from work array, store into output array. */
01778   /* Same arithmetic as pass 1, but the 8 inputs come from one workspace row and the 12 results go to outptr[0..11]. */
01779   wsptr = workspace;
01780   for (ctr = 0; ctr < 12; ctr++) {
01781     outptr = output_buf[ctr] + output_col;
01782 
01783     /* Even part */
01784 
01785     /* Add range center and fudge factor for final descale and range-limit. */
01786     z3 = (INT32) wsptr[0] +
01787        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
01788         (ONE << (PASS1_BITS+2)));
01789     z3 <<= CONST_BITS;
01790     /* After this shift the ONE term equals half of 1 << (CONST_BITS+PASS1_BITS+3), the shift used in the output stage. */
01791     z4 = (INT32) wsptr[4];
01792     z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
01793 
01794     tmp10 = z3 + z4;
01795     tmp11 = z3 - z4;
01796 
01797     z1 = (INT32) wsptr[2];
01798     z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
01799     z1 <<= CONST_BITS;  /* NOTE(review): left shift of a possibly negative value - see the matching note in pass 1 */
01800     z2 = (INT32) wsptr[6];
01801     z2 <<= CONST_BITS;
01802     /* tmp12 is scratch and is reassigned three times below, exactly as in pass 1. */
01803     tmp12 = z1 - z2;
01804 
01805     tmp21 = z3 + tmp12;
01806     tmp24 = z3 - tmp12;
01807 
01808     tmp12 = z4 + z2;
01809 
01810     tmp20 = tmp10 + tmp12;
01811     tmp25 = tmp10 - tmp12;
01812 
01813     tmp12 = z4 - z1 - z2;
01814 
01815     tmp22 = tmp11 + tmp12;
01816     tmp23 = tmp11 - tmp12;
01817 
01818     /* Odd part */
01819     /* Inputs are the odd-index entries 1, 3, 5 and 7 of the current workspace row. */
01820     z1 = (INT32) wsptr[1];
01821     z2 = (INT32) wsptr[3];
01822     z3 = (INT32) wsptr[5];
01823     z4 = (INT32) wsptr[7];
01824 
01825     tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
01826     tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
01827     /* tmp10 temporarily holds z1 + z3 before it receives its final value. */
01828     tmp10 = z1 + z3;
01829     tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
01830     tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
01831     tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
01832     tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
01833     tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
01834     tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
01835     tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
01836          MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
01837     /* tmp11 and tmp14 are recomputed from z1 - z4 and z2 - z3; z3 is overwritten as scratch. */
01838     z1 -= z4;
01839     z2 -= z3;
01840     z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
01841     tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
01842     tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
01843 
01844     /* Final output stage */
01845     /* Column k gets (even + odd), column 11-k gets (even - odd); each value is shifted, masked with RANGE_MASK and looked up in range_limit. */
01846     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
01847                            CONST_BITS+PASS1_BITS+3)
01848                  & RANGE_MASK];
01849     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
01850                            CONST_BITS+PASS1_BITS+3)
01851                  & RANGE_MASK];
01852     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
01853                            CONST_BITS+PASS1_BITS+3)
01854                  & RANGE_MASK];
01855     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
01856                            CONST_BITS+PASS1_BITS+3)
01857                  & RANGE_MASK];
01858     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
01859                            CONST_BITS+PASS1_BITS+3)
01860                  & RANGE_MASK];
01861     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
01862                            CONST_BITS+PASS1_BITS+3)
01863                  & RANGE_MASK];
01864     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
01865                            CONST_BITS+PASS1_BITS+3)
01866                  & RANGE_MASK];
01867     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
01868                            CONST_BITS+PASS1_BITS+3)
01869                  & RANGE_MASK];
01870     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
01871                            CONST_BITS+PASS1_BITS+3)
01872                  & RANGE_MASK];
01873     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
01874                            CONST_BITS+PASS1_BITS+3)
01875                  & RANGE_MASK];
01876     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
01877                            CONST_BITS+PASS1_BITS+3)
01878                  & RANGE_MASK];
01879     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
01880                            CONST_BITS+PASS1_BITS+3)
01881                  & RANGE_MASK];
01882 
01883     wsptr += 8;     /* advance pointer to next row */
01884   }
01885 }
01886 
01887 
01888 /*
01889  * Perform dequantization and inverse DCT on one block of coefficients,
01890  * producing a 13x13 output block.
01891  * Writes 13 samples, starting at column output_col, into each of output_buf[0..12].
01892  * Optimized algorithm with 29 multiplications in the 1-D kernel.
01893  * cK represents sqrt(2) * cos(K*pi/26).
01894  */
01895 
01896 GLOBAL(void)
01897 jpeg_idct_13x13 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
01898          JCOEFPTR coef_block,
01899          JSAMPARRAY output_buf, JDIMENSION output_col)
01900 {
01901   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;         /* odd-part terms at the output stage; some double as scratch earlier */
01902   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;  /* even-part terms; tmp26 alone feeds the middle output */
01903   INT32 z1, z2, z3, z4;                                   /* input terms and scratch values */
01904   JCOEFPTR inptr;                                         /* steps through the columns of coef_block */
01905   ISLOW_MULT_TYPE * quantptr;                             /* steps through compptr->dct_table alongside inptr */
01906   int * wsptr;                                            /* cursor into workspace */
01907   JSAMPROW outptr;                                        /* output row for the current pass-2 iteration */
01908   JSAMPLE *range_limit = IDCT_range_limit(cinfo);         /* table that every final output value is looked up in */
01909   int ctr;
01910   int workspace[8*13];  /* buffers data between passes */
01911   SHIFT_TEMPS
01912   /* workspace is used as 13 rows of 8 ints: pass 1 fills one column per iteration, pass 2 reads one row per iteration. */
01913   /* Pass 1: process columns from input, store into work array. */
01914   /* Each of the 8 input columns produces 13 values, stored at wsptr[8*0] .. wsptr[8*12]. */
01915   inptr = coef_block;
01916   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
01917   wsptr = workspace;
01918   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
01919     /* Even part */
01920     /* Combines coefficients 0, 2, 4 and 6 of the column into tmp20 .. tmp26. */
01921     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
01922     z1 <<= CONST_BITS;  /* NOTE(review): if INT32 is signed and z1 can be negative here, this left shift is undefined in ISO C - confirm */
01923     /* Add fudge factor here for final descale. */
01924     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
01925     /* The value added is half of 1 << (CONST_BITS-PASS1_BITS), the amount shifted out in the output stage below. */
01926     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
01927     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
01928     z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
01929     /* tmp10/tmp11 hold the sum and difference of coefficients 4 and 6 for the whole even part. */
01930     tmp10 = z3 + z4;
01931     tmp11 = z3 - z4;
01932     /* tmp12 and tmp13 are scratch values, recomputed for each of the three output pairs below; tmp13 also carries z1. */
01933     tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
01934     tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
01935 
01936     tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
01937     tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
01938 
01939     tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
01940     tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
01941 
01942     tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
01943     tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
01944 
01945     tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
01946     tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
01947     /* Here tmp13 contains -z1 and is subtracted below, so z1 still enters tmp23 and tmp24 with a positive sign. */
01948     tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
01949     tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
01950 
01951     tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
01952 
01953     /* Odd part */
01954     /* Combines coefficients 1, 3, 5 and 7 of the column into tmp10 .. tmp15 (overwriting the even-part scratch values). */
01955     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
01956     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
01957     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
01958     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
01959     /* tmp14 and tmp15 serve as scratch until their final values are assigned near the end; the statement order matters. */
01960     tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
01961     tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
01962     tmp15 = z1 + z4;
01963     tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
01964     tmp10 = tmp11 + tmp12 + tmp13 -
01965         MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
01966     tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
01967     tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
01968     tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
01969     tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
01970     tmp11 += tmp14;
01971     tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
01972     tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
01973     tmp12 += tmp14;
01974     tmp13 += tmp14;
01975     tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
01976     tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
01977         MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
01978     z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
01979     tmp14 += z1;
01980     tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
01981          MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
01982 
01983     /* Final output stage */
01984     /* Row k gets (even + odd) and row 12-k gets (even - odd); the middle row 6 is tmp26 alone. All are shifted right by CONST_BITS-PASS1_BITS. */
01985     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
01986     wsptr[8*12] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
01987     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
01988     wsptr[8*11] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
01989     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
01990     wsptr[8*10] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
01991     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
01992     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
01993     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
01994     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
01995     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
01996     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
01997     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26, CONST_BITS-PASS1_BITS);
01998   }
01999 
02000   /* Pass 2: process 13 rows from work array, store into output array. */
02001   /* Same arithmetic as pass 1, but the 8 inputs come from one workspace row and the 13 results go to outptr[0..12]. */
02002   wsptr = workspace;
02003   for (ctr = 0; ctr < 13; ctr++) {
02004     outptr = output_buf[ctr] + output_col;
02005 
02006     /* Even part */
02007 
02008     /* Add range center and fudge factor for final descale and range-limit. */
02009     z1 = (INT32) wsptr[0] +
02010        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
02011         (ONE << (PASS1_BITS+2)));
02012     z1 <<= CONST_BITS;
02013     /* After this shift the ONE term equals half of 1 << (CONST_BITS+PASS1_BITS+3), the shift used in the output stage. */
02014     z2 = (INT32) wsptr[2];
02015     z3 = (INT32) wsptr[4];
02016     z4 = (INT32) wsptr[6];
02017 
02018     tmp10 = z3 + z4;
02019     tmp11 = z3 - z4;
02020     /* tmp12 and tmp13 are scratch values, recomputed for each of the three output pairs below, exactly as in pass 1. */
02021     tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
02022     tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
02023 
02024     tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
02025     tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
02026 
02027     tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
02028     tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
02029 
02030     tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
02031     tmp25 = MULTIPLY(z2, - FIX(1.252223920)) + tmp12 + tmp13; /* c4 */
02032 
02033     tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
02034     tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
02035     /* Here tmp13 contains -z1 and is subtracted below, so z1 still enters tmp23 and tmp24 with a positive sign. */
02036     tmp23 = MULTIPLY(z2, - FIX(0.170464608)) - tmp12 - tmp13; /* c12 */
02037     tmp24 = MULTIPLY(z2, - FIX(0.803364869)) + tmp12 - tmp13; /* c8 */
02038 
02039     tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
02040 
02041     /* Odd part */
02042     /* Inputs are the odd-index entries 1, 3, 5 and 7 of the current workspace row. */
02043     z1 = (INT32) wsptr[1];
02044     z2 = (INT32) wsptr[3];
02045     z3 = (INT32) wsptr[5];
02046     z4 = (INT32) wsptr[7];
02047     /* tmp14 and tmp15 serve as scratch until their final values are assigned near the end; the statement order matters. */
02048     tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
02049     tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
02050     tmp15 = z1 + z4;
02051     tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
02052     tmp10 = tmp11 + tmp12 + tmp13 -
02053         MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
02054     tmp14 = MULTIPLY(z2 + z3, - FIX(0.338443458));   /* -c11 */
02055     tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
02056     tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
02057     tmp14 = MULTIPLY(z2 + z4, - FIX(1.163874945));   /* -c5 */
02058     tmp11 += tmp14;
02059     tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
02060     tmp14 = MULTIPLY(z3 + z4, - FIX(0.657217813));   /* -c9 */
02061     tmp12 += tmp14;
02062     tmp13 += tmp14;
02063     tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
02064     tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
02065         MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
02066     z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
02067     tmp14 += z1;
02068     tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
02069          MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
02070 
02071     /* Final output stage */
02072     /* Column k gets (even + odd), column 12-k gets (even - odd), column 6 is tmp26 alone; each value is shifted, masked with RANGE_MASK and looked up in range_limit. */
02073     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
02074                            CONST_BITS+PASS1_BITS+3)
02075                  & RANGE_MASK];
02076     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
02077                            CONST_BITS+PASS1_BITS+3)
02078                  & RANGE_MASK];
02079     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
02080                            CONST_BITS+PASS1_BITS+3)
02081                  & RANGE_MASK];
02082     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
02083                            CONST_BITS+PASS1_BITS+3)
02084                  & RANGE_MASK];
02085     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
02086                            CONST_BITS+PASS1_BITS+3)
02087                  & RANGE_MASK];
02088     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
02089                            CONST_BITS+PASS1_BITS+3)
02090                  & RANGE_MASK];
02091     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
02092                            CONST_BITS+PASS1_BITS+3)
02093                  & RANGE_MASK];
02094     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
02095                            CONST_BITS+PASS1_BITS+3)
02096                  & RANGE_MASK];
02097     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
02098                            CONST_BITS+PASS1_BITS+3)
02099                  & RANGE_MASK];
02100     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
02101                            CONST_BITS+PASS1_BITS+3)
02102                  & RANGE_MASK];
02103     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
02104                            CONST_BITS+PASS1_BITS+3)
02105                  & RANGE_MASK];
02106     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
02107                            CONST_BITS+PASS1_BITS+3)
02108                  & RANGE_MASK];
02109     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26,
02110                            CONST_BITS+PASS1_BITS+3)
02111                  & RANGE_MASK];
02112 
02113     wsptr += 8;     /* advance pointer to next row */
02114   }
02115 }
02116 
02117 
02118 /*
02119  * Perform dequantization and inverse DCT on one block of coefficients,
02120  * producing a 14x14 output block.
02121  *
02122  * Optimized algorithm with 20 multiplications in the 1-D kernel.
02123  * cK represents sqrt(2) * cos(K*pi/28).
02124  */
02125 
02126 GLOBAL(void)
02127 jpeg_idct_14x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
02128          JCOEFPTR coef_block,
02129          JSAMPARRAY output_buf, JDIMENSION output_col)
02130 {
02131   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
02132   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
02133   INT32 z1, z2, z3, z4;
02134   JCOEFPTR inptr;
02135   ISLOW_MULT_TYPE * quantptr;
02136   int * wsptr;
02137   JSAMPROW outptr;
02138   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
02139   int ctr;
02140   int workspace[8*14];  /* buffers data between passes */
02141   SHIFT_TEMPS
02142 
02143   /* Pass 1: process columns from input, store into work array. */
02144 
02145   inptr = coef_block;
02146   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
02147   wsptr = workspace;
02148   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
02149     /* Even part */
02150 
02151     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
02152     z1 <<= CONST_BITS;
02153     /* Add fudge factor here for final descale. */
02154     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
02155     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
02156     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
02157     z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
02158     z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
02159 
02160     tmp10 = z1 + z2;
02161     tmp11 = z1 + z3;
02162     tmp12 = z1 - z4;
02163 
02164     tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
02165             CONST_BITS-PASS1_BITS);
02166 
02167     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
02168     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
02169 
02170     z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
02171 
02172     tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
02173     tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
02174     tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
02175         MULTIPLY(z2, FIX(1.378756276));      /* c2 */
02176 
02177     tmp20 = tmp10 + tmp13;
02178     tmp26 = tmp10 - tmp13;
02179     tmp21 = tmp11 + tmp14;
02180     tmp25 = tmp11 - tmp14;
02181     tmp22 = tmp12 + tmp15;
02182     tmp24 = tmp12 - tmp15;
02183 
02184     /* Odd part */
02185 
02186     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
02187     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
02188     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
02189     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
02190     tmp13 = z4 << CONST_BITS;
02191 
02192     tmp14 = z1 + z3;
02193     tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
02194     tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
02195     tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
02196     tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
02197     tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
02198     z1    -= z2;
02199     tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
02200     tmp16 += tmp15;
02201     z1    += z4;
02202     z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
02203     tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
02204     tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
02205     z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
02206     tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
02207     tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
02208 
02209     tmp13 = (z1 - z3) << PASS1_BITS;
02210 
02211     /* Final output stage */
02212 
02213     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
02214     wsptr[8*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
02215     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
02216     wsptr[8*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
02217     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
02218     wsptr[8*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
02219     wsptr[8*3]  = (int) (tmp23 + tmp13);
02220     wsptr[8*10] = (int) (tmp23 - tmp13);
02221     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
02222     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
02223     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
02224     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
02225     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
02226     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
02227   }
02228 
02229   /* Pass 2: process 14 rows from work array, store into output array. */
02230 
02231   wsptr = workspace;
02232   for (ctr = 0; ctr < 14; ctr++) {
02233     outptr = output_buf[ctr] + output_col;
02234 
02235     /* Even part */
02236 
02237     /* Add range center and fudge factor for final descale and range-limit. */
02238     z1 = (INT32) wsptr[0] +
02239        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
02240         (ONE << (PASS1_BITS+2)));
02241     z1 <<= CONST_BITS;
02242     z4 = (INT32) wsptr[4];
02243     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
02244     z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
02245     z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
02246 
02247     tmp10 = z1 + z2;
02248     tmp11 = z1 + z3;
02249     tmp12 = z1 - z4;
02250 
02251     tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
02252 
02253     z1 = (INT32) wsptr[2];
02254     z2 = (INT32) wsptr[6];
02255 
02256     z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
02257 
02258     tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
02259     tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
02260     tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
02261         MULTIPLY(z2, FIX(1.378756276));      /* c2 */
02262 
02263     tmp20 = tmp10 + tmp13;
02264     tmp26 = tmp10 - tmp13;
02265     tmp21 = tmp11 + tmp14;
02266     tmp25 = tmp11 - tmp14;
02267     tmp22 = tmp12 + tmp15;
02268     tmp24 = tmp12 - tmp15;
02269 
02270     /* Odd part */
02271 
02272     z1 = (INT32) wsptr[1];
02273     z2 = (INT32) wsptr[3];
02274     z3 = (INT32) wsptr[5];
02275     z4 = (INT32) wsptr[7];
02276     z4 <<= CONST_BITS;
02277 
02278     tmp14 = z1 + z3;
02279     tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
02280     tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
02281     tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
02282     tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
02283     tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
02284     z1    -= z2;
02285     tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
02286     tmp16 += tmp15;
02287     tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
02288     tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
02289     tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
02290     tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
02291     tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
02292     tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
02293 
02294     tmp13 = ((z1 - z3) << CONST_BITS) + z4;
02295 
02296     /* Final output stage */
02297 
02298     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
02299                            CONST_BITS+PASS1_BITS+3)
02300                  & RANGE_MASK];
02301     outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
02302                            CONST_BITS+PASS1_BITS+3)
02303                  & RANGE_MASK];
02304     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
02305                            CONST_BITS+PASS1_BITS+3)
02306                  & RANGE_MASK];
02307     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
02308                            CONST_BITS+PASS1_BITS+3)
02309                  & RANGE_MASK];
02310     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
02311                            CONST_BITS+PASS1_BITS+3)
02312                  & RANGE_MASK];
02313     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
02314                            CONST_BITS+PASS1_BITS+3)
02315                  & RANGE_MASK];
02316     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
02317                            CONST_BITS+PASS1_BITS+3)
02318                  & RANGE_MASK];
02319     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
02320                            CONST_BITS+PASS1_BITS+3)
02321                  & RANGE_MASK];
02322     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
02323                            CONST_BITS+PASS1_BITS+3)
02324                  & RANGE_MASK];
02325     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
02326                            CONST_BITS+PASS1_BITS+3)
02327                  & RANGE_MASK];
02328     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
02329                            CONST_BITS+PASS1_BITS+3)
02330                  & RANGE_MASK];
02331     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
02332                            CONST_BITS+PASS1_BITS+3)
02333                  & RANGE_MASK];
02334     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
02335                            CONST_BITS+PASS1_BITS+3)
02336                  & RANGE_MASK];
02337     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
02338                            CONST_BITS+PASS1_BITS+3)
02339                  & RANGE_MASK];
02340 
02341     wsptr += 8;     /* advance pointer to next row */
02342   }
02343 }
02344 
02345 
02346 /*
02347  * Perform dequantization and inverse DCT on one block of coefficients,
02348  * producing a 15x15 output block.
02349  *
02350  * Optimized algorithm with 22 multiplications in the 1-D kernel.
02351  * cK represents sqrt(2) * cos(K*pi/30).
       *
       * Parameters, as used by the code below:
       *   cinfo      - only passed to IDCT_range_limit() to obtain the
       *                range-limit lookup table.
       *   compptr    - supplies dct_table, which is cast to ISLOW_MULT_TYPE *
       *                and read as the per-coefficient dequantization factors.
       *   coef_block - input coefficients; eight columns are read, each at
       *                offsets DCTSIZE*0 .. DCTSIZE*7 (DCTSIZE is defined
       *                outside this view; presumably 8 -- TODO confirm).
       *   output_buf - array of output row pointers; rows 0..14 are written.
       *   output_col - column offset added to each output row pointer; samples
       *                output_col .. output_col+14 of every row are written.
       *
       * Pass 1 applies the 1-D kernel to each of the 8 input columns and stores
       * 15 intermediate values per column in an 8-wide int workspace.
       * Pass 2 applies the same kernel to each of the 15 workspace rows (8
       * inputs per row) and stores 15 range-limited samples per output row.
02352  */
02353 
02354 GLOBAL(void)
02355 jpeg_idct_15x15 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
02356          JCOEFPTR coef_block,
02357          JSAMPARRAY output_buf, JDIMENSION output_col)
02358 {
02359   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
02360   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
02361   INT32 z1, z2, z3, z4;
02362   JCOEFPTR inptr;
02363   ISLOW_MULT_TYPE * quantptr;
02364   int * wsptr;
02365   JSAMPROW outptr;
02366   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
02367   int ctr;
02368   int workspace[8*15];  /* buffers data between passes */
        /* SHIFT_TEMPS is a macro defined outside this view; presumably it
         * declares whatever temporary RIGHT_SHIFT needs -- TODO confirm.
         */
02369   SHIFT_TEMPS
02370 
02371   /* Pass 1: process columns from input, store into work array. */
        /* Each iteration handles one column: inptr, quantptr and wsptr all step
         * by one element, and successive rows are reached with a stride of
         * DCTSIZE (input, quant table) or 8 (workspace).
         */
02372 
02373   inptr = coef_block;
02374   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
02375   wsptr = workspace;
02376   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
02377     /* Even part */
          /* z1 is the row-0 (DC) term pre-shifted by CONST_BITS; z2, z3, z4 are
           * the dequantized row-2, row-4 and row-6 coefficients.
           */
02378 
02379     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
02380     z1 <<= CONST_BITS;
02381     /* Add fudge factor here for final descale. */
          /* The added constant is half of the divisor implied by the
           * RIGHT_SHIFT by CONST_BITS-PASS1_BITS in the output stage, so the
           * descale rounds instead of rounding down (assuming RIGHT_SHIFT is an
           * arithmetic shift; it is defined outside this view).
           */
02382     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
02383 
02384     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
02385     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
02386     z4 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
02387 
02388     tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
02389     tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
02390 
02391     tmp12 = z1 - tmp10;
02392     tmp13 = z1 + tmp11;
02393     z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
02394 
          /* From here z4 holds the difference and z3 the sum of the row-2 and
           * row-4 coefficients; z2 still holds the row-2 coefficient until it
           * is scaled below.  tmp10/tmp11 are reused as scratch products.
           */
02395     z4 = z2 - z3;
02396     z3 += z2;
02397     tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
02398     tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
02399     z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
02400 
02401     tmp20 = tmp13 + tmp10 + tmp11;
02402     tmp23 = tmp12 - tmp10 + tmp11 + z2;
02403 
02404     tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
02405     tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
02406 
02407     tmp25 = tmp13 - tmp10 - tmp11;
02408     tmp26 = tmp12 + tmp10 - tmp11 - z2;
02409 
02410     tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
02411     tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
02412 
02413     tmp21 = tmp12 + tmp10 + tmp11;
02414     tmp24 = tmp13 - tmp10 + tmp11;
02415     tmp11 += tmp11;
02416     tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
02417     tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
02418 
02419     /* Odd part */
          /* z4 first holds the row-5 coefficient only long enough to form z3
           * (its product with c5); it is then reloaded with the row-7
           * coefficient.  tmp10..tmp16 end up as the seven odd-part results.
           */
02420 
02421     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
02422     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
02423     z4 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
02424     z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
02425     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
02426 
02427     tmp13 = z2 - z4;
02428     tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
02429     tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
02430     tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
02431 
02432     tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
02433     tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
02434     z2 = z1 - z4;
02435     tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
02436 
02437     tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
02438     tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
02439     tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
02440     z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
02441     tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
02442     tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
02443 
02444     /* Final output stage */
          /* Workspace row k receives even+odd and row 14-k receives even-odd;
           * the middle row (7) is tmp27 alone, with no odd-part term.
           */
02445 
02446     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
02447     wsptr[8*14] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
02448     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
02449     wsptr[8*13] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
02450     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
02451     wsptr[8*12] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
02452     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
02453     wsptr[8*11] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
02454     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
02455     wsptr[8*10] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
02456     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
02457     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
02458     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
02459     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
02460     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27, CONST_BITS-PASS1_BITS);
02461   }
02462 
02463   /* Pass 2: process 15 rows from work array, store into output array. */
        /* wsptr[0..7] are the 8 values of the current workspace row; wsptr is
         * advanced by 8 at the end of each iteration.  The arithmetic mirrors
         * pass 1 with workspace values in place of dequantized coefficients.
         */
02464 
02465   wsptr = workspace;
02466   for (ctr = 0; ctr < 15; ctr++) {
02467     outptr = output_buf[ctr] + output_col;
02468 
02469     /* Even part */
02470 
02471     /* Add range center and fudge factor for final descale and range-limit. */
          /* Once z1 is shifted left by CONST_BITS, the two added constants sit
           * at bit positions CONST_BITS+PASS1_BITS+3 and CONST_BITS+PASS1_BITS+2,
           * i.e. RANGE_CENTER and one half in units of the final RIGHT_SHIFT by
           * CONST_BITS+PASS1_BITS+3.
           */
02472     z1 = (INT32) wsptr[0] +
02473        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
02474         (ONE << (PASS1_BITS+2)));
02475     z1 <<= CONST_BITS;
02476 
02477     z2 = (INT32) wsptr[2];
02478     z3 = (INT32) wsptr[4];
02479     z4 = (INT32) wsptr[6];
02480 
02481     tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
02482     tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
02483 
02484     tmp12 = z1 - tmp10;
02485     tmp13 = z1 + tmp11;
02486     z1 -= (tmp11 - tmp10) << 1;             /* c0 = (c6-c12)*2 */
02487 
02488     z4 = z2 - z3;
02489     z3 += z2;
02490     tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
02491     tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
02492     z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
02493 
02494     tmp20 = tmp13 + tmp10 + tmp11;
02495     tmp23 = tmp12 - tmp10 + tmp11 + z2;
02496 
02497     tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
02498     tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
02499 
02500     tmp25 = tmp13 - tmp10 - tmp11;
02501     tmp26 = tmp12 + tmp10 - tmp11 - z2;
02502 
02503     tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
02504     tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
02505 
02506     tmp21 = tmp12 + tmp10 + tmp11;
02507     tmp24 = tmp13 - tmp10 + tmp11;
02508     tmp11 += tmp11;
02509     tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
02510     tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
02511 
02512     /* Odd part */
          /* As in pass 1, z4 briefly holds element 5 to form z3 before being
           * reloaded with element 7.
           */
02513 
02514     z1 = (INT32) wsptr[1];
02515     z2 = (INT32) wsptr[3];
02516     z4 = (INT32) wsptr[5];
02517     z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
02518     z4 = (INT32) wsptr[7];
02519 
02520     tmp13 = z2 - z4;
02521     tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
02522     tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
02523     tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
02524 
02525     tmp13 = MULTIPLY(z2, - FIX(0.831253876));               /* -c9 */
02526     tmp15 = MULTIPLY(z2, - FIX(1.344997024));               /* -c3 */
02527     z2 = z1 - z4;
02528     tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
02529 
02530     tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
02531     tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
02532     tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
02533     z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
02534     tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
02535     tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
02536 
02537     /* Final output stage */
          /* Sample k is even+odd and sample 14-k is even-odd; sample 7 is tmp27
           * alone.  Each descaled value is masked with RANGE_MASK and used as
           * an index into range_limit to produce the stored sample.
           */
02538 
02539     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
02540                            CONST_BITS+PASS1_BITS+3)
02541                  & RANGE_MASK];
02542     outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
02543                            CONST_BITS+PASS1_BITS+3)
02544                  & RANGE_MASK];
02545     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
02546                            CONST_BITS+PASS1_BITS+3)
02547                  & RANGE_MASK];
02548     outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
02549                            CONST_BITS+PASS1_BITS+3)
02550                  & RANGE_MASK];
02551     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
02552                            CONST_BITS+PASS1_BITS+3)
02553                  & RANGE_MASK];
02554     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
02555                            CONST_BITS+PASS1_BITS+3)
02556                  & RANGE_MASK];
02557     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
02558                            CONST_BITS+PASS1_BITS+3)
02559                  & RANGE_MASK];
02560     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
02561                            CONST_BITS+PASS1_BITS+3)
02562                  & RANGE_MASK];
02563     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
02564                            CONST_BITS+PASS1_BITS+3)
02565                  & RANGE_MASK];
02566     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
02567                            CONST_BITS+PASS1_BITS+3)
02568                  & RANGE_MASK];
02569     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
02570                            CONST_BITS+PASS1_BITS+3)
02571                  & RANGE_MASK];
02572     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
02573                            CONST_BITS+PASS1_BITS+3)
02574                  & RANGE_MASK];
02575     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
02576                            CONST_BITS+PASS1_BITS+3)
02577                  & RANGE_MASK];
02578     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
02579                            CONST_BITS+PASS1_BITS+3)
02580                  & RANGE_MASK];
02581     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27,
02582                            CONST_BITS+PASS1_BITS+3)
02583                  & RANGE_MASK];
02584 
02585     wsptr += 8;     /* advance pointer to next row */
02586   }
02587 }
02588 
02589 
02590 /*
02591  * Perform dequantization and inverse DCT on one block of coefficients,
02592  * producing a 16x16 output block.
02593  *
02594  * Optimized algorithm with 28 multiplications in the 1-D kernel.
02595  * cK represents sqrt(2) * cos(K*pi/32).
02596  */
02597 
02598 GLOBAL(void)
02599 jpeg_idct_16x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
02600          JCOEFPTR coef_block,
02601          JSAMPARRAY output_buf, JDIMENSION output_col)
02602 {
02603   INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
02604   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
02605   INT32 z1, z2, z3, z4;
02606   JCOEFPTR inptr;
02607   ISLOW_MULT_TYPE * quantptr;
02608   int * wsptr;
02609   JSAMPROW outptr;
02610   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
02611   int ctr;
02612   int workspace[8*16];  /* buffers data between passes */
02613   SHIFT_TEMPS
02614 
02615   /* Pass 1: process columns from input, store into work array. */
02616 
02617   inptr = coef_block;
02618   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
02619   wsptr = workspace;
02620   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
02621     /* Even part */
02622 
02623     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
02624     tmp0 <<= CONST_BITS;
02625     /* Add fudge factor here for final descale. */
02626     tmp0 += 1 << (CONST_BITS-PASS1_BITS-1);
02627 
02628     z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
02629     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
02630     tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
02631 
02632     tmp10 = tmp0 + tmp1;
02633     tmp11 = tmp0 - tmp1;
02634     tmp12 = tmp0 + tmp2;
02635     tmp13 = tmp0 - tmp2;
02636 
02637     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
02638     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
02639     z3 = z1 - z2;
02640     z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
02641     z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
02642 
02643     tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
02644     tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
02645     tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
02646     tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
02647 
02648     tmp20 = tmp10 + tmp0;
02649     tmp27 = tmp10 - tmp0;
02650     tmp21 = tmp12 + tmp1;
02651     tmp26 = tmp12 - tmp1;
02652     tmp22 = tmp13 + tmp2;
02653     tmp25 = tmp13 - tmp2;
02654     tmp23 = tmp11 + tmp3;
02655     tmp24 = tmp11 - tmp3;
02656 
02657     /* Odd part */
02658 
02659     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
02660     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
02661     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
02662     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
02663 
02664     tmp11 = z1 + z3;
02665 
02666     tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
02667     tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
02668     tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
02669     tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
02670     tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
02671     tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
02672     tmp0  = tmp1 + tmp2 + tmp3 -
02673         MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
02674     tmp13 = tmp10 + tmp11 + tmp12 -
02675         MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
02676     z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
02677     tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
02678     tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
02679     z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
02680     tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
02681     tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
02682     z2    += z4;
02683     z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
02684     tmp1  += z1;
02685     tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
02686     z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
02687     tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
02688     tmp12 += z2;
02689     z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
02690     tmp2  += z2;
02691     tmp3  += z2;
02692     z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
02693     tmp10 += z2;
02694     tmp11 += z2;
02695 
02696     /* Final output stage */
02697 
02698     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
02699     wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
02700     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
02701     wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
02702     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
02703     wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
02704     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
02705     wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
02706     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
02707     wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
02708     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
02709     wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
02710     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
02711     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
02712     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
02713     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
02714   }
02715 
02716   /* Pass 2: process 16 rows from work array, store into output array. */
02717 
02718   wsptr = workspace;
02719   for (ctr = 0; ctr < 16; ctr++) {
02720     outptr = output_buf[ctr] + output_col;
02721 
02722     /* Even part */
02723 
02724     /* Add range center and fudge factor for final descale and range-limit. */
02725     tmp0 = (INT32) wsptr[0] +
02726          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
02727           (ONE << (PASS1_BITS+2)));
02728     tmp0 <<= CONST_BITS;
02729 
02730     z1 = (INT32) wsptr[4];
02731     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
02732     tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
02733 
02734     tmp10 = tmp0 + tmp1;
02735     tmp11 = tmp0 - tmp1;
02736     tmp12 = tmp0 + tmp2;
02737     tmp13 = tmp0 - tmp2;
02738 
02739     z1 = (INT32) wsptr[2];
02740     z2 = (INT32) wsptr[6];
02741     z3 = z1 - z2;
02742     z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
02743     z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
02744 
02745     tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
02746     tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
02747     tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
02748     tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
02749 
02750     tmp20 = tmp10 + tmp0;
02751     tmp27 = tmp10 - tmp0;
02752     tmp21 = tmp12 + tmp1;
02753     tmp26 = tmp12 - tmp1;
02754     tmp22 = tmp13 + tmp2;
02755     tmp25 = tmp13 - tmp2;
02756     tmp23 = tmp11 + tmp3;
02757     tmp24 = tmp11 - tmp3;
02758 
02759     /* Odd part */
02760 
02761     z1 = (INT32) wsptr[1];
02762     z2 = (INT32) wsptr[3];
02763     z3 = (INT32) wsptr[5];
02764     z4 = (INT32) wsptr[7];
02765 
02766     tmp11 = z1 + z3;
02767 
02768     tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
02769     tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
02770     tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
02771     tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
02772     tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
02773     tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
02774     tmp0  = tmp1 + tmp2 + tmp3 -
02775         MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
02776     tmp13 = tmp10 + tmp11 + tmp12 -
02777         MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
02778     z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
02779     tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
02780     tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
02781     z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
02782     tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
02783     tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
02784     z2    += z4;
02785     z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
02786     tmp1  += z1;
02787     tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
02788     z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
02789     tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
02790     tmp12 += z2;
02791     z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
02792     tmp2  += z2;
02793     tmp3  += z2;
02794     z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
02795     tmp10 += z2;
02796     tmp11 += z2;
02797 
02798     /* Final output stage */
02799 
02800     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
02801                            CONST_BITS+PASS1_BITS+3)
02802                  & RANGE_MASK];
02803     outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
02804                            CONST_BITS+PASS1_BITS+3)
02805                  & RANGE_MASK];
02806     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
02807                            CONST_BITS+PASS1_BITS+3)
02808                  & RANGE_MASK];
02809     outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
02810                            CONST_BITS+PASS1_BITS+3)
02811                  & RANGE_MASK];
02812     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
02813                            CONST_BITS+PASS1_BITS+3)
02814                  & RANGE_MASK];
02815     outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
02816                            CONST_BITS+PASS1_BITS+3)
02817                  & RANGE_MASK];
02818     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
02819                            CONST_BITS+PASS1_BITS+3)
02820                  & RANGE_MASK];
02821     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
02822                            CONST_BITS+PASS1_BITS+3)
02823                  & RANGE_MASK];
02824     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
02825                            CONST_BITS+PASS1_BITS+3)
02826                  & RANGE_MASK];
02827     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
02828                            CONST_BITS+PASS1_BITS+3)
02829                  & RANGE_MASK];
02830     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
02831                            CONST_BITS+PASS1_BITS+3)
02832                  & RANGE_MASK];
02833     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
02834                            CONST_BITS+PASS1_BITS+3)
02835                  & RANGE_MASK];
02836     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
02837                            CONST_BITS+PASS1_BITS+3)
02838                  & RANGE_MASK];
02839     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
02840                            CONST_BITS+PASS1_BITS+3)
02841                  & RANGE_MASK];
02842     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
02843                            CONST_BITS+PASS1_BITS+3)
02844                  & RANGE_MASK];
02845     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
02846                            CONST_BITS+PASS1_BITS+3)
02847                  & RANGE_MASK];
02848 
02849     wsptr += 8;     /* advance pointer to next row */
02850   }
02851 }
02852 
02853 
02854 /*
02855  * Perform dequantization and inverse DCT on one block of coefficients,
02856  * producing a 16x8 output block.
02857  *
02858  * 8-point IDCT in pass 1 (columns), 16-point in pass 2 (rows).
02859  */
02860 
02861 GLOBAL(void)
02862 jpeg_idct_16x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
02863         JCOEFPTR coef_block,
02864         JSAMPARRAY output_buf, JDIMENSION output_col)
02865 {
02866   INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
02867   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
02868   INT32 z1, z2, z3, z4;
02869   JCOEFPTR inptr;
02870   ISLOW_MULT_TYPE * quantptr;
02871   int * wsptr;
02872   JSAMPROW outptr;
02873   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
02874   int ctr;
02875   int workspace[8*8];   /* buffers data between passes */
02876   SHIFT_TEMPS
02877 
02878   /* Pass 1: process columns from input, store into work array.
02879    * Note results are scaled up by sqrt(8) compared to a true IDCT;
02880    * furthermore, we scale the results by 2**PASS1_BITS.
02881    * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
02882    */
02883 
02884   inptr = coef_block;
02885   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
02886   wsptr = workspace;
02887   for (ctr = DCTSIZE; ctr > 0; ctr--) {
02888     /* Due to quantization, we will usually find that many of the input
02889      * coefficients are zero, especially the AC terms.  We can exploit this
02890      * by short-circuiting the IDCT calculation for any column in which all
02891      * the AC terms are zero.  In that case each output is equal to the
02892      * DC coefficient (with scale factor as needed).
02893      * With typical images and quantization tables, half or more of the
02894      * column DCT calculations can be simplified this way.
02895      */
02896 
02897     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
02898     inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
02899     inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
02900     inptr[DCTSIZE*7] == 0) {
02901       /* AC terms all zero */
02902       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
02903 
02904       wsptr[DCTSIZE*0] = dcval;
02905       wsptr[DCTSIZE*1] = dcval;
02906       wsptr[DCTSIZE*2] = dcval;
02907       wsptr[DCTSIZE*3] = dcval;
02908       wsptr[DCTSIZE*4] = dcval;
02909       wsptr[DCTSIZE*5] = dcval;
02910       wsptr[DCTSIZE*6] = dcval;
02911       wsptr[DCTSIZE*7] = dcval;
02912 
02913       inptr++;          /* advance pointers to next column */
02914       quantptr++;
02915       wsptr++;
02916       continue;
02917     }
02918 
02919     /* Even part: reverse the even part of the forward DCT.
02920      * The rotator is c(-6).
02921      */
02922 
02923     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
02924     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
02925 
02926     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
02927     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
02928     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
02929 
02930     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
02931     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
02932     z2 <<= CONST_BITS;
02933     z3 <<= CONST_BITS;
02934     /* Add fudge factor here for final descale. */
02935     z2 += ONE << (CONST_BITS-PASS1_BITS-1);
02936 
02937     tmp0 = z2 + z3;
02938     tmp1 = z2 - z3;
02939 
02940     tmp10 = tmp0 + tmp2;
02941     tmp13 = tmp0 - tmp2;
02942     tmp11 = tmp1 + tmp3;
02943     tmp12 = tmp1 - tmp3;
02944 
02945     /* Odd part per figure 8; the matrix is unitary and hence its
02946      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
02947      */
02948 
02949     tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
02950     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
02951     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
02952     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
02953 
02954     z2 = tmp0 + tmp2;
02955     z3 = tmp1 + tmp3;
02956 
02957     z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
02958     z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
02959     z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
02960     z2 += z1;
02961     z3 += z1;
02962 
02963     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
02964     tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
02965     tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
02966     tmp0 += z1 + z2;
02967     tmp3 += z1 + z3;
02968 
02969     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
02970     tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
02971     tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
02972     tmp1 += z1 + z3;
02973     tmp2 += z1 + z2;
02974 
02975     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
02976 
02977     wsptr[DCTSIZE*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
02978     wsptr[DCTSIZE*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
02979     wsptr[DCTSIZE*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
02980     wsptr[DCTSIZE*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
02981     wsptr[DCTSIZE*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
02982     wsptr[DCTSIZE*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
02983     wsptr[DCTSIZE*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
02984     wsptr[DCTSIZE*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
02985 
02986     inptr++;            /* advance pointers to next column */
02987     quantptr++;
02988     wsptr++;
02989   }
02990 
02991   /* Pass 2: process 8 rows from work array, store into output array.
02992    * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
02993    */
02994 
02995   wsptr = workspace;
02996   for (ctr = 0; ctr < 8; ctr++) {
02997     outptr = output_buf[ctr] + output_col;
02998 
02999     /* Even part */
03000 
03001     /* Add range center and fudge factor for final descale and range-limit. */
03002     tmp0 = (INT32) wsptr[0] +
03003          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
03004           (ONE << (PASS1_BITS+2)));
03005     tmp0 <<= CONST_BITS;
03006 
03007     z1 = (INT32) wsptr[4];
03008     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
03009     tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
03010 
03011     tmp10 = tmp0 + tmp1;
03012     tmp11 = tmp0 - tmp1;
03013     tmp12 = tmp0 + tmp2;
03014     tmp13 = tmp0 - tmp2;
03015 
03016     z1 = (INT32) wsptr[2];
03017     z2 = (INT32) wsptr[6];
03018     z3 = z1 - z2;
03019     z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
03020     z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
03021 
03022     tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
03023     tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
03024     tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
03025     tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
03026 
03027     tmp20 = tmp10 + tmp0;
03028     tmp27 = tmp10 - tmp0;
03029     tmp21 = tmp12 + tmp1;
03030     tmp26 = tmp12 - tmp1;
03031     tmp22 = tmp13 + tmp2;
03032     tmp25 = tmp13 - tmp2;
03033     tmp23 = tmp11 + tmp3;
03034     tmp24 = tmp11 - tmp3;
03035 
03036     /* Odd part */
03037 
03038     z1 = (INT32) wsptr[1];
03039     z2 = (INT32) wsptr[3];
03040     z3 = (INT32) wsptr[5];
03041     z4 = (INT32) wsptr[7];
03042 
03043     tmp11 = z1 + z3;
03044 
03045     tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
03046     tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
03047     tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
03048     tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
03049     tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
03050     tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
03051     tmp0  = tmp1 + tmp2 + tmp3 -
03052         MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
03053     tmp13 = tmp10 + tmp11 + tmp12 -
03054         MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
03055     z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
03056     tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
03057     tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
03058     z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
03059     tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
03060     tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
03061     z2    += z4;
03062     z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
03063     tmp1  += z1;
03064     tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
03065     z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
03066     tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
03067     tmp12 += z2;
03068     z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
03069     tmp2  += z2;
03070     tmp3  += z2;
03071     z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
03072     tmp10 += z2;
03073     tmp11 += z2;
03074 
03075     /* Final output stage */
03076 
03077     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp0,
03078                            CONST_BITS+PASS1_BITS+3)
03079                  & RANGE_MASK];
03080     outptr[15] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp0,
03081                            CONST_BITS+PASS1_BITS+3)
03082                  & RANGE_MASK];
03083     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp1,
03084                            CONST_BITS+PASS1_BITS+3)
03085                  & RANGE_MASK];
03086     outptr[14] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp1,
03087                            CONST_BITS+PASS1_BITS+3)
03088                  & RANGE_MASK];
03089     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp2,
03090                            CONST_BITS+PASS1_BITS+3)
03091                  & RANGE_MASK];
03092     outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp2,
03093                            CONST_BITS+PASS1_BITS+3)
03094                  & RANGE_MASK];
03095     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp3,
03096                            CONST_BITS+PASS1_BITS+3)
03097                  & RANGE_MASK];
03098     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp3,
03099                            CONST_BITS+PASS1_BITS+3)
03100                  & RANGE_MASK];
03101     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp10,
03102                            CONST_BITS+PASS1_BITS+3)
03103                  & RANGE_MASK];
03104     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp10,
03105                            CONST_BITS+PASS1_BITS+3)
03106                  & RANGE_MASK];
03107     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp11,
03108                            CONST_BITS+PASS1_BITS+3)
03109                  & RANGE_MASK];
03110     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp11,
03111                            CONST_BITS+PASS1_BITS+3)
03112                  & RANGE_MASK];
03113     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp12,
03114                            CONST_BITS+PASS1_BITS+3)
03115                  & RANGE_MASK];
03116     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp12,
03117                            CONST_BITS+PASS1_BITS+3)
03118                  & RANGE_MASK];
03119     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp27 + tmp13,
03120                            CONST_BITS+PASS1_BITS+3)
03121                  & RANGE_MASK];
03122     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp27 - tmp13,
03123                            CONST_BITS+PASS1_BITS+3)
03124                  & RANGE_MASK];
03125 
03126     wsptr += 8;     /* advance pointer to next row */
03127   }
03128 }
03129 
03130 
03131 /*
03132  * Perform dequantization and inverse DCT on one block of coefficients,
03133  * producing a 14x7 output block (14 samples per row, 7 rows).
03134  * Only coefficient rows 0..6 are read in pass 1; row 7 is never accessed.
03135  * 7-point IDCT in pass 1 (columns), 14-point in pass 2 (rows).
03136  */
03137 /* Writes output_buf[0..6], sample columns output_col .. output_col+13. */
03138 GLOBAL(void)
03139 jpeg_idct_14x7 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
03140         JCOEFPTR coef_block,
03141         JSAMPARRAY output_buf, JDIMENSION output_col)
03142 {
03143   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
03144   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
03145   INT32 z1, z2, z3, z4;
03146   JCOEFPTR inptr;
03147   ISLOW_MULT_TYPE * quantptr;
03148   int * wsptr;
03149   JSAMPROW outptr;
03150   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
03151   int ctr;
03152   int workspace[8*7];   /* 7 rows of 8 ints; buffers data between passes */
03153   SHIFT_TEMPS
03154 
03155   /* Pass 1: process columns from input, store into work array.
03156    * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
03157    * One column per iteration; result row r is stored at wsptr[8*r]. */
03158 
03159   inptr = coef_block;
03160   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
03161   wsptr = workspace;
03162   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
03163     /* Even part: coefficient rows 0, 2, 4, 6 */
03164 
03165     tmp23 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
03166     tmp23 <<= CONST_BITS;  /* NOTE(review): UB in ISO C if tmp23 is negative - confirm toolchain */
03167     /* Add rounding term (half of the pass-1 descale divisor) to the DC path. */
03168     tmp23 += ONE << (CONST_BITS-PASS1_BITS-1);
03169 
03170     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
03171     z2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
03172     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
03173 
03174     tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
03175     tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
03176     tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
03177     tmp10 = z1 + z3;
03178     z2 -= tmp10;       /* z2 now holds z2 - (z1 + z3) */
03179     tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
03180     tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
03181     tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
03182     tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
03183 
03184     /* Odd part: coefficient rows 1, 3, 5 */
03185 
03186     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
03187     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
03188     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
03189 
03190     tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
03191     tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
03192     tmp10 = tmp11 - tmp12;
03193     tmp11 += tmp12;
03194     tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
03195     tmp11 += tmp12;
03196     z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
03197     tmp10 += z2;
03198     tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
03199 
03200     /* Final output stage: rows r and 6-r are even +/- odd; row 3 is even only */
03201 
03202     wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
03203     wsptr[8*6] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
03204     wsptr[8*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
03205     wsptr[8*5] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
03206     wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
03207     wsptr[8*4] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
03208     wsptr[8*3] = (int) RIGHT_SHIFT(tmp23, CONST_BITS-PASS1_BITS);
03209   }
03210 
03211   /* Pass 2: process 7 rows from work array, store into output array.
03212    * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
03213    * Each row consumes 8 workspace ints and produces 14 output samples. */
03214 
03215   wsptr = workspace;
03216   for (ctr = 0; ctr < 7; ctr++) {
03217     outptr = output_buf[ctr] + output_col;
03218 
03219     /* Even part: workspace columns 0, 4, 2, 6 */
03220 
03221     /* Fold the range center and the rounding term for the final descale into the DC term. */
03222     z1 = (INT32) wsptr[0] +
03223        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
03224         (ONE << (PASS1_BITS+2)));
03225     z1 <<= CONST_BITS;
03226     z4 = (INT32) wsptr[4];
03227     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
03228     z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
03229     z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
03230 
03231     tmp10 = z1 + z2;
03232     tmp11 = z1 + z3;
03233     tmp12 = z1 - z4;
03234 
03235     tmp23 = z1 - ((z2 + z3 - z4) << 1);          /* c0 = (c4+c12-c8)*2 */
03236 
03237     z1 = (INT32) wsptr[2];
03238     z2 = (INT32) wsptr[6];
03239 
03240     z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
03241 
03242     tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
03243     tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
03244     tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
03245         MULTIPLY(z2, FIX(1.378756276));      /* c2 */
03246 
03247     tmp20 = tmp10 + tmp13;
03248     tmp26 = tmp10 - tmp13;
03249     tmp21 = tmp11 + tmp14;
03250     tmp25 = tmp11 - tmp14;
03251     tmp22 = tmp12 + tmp15;
03252     tmp24 = tmp12 - tmp15;
03253 
03254     /* Odd part: workspace columns 1, 3, 5, 7 */
03255 
03256     z1 = (INT32) wsptr[1];
03257     z2 = (INT32) wsptr[3];
03258     z3 = (INT32) wsptr[5];
03259     z4 = (INT32) wsptr[7];
03260     z4 <<= CONST_BITS;  /* used unmultiplied below; pre-shifted, presumably to match FIX() scaling */
03261 
03262     tmp14 = z1 + z3;
03263     tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
03264     tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
03265     tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
03266     tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
03267     tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
03268     z1    -= z2;
03269     tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
03270     tmp16 += tmp15;
03271     tmp13 = MULTIPLY(z2 + z3, - FIX(0.158341681)) - z4;    /* -c13 */
03272     tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
03273     tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
03274     tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
03275     tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
03276     tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
03277 
03278     tmp13 = ((z1 - z3) << CONST_BITS) + z4;  /* z1 already holds wsptr[1] - wsptr[3] here */
03279 
03280     /* Final output stage: outptr[k] = even + odd, outptr[13-k] = even - odd */
03281 
03282     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
03283                            CONST_BITS+PASS1_BITS+3)
03284                  & RANGE_MASK];
03285     outptr[13] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
03286                            CONST_BITS+PASS1_BITS+3)
03287                  & RANGE_MASK];
03288     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
03289                            CONST_BITS+PASS1_BITS+3)
03290                  & RANGE_MASK];
03291     outptr[12] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
03292                            CONST_BITS+PASS1_BITS+3)
03293                  & RANGE_MASK];
03294     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
03295                            CONST_BITS+PASS1_BITS+3)
03296                  & RANGE_MASK];
03297     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
03298                            CONST_BITS+PASS1_BITS+3)
03299                  & RANGE_MASK];
03300     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
03301                            CONST_BITS+PASS1_BITS+3)
03302                  & RANGE_MASK];
03303     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
03304                            CONST_BITS+PASS1_BITS+3)
03305                  & RANGE_MASK];
03306     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
03307                            CONST_BITS+PASS1_BITS+3)
03308                  & RANGE_MASK];
03309     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
03310                            CONST_BITS+PASS1_BITS+3)
03311                  & RANGE_MASK];
03312     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
03313                            CONST_BITS+PASS1_BITS+3)
03314                  & RANGE_MASK];
03315     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
03316                            CONST_BITS+PASS1_BITS+3)
03317                  & RANGE_MASK];
03318     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp26 + tmp16,
03319                            CONST_BITS+PASS1_BITS+3)
03320                  & RANGE_MASK];
03321     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp26 - tmp16,
03322                            CONST_BITS+PASS1_BITS+3)
03323                  & RANGE_MASK];
03324 
03325     wsptr += 8;     /* advance pointer to next row */
03326   }
03327 }
03328 
03329 
03330 /*
03331  * Perform dequantization and inverse DCT on one block of coefficients,
03332  * producing a 12x6 output block (12 samples per row, 6 rows).
03333  * Only coefficient rows 0..5 are read in pass 1; rows 6 and 7 are never accessed.
03334  * 6-point IDCT in pass 1 (columns), 12-point in pass 2 (rows).
03335  */
03336 /* Writes output_buf[0..5], sample columns output_col .. output_col+11. */
03337 GLOBAL(void)
03338 jpeg_idct_12x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
03339         JCOEFPTR coef_block,
03340         JSAMPARRAY output_buf, JDIMENSION output_col)
03341 {
03342   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
03343   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
03344   INT32 z1, z2, z3, z4;
03345   JCOEFPTR inptr;
03346   ISLOW_MULT_TYPE * quantptr;
03347   int * wsptr;
03348   JSAMPROW outptr;
03349   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
03350   int ctr;
03351   int workspace[8*6];   /* 6 rows of 8 ints; buffers data between passes */
03352   SHIFT_TEMPS
03353 
03354   /* Pass 1: process columns from input, store into work array.
03355    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
03356    * One column per iteration; result row r is stored at wsptr[8*r]. */
03357 
03358   inptr = coef_block;
03359   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
03360   wsptr = workspace;
03361   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
03362     /* Even part: coefficient rows 0, 4, 2 */
03363 
03364     tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
03365     tmp10 <<= CONST_BITS;  /* NOTE(review): UB in ISO C if tmp10 is negative - confirm toolchain */
03366     /* Add rounding term (half of the pass-1 descale divisor) to the DC path. */
03367     tmp10 += ONE << (CONST_BITS-PASS1_BITS-1);
03368     tmp12 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
03369     tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
03370     tmp11 = tmp10 + tmp20;
03371     tmp21 = RIGHT_SHIFT(tmp10 - tmp20 - tmp20, CONST_BITS-PASS1_BITS);
03372     tmp20 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
03373     tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
03374     tmp20 = tmp11 + tmp10;
03375     tmp22 = tmp11 - tmp10;
03376 
03377     /* Odd part: coefficient rows 1, 3, 5 */
03378 
03379     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
03380     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
03381     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
03382     tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
03383     tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
03384     tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
03385     tmp11 = (z1 - z2 - z3) << PASS1_BITS;
03386 
03387     /* Final output stage: rows 1 and 4 use tmp21/tmp11, which were already descaled above */
03388 
03389     wsptr[8*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
03390     wsptr[8*5] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
03391     wsptr[8*1] = (int) (tmp21 + tmp11);
03392     wsptr[8*4] = (int) (tmp21 - tmp11);
03393     wsptr[8*2] = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
03394     wsptr[8*3] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
03395   }
03396 
03397   /* Pass 2: process 6 rows from work array, store into output array.
03398    * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
03399    * Each row consumes 8 workspace ints and produces 12 output samples. */
03400 
03401   wsptr = workspace;
03402   for (ctr = 0; ctr < 6; ctr++) {
03403     outptr = output_buf[ctr] + output_col;
03404 
03405     /* Even part: workspace columns 0, 4, 2, 6 */
03406 
03407     /* Fold the range center and the rounding term for the final descale into the DC term. */
03408     z3 = (INT32) wsptr[0] +
03409        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
03410         (ONE << (PASS1_BITS+2)));
03411     z3 <<= CONST_BITS;
03412 
03413     z4 = (INT32) wsptr[4];
03414     z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
03415 
03416     tmp10 = z3 + z4;
03417     tmp11 = z3 - z4;
03418 
03419     z1 = (INT32) wsptr[2];
03420     z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
03421     z1 <<= CONST_BITS;
03422     z2 = (INT32) wsptr[6];
03423     z2 <<= CONST_BITS;
03424 
03425     tmp12 = z1 - z2;
03426 
03427     tmp21 = z3 + tmp12;
03428     tmp24 = z3 - tmp12;
03429 
03430     tmp12 = z4 + z2;
03431 
03432     tmp20 = tmp10 + tmp12;
03433     tmp25 = tmp10 - tmp12;
03434 
03435     tmp12 = z4 - z1 - z2;
03436 
03437     tmp22 = tmp11 + tmp12;
03438     tmp23 = tmp11 - tmp12;
03439 
03440     /* Odd part: workspace columns 1, 3, 5, 7 */
03441 
03442     z1 = (INT32) wsptr[1];
03443     z2 = (INT32) wsptr[3];
03444     z3 = (INT32) wsptr[5];
03445     z4 = (INT32) wsptr[7];
03446 
03447     tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
03448     tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
03449 
03450     tmp10 = z1 + z3;
03451     tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
03452     tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
03453     tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
03454     tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
03455     tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
03456     tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
03457     tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
03458          MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
03459 
03460     z1 -= z4;
03461     z2 -= z3;
03462     z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
03463     tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
03464     tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
03465 
03466     /* Final output stage: outptr[k] = even + odd, outptr[11-k] = even - odd */
03467 
03468     outptr[0]  = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
03469                            CONST_BITS+PASS1_BITS+3)
03470                  & RANGE_MASK];
03471     outptr[11] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
03472                            CONST_BITS+PASS1_BITS+3)
03473                  & RANGE_MASK];
03474     outptr[1]  = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
03475                            CONST_BITS+PASS1_BITS+3)
03476                  & RANGE_MASK];
03477     outptr[10] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
03478                            CONST_BITS+PASS1_BITS+3)
03479                  & RANGE_MASK];
03480     outptr[2]  = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
03481                            CONST_BITS+PASS1_BITS+3)
03482                  & RANGE_MASK];
03483     outptr[9]  = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
03484                            CONST_BITS+PASS1_BITS+3)
03485                  & RANGE_MASK];
03486     outptr[3]  = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
03487                            CONST_BITS+PASS1_BITS+3)
03488                  & RANGE_MASK];
03489     outptr[8]  = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
03490                            CONST_BITS+PASS1_BITS+3)
03491                  & RANGE_MASK];
03492     outptr[4]  = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
03493                            CONST_BITS+PASS1_BITS+3)
03494                  & RANGE_MASK];
03495     outptr[7]  = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
03496                            CONST_BITS+PASS1_BITS+3)
03497                  & RANGE_MASK];
03498     outptr[5]  = range_limit[(int) RIGHT_SHIFT(tmp25 + tmp15,
03499                            CONST_BITS+PASS1_BITS+3)
03500                  & RANGE_MASK];
03501     outptr[6]  = range_limit[(int) RIGHT_SHIFT(tmp25 - tmp15,
03502                            CONST_BITS+PASS1_BITS+3)
03503                  & RANGE_MASK];
03504 
03505     wsptr += 8;     /* advance pointer to next row */
03506   }
03507 }
03508 
03509 
03510 /*
03511  * Perform dequantization and inverse DCT on one block of coefficients,
03512  * producing a 10x5 output block (10 samples per row, 5 rows).
03513  * Only coefficient rows 0..4 are read in pass 1; rows 5..7 are never accessed.
03514  * 5-point IDCT in pass 1 (columns), 10-point in pass 2 (rows).
03515  */
03516 /* Writes output_buf[0..4], sample columns output_col .. output_col+9. */
03517 GLOBAL(void)
03518 jpeg_idct_10x5 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
03519         JCOEFPTR coef_block,
03520         JSAMPARRAY output_buf, JDIMENSION output_col)
03521 {
03522   INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
03523   INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
03524   INT32 z1, z2, z3, z4;
03525   JCOEFPTR inptr;
03526   ISLOW_MULT_TYPE * quantptr;
03527   int * wsptr;
03528   JSAMPROW outptr;
03529   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
03530   int ctr;
03531   int workspace[8*5];   /* 5 rows of 8 ints; buffers data between passes */
03532   SHIFT_TEMPS
03533 
03534   /* Pass 1: process columns from input, store into work array.
03535    * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
03536    * One column per iteration; result row r is stored at wsptr[8*r]. */
03537 
03538   inptr = coef_block;
03539   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
03540   wsptr = workspace;
03541   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
03542     /* Even part: coefficient rows 0, 2, 4 */
03543 
03544     tmp12 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
03545     tmp12 <<= CONST_BITS;  /* NOTE(review): UB in ISO C if tmp12 is negative - confirm toolchain */
03546     /* Add rounding term (half of the pass-1 descale divisor) to the DC path. */
03547     tmp12 += ONE << (CONST_BITS-PASS1_BITS-1);
03548     tmp13 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
03549     tmp14 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
03550     z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
03551     z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
03552     z3 = tmp12 + z2;
03553     tmp10 = z3 + z1;
03554     tmp11 = z3 - z1;
03555     tmp12 -= z2 << 2;  /* parses as tmp12 -= (z2 << 2); feeds the center row */
03556 
03557     /* Odd part: coefficient rows 1, 3 */
03558 
03559     z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
03560     z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
03561 
03562     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
03563     tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
03564     tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
03565 
03566     /* Final output stage: rows r and 4-r are even +/- odd; row 2 is even only */
03567 
03568     wsptr[8*0] = (int) RIGHT_SHIFT(tmp10 + tmp13, CONST_BITS-PASS1_BITS);
03569     wsptr[8*4] = (int) RIGHT_SHIFT(tmp10 - tmp13, CONST_BITS-PASS1_BITS);
03570     wsptr[8*1] = (int) RIGHT_SHIFT(tmp11 + tmp14, CONST_BITS-PASS1_BITS);
03571     wsptr[8*3] = (int) RIGHT_SHIFT(tmp11 - tmp14, CONST_BITS-PASS1_BITS);
03572     wsptr[8*2] = (int) RIGHT_SHIFT(tmp12, CONST_BITS-PASS1_BITS);
03573   }
03574 
03575   /* Pass 2: process 5 rows from work array, store into output array.
03576    * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
03577    * Each row consumes 8 workspace ints and produces 10 output samples. */
03578 
03579   wsptr = workspace;
03580   for (ctr = 0; ctr < 5; ctr++) {
03581     outptr = output_buf[ctr] + output_col;
03582 
03583     /* Even part: workspace columns 0, 4, 2, 6 */
03584 
03585     /* Fold the range center and the rounding term for the final descale into the DC term. */
03586     z3 = (INT32) wsptr[0] +
03587        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
03588         (ONE << (PASS1_BITS+2)));
03589     z3 <<= CONST_BITS;
03590     z4 = (INT32) wsptr[4];
03591     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
03592     z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
03593     tmp10 = z3 + z1;
03594     tmp11 = z3 - z2;
03595 
03596     tmp22 = z3 - ((z1 - z2) << 1);               /* c0 = (c4-c8)*2 */
03597 
03598     z2 = (INT32) wsptr[2];
03599     z3 = (INT32) wsptr[6];
03600 
03601     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
03602     tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
03603     tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
03604 
03605     tmp20 = tmp10 + tmp12;
03606     tmp24 = tmp10 - tmp12;
03607     tmp21 = tmp11 + tmp13;
03608     tmp23 = tmp11 - tmp13;
03609 
03610     /* Odd part: workspace columns 1, 3, 5, 7 */
03611 
03612     z1 = (INT32) wsptr[1];
03613     z2 = (INT32) wsptr[3];
03614     z3 = (INT32) wsptr[5];
03615     z3 <<= CONST_BITS;  /* used unmultiplied below; pre-shifted, presumably to match FIX() scaling */
03616     z4 = (INT32) wsptr[7];
03617 
03618     tmp11 = z2 + z4;
03619     tmp13 = z2 - z4;
03620 
03621     tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
03622 
03623     z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
03624     z4 = z3 + tmp12;
03625 
03626     tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
03627     tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
03628 
03629     z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
03630     z4 = z3 - tmp12 - (tmp13 << (CONST_BITS - 1));
03631 
03632     tmp12 = ((z1 - tmp13) << CONST_BITS) - z3;
03633 
03634     tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
03635     tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
03636 
03637     /* Final output stage: outptr[k] = even + odd, outptr[9-k] = even - odd */
03638 
03639     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
03640                           CONST_BITS+PASS1_BITS+3)
03641                 & RANGE_MASK];
03642     outptr[9] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
03643                           CONST_BITS+PASS1_BITS+3)
03644                 & RANGE_MASK];
03645     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
03646                           CONST_BITS+PASS1_BITS+3)
03647                 & RANGE_MASK];
03648     outptr[8] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
03649                           CONST_BITS+PASS1_BITS+3)
03650                 & RANGE_MASK];
03651     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
03652                           CONST_BITS+PASS1_BITS+3)
03653                 & RANGE_MASK];
03654     outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
03655                           CONST_BITS+PASS1_BITS+3)
03656                 & RANGE_MASK];
03657     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23 + tmp13,
03658                           CONST_BITS+PASS1_BITS+3)
03659                 & RANGE_MASK];
03660     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp23 - tmp13,
03661                           CONST_BITS+PASS1_BITS+3)
03662                 & RANGE_MASK];
03663     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp24 + tmp14,
03664                           CONST_BITS+PASS1_BITS+3)
03665                 & RANGE_MASK];
03666     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp24 - tmp14,
03667                           CONST_BITS+PASS1_BITS+3)
03668                 & RANGE_MASK];
03669 
03670     wsptr += 8;     /* advance pointer to next row */
03671   }
03672 }
03673 
03674 
03675 /*
03676  * Perform dequantization and inverse DCT on one block of coefficients,
03677  * producing an 8x4 output block.
03678  *
03679  * 4-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
       *
       * cinfo      - only passed to IDCT_range_limit() to obtain the lookup
       *              table used when storing the final samples.
       * compptr    - its dct_table member is read as an array of
       *              ISLOW_MULT_TYPE multipliers, indexed like coef_block.
       * coef_block - input coefficients; rows 0..3 (row stride DCTSIZE) of
       *              the first 8 columns are read.
       * output_buf - array of output row pointers; rows 0..3 are written.
       * output_col - offset added to each output row pointer; 8 samples are
       *              stored per row starting at that offset.
03680  */
03681 
03682 GLOBAL(void)
03683 jpeg_idct_8x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
03684            JCOEFPTR coef_block,
03685            JSAMPARRAY output_buf, JDIMENSION output_col)
03686 {
03687   INT32 tmp0, tmp1, tmp2, tmp3;
03688   INT32 tmp10, tmp11, tmp12, tmp13;
03689   INT32 z1, z2, z3;
03690   JCOEFPTR inptr;
03691   ISLOW_MULT_TYPE * quantptr;
03692   int * wsptr;
03693   JSAMPROW outptr;
03694   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
03695   int ctr;
03696   int workspace[8*4];   /* buffers data between passes */
03697   SHIFT_TEMPS
03698 
03699   /* Pass 1: process columns from input, store into work array.
03700    * 4-point IDCT kernel,
03701    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
         * Each of the 8 columns yields 4 values, which are stored in the
         * work array with a row stride of 8.
03702    */
03703 
03704   inptr = coef_block;
03705   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
03706   wsptr = workspace;
03707   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
03708     /* Even part */
03709 
03710     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
03711     tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
03712 
03713     tmp10 = (tmp0 + tmp2) << PASS1_BITS;
03714     tmp12 = (tmp0 - tmp2) << PASS1_BITS;
03715 
03716     /* Odd part */
03717     /* Same rotation as in the even part of the 8x8 LL&M IDCT */
03718 
03719     z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
03720     z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
03721 
03722     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);               /* c6 */
03723     /* Add fudge factor here for final descale. */
03724     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
          /* tmp0 and tmp2 are reused from here on for the odd-part results. */
03725     tmp0 = RIGHT_SHIFT(z1 + MULTIPLY(z2, FIX_0_765366865), /* c2-c6 */
03726                CONST_BITS-PASS1_BITS);
03727     tmp2 = RIGHT_SHIFT(z1 - MULTIPLY(z3, FIX_1_847759065), /* c2+c6 */
03728                CONST_BITS-PASS1_BITS);
03729 
03730     /* Final output stage */
          /* The even terms were shifted left by PASS1_BITS and the odd terms
           * were already shifted right by CONST_BITS-PASS1_BITS above, so the
           * sums are stored without any further shift.
           */
03731 
03732     wsptr[8*0] = (int) (tmp10 + tmp0);
03733     wsptr[8*3] = (int) (tmp10 - tmp0);
03734     wsptr[8*1] = (int) (tmp12 + tmp2);
03735     wsptr[8*2] = (int) (tmp12 - tmp2);
03736   }
03737 
03738   /* Pass 2: process rows from work array, store into output array.
03739    * Note that we must descale the results by a factor of 8 == 2**3,
03740    * and also undo the PASS1_BITS scaling.
03741    * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
         * Four work-array rows are processed, one per output row.
03742    */
03743 
03744   wsptr = workspace;
03745   for (ctr = 0; ctr < 4; ctr++) {
03746     outptr = output_buf[ctr] + output_col;
03747 
03748     /* Even part: reverse the even part of the forward DCT.
03749      * The rotator is c(-6).
03750      */
03751 
03752     /* Add range center and fudge factor for final descale and range-limit. */
03753     z2 = (INT32) wsptr[0] +
03754        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
03755         (ONE << (PASS1_BITS+2)));
03756     z3 = (INT32) wsptr[4];
03757 
03758     tmp0 = (z2 + z3) << CONST_BITS;
03759     tmp1 = (z2 - z3) << CONST_BITS;
03760 
03761     z2 = (INT32) wsptr[2];
03762     z3 = (INT32) wsptr[6];
03763 
03764     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
03765     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
03766     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
03767 
03768     tmp10 = tmp0 + tmp2;
03769     tmp13 = tmp0 - tmp2;
03770     tmp11 = tmp1 + tmp3;
03771     tmp12 = tmp1 - tmp3;
03772 
03773     /* Odd part per figure 8; the matrix is unitary and hence its
03774      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
           * tmp0..tmp3 are reused here; the even-part results live on in
           * tmp10..tmp13.
03775      */
03776 
03777     tmp0 = (INT32) wsptr[7];
03778     tmp1 = (INT32) wsptr[5];
03779     tmp2 = (INT32) wsptr[3];
03780     tmp3 = (INT32) wsptr[1];
03781 
03782     z2 = tmp0 + tmp2;
03783     z3 = tmp1 + tmp3;
03784 
03785     z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
03786     z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
03787     z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
03788     z2 += z1;
03789     z3 += z1;
03790 
03791     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
03792     tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
03793     tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
03794     tmp0 += z1 + z2;
03795     tmp3 += z1 + z3;
03796 
03797     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
03798     tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
03799     tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
03800     tmp1 += z1 + z3;
03801     tmp2 += z1 + z2;
03802 
03803     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
          /* Outputs are produced in mirrored pairs (0/7, 1/6, 2/5, 3/4) as the
           * sum and difference of an even-part and an odd-part term.  Each
           * value is shifted right by CONST_BITS+PASS1_BITS+3, masked with
           * RANGE_MASK and used as an index into range_limit.
           */
03804 
03805     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
03806                           CONST_BITS+PASS1_BITS+3)
03807                 & RANGE_MASK];
03808     outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
03809                           CONST_BITS+PASS1_BITS+3)
03810                 & RANGE_MASK];
03811     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
03812                           CONST_BITS+PASS1_BITS+3)
03813                 & RANGE_MASK];
03814     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
03815                           CONST_BITS+PASS1_BITS+3)
03816                 & RANGE_MASK];
03817     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
03818                           CONST_BITS+PASS1_BITS+3)
03819                 & RANGE_MASK];
03820     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
03821                           CONST_BITS+PASS1_BITS+3)
03822                 & RANGE_MASK];
03823     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
03824                           CONST_BITS+PASS1_BITS+3)
03825                 & RANGE_MASK];
03826     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
03827                           CONST_BITS+PASS1_BITS+3)
03828                 & RANGE_MASK];
03829 
          /* NOTE(review): pass 1 stored rows with a literal stride of 8, so this
           * step lines up only if DCTSIZE is 8 -- presumably it is; confirm.
           */
03830     wsptr += DCTSIZE;       /* advance pointer to next row */
03831   }
03832 }
03833 
03834 
03835 /*
03836  * Perform dequantization and inverse DCT on one block of coefficients,
03837  * producing a reduced-size 6x3 output block.
03838  *
03839  * 3-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
       *
       * cinfo      - only passed to IDCT_range_limit() to obtain the lookup
       *              table used when storing the final samples.
       * compptr    - its dct_table member is read as an array of
       *              ISLOW_MULT_TYPE multipliers, indexed like coef_block.
       * coef_block - input coefficients; rows 0..2 (row stride DCTSIZE) of
       *              the first 6 columns are read.
       * output_buf - array of output row pointers; rows 0..2 are written.
       * output_col - offset added to each output row pointer; 6 samples are
       *              stored per row starting at that offset.
03840  */
03841 
03842 GLOBAL(void)
03843 jpeg_idct_6x3 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
03844            JCOEFPTR coef_block,
03845            JSAMPARRAY output_buf, JDIMENSION output_col)
03846 {
03847   INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
03848   INT32 z1, z2, z3;
03849   JCOEFPTR inptr;
03850   ISLOW_MULT_TYPE * quantptr;
03851   int * wsptr;
03852   JSAMPROW outptr;
03853   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
03854   int ctr;
03855   int workspace[6*3];   /* buffers data between passes */
03856   SHIFT_TEMPS
03857 
03858   /* Pass 1: process columns from input, store into work array.
03859    * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
         * Each of the 6 columns yields 3 values, which are stored in the
         * work array with a row stride of 6.
03860    */
03861 
03862   inptr = coef_block;
03863   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
03864   wsptr = workspace;
03865   for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
03866     /* Even part */
03867 
03868     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
03869     tmp0 <<= CONST_BITS;
03870     /* Add fudge factor here for final descale. */
03871     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
03872     tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
03873     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
03874     tmp10 = tmp0 + tmp12;
03875     tmp2 = tmp0 - tmp12 - tmp12;
03876 
03877     /* Odd part */
          /* tmp12 and tmp0 are reused here: tmp12 holds the dequantized row-1
           * coefficient and tmp0 the resulting odd-part term.
           */
03878 
03879     tmp12 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
03880     tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
03881 
03882     /* Final output stage */
          /* Rows 0 and 2 are the sum and difference of the even and odd terms;
           * the middle row (1) is taken from the even part alone.
           */
03883 
03884     wsptr[6*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
03885     wsptr[6*2] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
03886     wsptr[6*1] = (int) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
03887   }
03888   
03889   /* Pass 2: process 3 rows from work array, store into output array.
03890    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
03891    */
03892 
03893   wsptr = workspace;
03894   for (ctr = 0; ctr < 3; ctr++) {
03895     outptr = output_buf[ctr] + output_col;
03896 
03897     /* Even part */
03898 
03899     /* Add range center and fudge factor for final descale and range-limit. */
03900     tmp0 = (INT32) wsptr[0] +
03901          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
03902           (ONE << (PASS1_BITS+2)));
03903     tmp0 <<= CONST_BITS;
03904     tmp2 = (INT32) wsptr[4];
03905     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
03906     tmp1 = tmp0 + tmp10;
03907     tmp11 = tmp0 - tmp10 - tmp10;
          /* tmp10 and tmp0 are reused below as scratch for the wsptr[2] term;
           * after this group tmp10..tmp12 hold the three even-part results.
           */
03908     tmp10 = (INT32) wsptr[2];
03909     tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
03910     tmp10 = tmp1 + tmp0;
03911     tmp12 = tmp1 - tmp0;
03912 
03913     /* Odd part */
          /* Produces the three odd-part results in tmp0, tmp1 and tmp2. */
03914 
03915     z1 = (INT32) wsptr[1];
03916     z2 = (INT32) wsptr[3];
03917     z3 = (INT32) wsptr[5];
03918     tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
03919     tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
03920     tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
03921     tmp1 = (z1 - z2 - z3) << CONST_BITS;
03922 
03923     /* Final output stage */
          /* Outputs are produced in mirrored pairs (0/5, 1/4, 2/3) as the sum
           * and difference of an even-part and an odd-part term.  Each value is
           * shifted right by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and
           * used as an index into range_limit.
           */
03924 
03925     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
03926                           CONST_BITS+PASS1_BITS+3)
03927                 & RANGE_MASK];
03928     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
03929                           CONST_BITS+PASS1_BITS+3)
03930                 & RANGE_MASK];
03931     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp1,
03932                           CONST_BITS+PASS1_BITS+3)
03933                 & RANGE_MASK];
03934     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp1,
03935                           CONST_BITS+PASS1_BITS+3)
03936                 & RANGE_MASK];
03937     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
03938                           CONST_BITS+PASS1_BITS+3)
03939                 & RANGE_MASK];
03940     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
03941                           CONST_BITS+PASS1_BITS+3)
03942                 & RANGE_MASK];
03943 
03944     wsptr += 6;     /* advance pointer to next row */
03945   }
03946 }
03947 
03948 
03949 /*
03950  * Perform dequantization and inverse DCT on one block of coefficients,
03951  * producing a 4x2 output block.
03952  *
03953  * 2-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
03954  */
03955 
03956 GLOBAL(void)
03957 jpeg_idct_4x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
03958            JCOEFPTR coef_block,
03959            JSAMPARRAY output_buf, JDIMENSION output_col)
03960 {
03961   INT32 tmp0, tmp2, tmp10, tmp12;
03962   INT32 z1, z2, z3;
03963   JCOEFPTR inptr;
03964   ISLOW_MULT_TYPE * quantptr;
03965   INT32 * wsptr;
03966   JSAMPROW outptr;
03967   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
03968   int ctr;
03969   INT32 workspace[4*2]; /* buffers data between passes */
03970   SHIFT_TEMPS
03971 
03972   /* Pass 1: process columns from input, store into work array. */
03973 
03974   inptr = coef_block;
03975   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
03976   wsptr = workspace;
03977   for (ctr = 0; ctr < 4; ctr++, inptr++, quantptr++, wsptr++) {
03978     /* Even part */
03979 
03980     tmp10 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
03981 
03982     /* Odd part */
03983 
03984     tmp0 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
03985 
03986     /* Final output stage */
03987 
03988     wsptr[4*0] = tmp10 + tmp0;
03989     wsptr[4*1] = tmp10 - tmp0;
03990   }
03991 
03992   /* Pass 2: process 2 rows from work array, store into output array.
03993    * 4-point IDCT kernel,
03994    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
03995    */
03996 
03997   wsptr = workspace;
03998   for (ctr = 0; ctr < 2; ctr++) {
03999     outptr = output_buf[ctr] + output_col;
04000 
04001     /* Even part */
04002 
04003     /* Add range center and fudge factor for final descale and range-limit. */
04004     tmp0 = wsptr[0] + ((((INT32) RANGE_CENTER) << 3) + (ONE << 2));
04005     tmp2 = wsptr[2];
04006 
04007     tmp10 = (tmp0 + tmp2) << CONST_BITS;
04008     tmp12 = (tmp0 - tmp2) << CONST_BITS;
04009 
04010     /* Odd part */
04011     /* Same rotation as in the even part of the 8x8 LL&M IDCT */
04012 
04013     z2 = wsptr[1];
04014     z3 = wsptr[3];
04015 
04016     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
04017     tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
04018     tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
04019 
04020     /* Final output stage */
04021 
04022     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
04023                           CONST_BITS+3)
04024                 & RANGE_MASK];
04025     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
04026                           CONST_BITS+3)
04027                 & RANGE_MASK];
04028     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
04029                           CONST_BITS+3)
04030                 & RANGE_MASK];
04031     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
04032                           CONST_BITS+3)
04033                 & RANGE_MASK];
04034 
04035     wsptr += 4;     /* advance pointer to next row */
04036   }
04037 }
04038 
04039 
04040 /*
04041  * Perform dequantization and inverse DCT on one block of coefficients,
04042  * producing a 2x1 output block.
04043  *
04044  * 1-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
04045  */
04046 
04047 GLOBAL(void)
04048 jpeg_idct_2x1 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
04049            JCOEFPTR coef_block,
04050            JSAMPARRAY output_buf, JDIMENSION output_col)
04051 {
04052   DCTELEM tmp0, tmp1;
04053   ISLOW_MULT_TYPE * quantptr;
04054   JSAMPROW outptr;
04055   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
04056   ISHIFT_TEMPS
04057 
04058   /* Pass 1: empty. */
04059 
04060   /* Pass 2: process 1 row from input, store into output array. */
04061 
04062   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
04063   outptr = output_buf[0] + output_col;
04064 
04065   /* Even part */
04066 
04067   tmp0 = DEQUANTIZE(coef_block[0], quantptr[0]);
04068   /* Add range center and fudge factor for final descale and range-limit. */
04069   tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
04070 
04071   /* Odd part */
04072 
04073   tmp1 = DEQUANTIZE(coef_block[1], quantptr[1]);
04074 
04075   /* Final output stage */
04076 
04077   outptr[0] = range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
04078   outptr[1] = range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
04079 }
04080 
04081 
04082 /*
04083  * Perform dequantization and inverse DCT on one block of coefficients,
04084  * producing an 8x16 output block.
04085  *
04086  * 16-point IDCT in pass 1 (columns), 8-point in pass 2 (rows).
       *
       * cinfo      - only passed to IDCT_range_limit() to obtain the lookup
       *              table used when storing the final samples.
       * compptr    - its dct_table member is read as an array of
       *              ISLOW_MULT_TYPE multipliers, indexed like coef_block.
       * coef_block - input coefficients; rows 0..7 (row stride DCTSIZE) of
       *              8 columns are read.
       * output_buf - array of output row pointers; rows 0..15 are written.
       * output_col - offset added to each output row pointer; 8 samples are
       *              stored per row starting at that offset.
04087  */
04088 
04089 GLOBAL(void)
04090 jpeg_idct_8x16 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
04091         JCOEFPTR coef_block,
04092         JSAMPARRAY output_buf, JDIMENSION output_col)
04093 {
04094   INT32 tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
04095   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
04096   INT32 z1, z2, z3, z4;
04097   JCOEFPTR inptr;
04098   ISLOW_MULT_TYPE * quantptr;
04099   int * wsptr;
04100   JSAMPROW outptr;
04101   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
04102   int ctr;
04103   int workspace[8*16];  /* buffers data between passes */
04104   SHIFT_TEMPS
04105 
04106   /* Pass 1: process columns from input, store into work array.
04107    * 16-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
         * Each of the 8 columns yields 16 values, which are stored in the
         * work array with a row stride of 8.
04108    */
04109 
04110   inptr = coef_block;
04111   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
04112   wsptr = workspace;
04113   for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
04114     /* Even part */
          /* Uses coefficient rows 0, 4, 2 and 6; the eight even-part results
           * end up in tmp20..tmp27.
           */
04115 
04116     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
04117     tmp0 <<= CONST_BITS;
04118     /* Add fudge factor here for final descale. */
04119     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
04120 
04121     z1 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
04122     tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
04123     tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
04124 
04125     tmp10 = tmp0 + tmp1;
04126     tmp11 = tmp0 - tmp1;
04127     tmp12 = tmp0 + tmp2;
04128     tmp13 = tmp0 - tmp2;
04129 
04130     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
04131     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
04132     z3 = z1 - z2;
04133     z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
04134     z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
04135 
04136     tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
04137     tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
04138     tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
04139     tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
04140 
04141     tmp20 = tmp10 + tmp0;
04142     tmp27 = tmp10 - tmp0;
04143     tmp21 = tmp12 + tmp1;
04144     tmp26 = tmp12 - tmp1;
04145     tmp22 = tmp13 + tmp2;
04146     tmp25 = tmp13 - tmp2;
04147     tmp23 = tmp11 + tmp3;
04148     tmp24 = tmp11 - tmp3;
04149 
04150     /* Odd part */
          /* Uses coefficient rows 1, 3, 5 and 7.  tmp0..tmp3 and tmp10..tmp13
           * are reused here and accumulate the eight odd-part results; the
           * statement order matters because z1 and z2 are overwritten along
           * the way.
           */
04151 
04152     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
04153     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
04154     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
04155     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
04156 
04157     tmp11 = z1 + z3;
04158 
04159     tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
04160     tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
04161     tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
04162     tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
04163     tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
04164     tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
04165     tmp0  = tmp1 + tmp2 + tmp3 -
04166         MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
04167     tmp13 = tmp10 + tmp11 + tmp12 -
04168         MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
04169     z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
04170     tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
04171     tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
04172     z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
04173     tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
04174     tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
04175     z2    += z4;
04176     z1    = MULTIPLY(z2, - FIX(0.666655658));      /* -c11 */
04177     tmp1  += z1;
04178     tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
04179     z2    = MULTIPLY(z2, - FIX(1.247225013));      /* -c5 */
04180     tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
04181     tmp12 += z2;
04182     z2    = MULTIPLY(z3 + z4, - FIX(1.353318001)); /* -c3 */
04183     tmp2  += z2;
04184     tmp3  += z2;
04185     z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
04186     tmp10 += z2;
04187     tmp11 += z2;
04188 
04189     /* Final output stage */
          /* Work-array rows are written in mirrored pairs (k and 15-k) as the
           * sum and difference of an even-part and an odd-part term, each
           * shifted right by CONST_BITS-PASS1_BITS.
           */
04190 
04191     wsptr[8*0]  = (int) RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS-PASS1_BITS);
04192     wsptr[8*15] = (int) RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS-PASS1_BITS);
04193     wsptr[8*1]  = (int) RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS-PASS1_BITS);
04194     wsptr[8*14] = (int) RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS-PASS1_BITS);
04195     wsptr[8*2]  = (int) RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS-PASS1_BITS);
04196     wsptr[8*13] = (int) RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS-PASS1_BITS);
04197     wsptr[8*3]  = (int) RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS-PASS1_BITS);
04198     wsptr[8*12] = (int) RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS-PASS1_BITS);
04199     wsptr[8*4]  = (int) RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS-PASS1_BITS);
04200     wsptr[8*11] = (int) RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS-PASS1_BITS);
04201     wsptr[8*5]  = (int) RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS-PASS1_BITS);
04202     wsptr[8*10] = (int) RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS-PASS1_BITS);
04203     wsptr[8*6]  = (int) RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS-PASS1_BITS);
04204     wsptr[8*9]  = (int) RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS-PASS1_BITS);
04205     wsptr[8*7]  = (int) RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS-PASS1_BITS);
04206     wsptr[8*8]  = (int) RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS-PASS1_BITS);
04207   }
04208 
04209   /* Pass 2: process rows from work array, store into output array.
04210    * Note that we must descale the results by a factor of 8 == 2**3,
04211    * and also undo the PASS1_BITS scaling.
04212    * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
         * Sixteen work-array rows are processed, one per output row.
04213    */
04214 
04215   wsptr = workspace;
04216   for (ctr = 0; ctr < 16; ctr++) {
04217     outptr = output_buf[ctr] + output_col;
04218 
04219     /* Even part: reverse the even part of the forward DCT.
04220      * The rotator is c(-6).
04221      */
04222 
04223     /* Add range center and fudge factor for final descale and range-limit. */
04224     z2 = (INT32) wsptr[0] +
04225        ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
04226         (ONE << (PASS1_BITS+2)));
04227     z3 = (INT32) wsptr[4];
04228 
04229     tmp0 = (z2 + z3) << CONST_BITS;
04230     tmp1 = (z2 - z3) << CONST_BITS;
04231 
04232     z2 = (INT32) wsptr[2];
04233     z3 = (INT32) wsptr[6];
04234 
04235     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
04236     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
04237     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
04238 
04239     tmp10 = tmp0 + tmp2;
04240     tmp13 = tmp0 - tmp2;
04241     tmp11 = tmp1 + tmp3;
04242     tmp12 = tmp1 - tmp3;
04243 
04244     /* Odd part per figure 8; the matrix is unitary and hence its
04245      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
           * tmp0..tmp3 are reused here; the even-part results live on in
           * tmp10..tmp13.
04246      */
04247 
04248     tmp0 = (INT32) wsptr[7];
04249     tmp1 = (INT32) wsptr[5];
04250     tmp2 = (INT32) wsptr[3];
04251     tmp3 = (INT32) wsptr[1];
04252 
04253     z2 = tmp0 + tmp2;
04254     z3 = tmp1 + tmp3;
04255 
04256     z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
04257     z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
04258     z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
04259     z2 += z1;
04260     z3 += z1;
04261 
04262     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
04263     tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
04264     tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
04265     tmp0 += z1 + z2;
04266     tmp3 += z1 + z3;
04267 
04268     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
04269     tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
04270     tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
04271     tmp1 += z1 + z3;
04272     tmp2 += z1 + z2;
04273 
04274     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
          /* Outputs are produced in mirrored pairs (0/7, 1/6, 2/5, 3/4) as the
           * sum and difference of an even-part and an odd-part term.  Each
           * value is shifted right by CONST_BITS+PASS1_BITS+3, masked with
           * RANGE_MASK and used as an index into range_limit.
           */
04275 
04276     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp3,
04277                           CONST_BITS+PASS1_BITS+3)
04278                 & RANGE_MASK];
04279     outptr[7] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp3,
04280                           CONST_BITS+PASS1_BITS+3)
04281                 & RANGE_MASK];
04282     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp2,
04283                           CONST_BITS+PASS1_BITS+3)
04284                 & RANGE_MASK];
04285     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp2,
04286                           CONST_BITS+PASS1_BITS+3)
04287                 & RANGE_MASK];
04288     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp1,
04289                           CONST_BITS+PASS1_BITS+3)
04290                 & RANGE_MASK];
04291     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp1,
04292                           CONST_BITS+PASS1_BITS+3)
04293                 & RANGE_MASK];
04294     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp13 + tmp0,
04295                           CONST_BITS+PASS1_BITS+3)
04296                 & RANGE_MASK];
04297     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp13 - tmp0,
04298                           CONST_BITS+PASS1_BITS+3)
04299                 & RANGE_MASK];
04300 
          /* NOTE(review): pass 1 stored rows with a literal stride of 8, so this
           * step lines up only if DCTSIZE is 8 -- presumably it is; confirm.
           */
04301     wsptr += DCTSIZE;       /* advance pointer to next row */
04302   }
04303 }
04304 
04305 
04306 /*
04307  * Perform dequantization and inverse DCT on one block of coefficients,
04308  * producing a 7x14 output block.
04309  *
04310  * 14-point IDCT in pass 1 (columns), 7-point in pass 2 (rows).
       *
       * cinfo      - only passed to IDCT_range_limit() to obtain the lookup
       *              table used when storing the final samples.
       * compptr    - its dct_table member is read as an array of
       *              ISLOW_MULT_TYPE multipliers, indexed like coef_block.
       * coef_block - input coefficients; rows 0..7 (row stride DCTSIZE) of
       *              the first 7 columns are read.
       * output_buf - array of output row pointers; rows 0..13 are written.
       * output_col - offset added to each output row pointer; 7 samples are
       *              stored per row starting at that offset.
04311  */
04312 
04313 GLOBAL(void)
04314 jpeg_idct_7x14 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
04315         JCOEFPTR coef_block,
04316         JSAMPARRAY output_buf, JDIMENSION output_col)
04317 {
04318   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
04319   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
04320   INT32 z1, z2, z3, z4;
04321   JCOEFPTR inptr;
04322   ISLOW_MULT_TYPE * quantptr;
04323   int * wsptr;
04324   JSAMPROW outptr;
04325   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
04326   int ctr;
04327   int workspace[7*14];  /* buffers data between passes */
04328   SHIFT_TEMPS
04329 
04330   /* Pass 1: process columns from input, store into work array.
04331    * 14-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
         * Each of the 7 columns yields 14 values, which are stored in the
         * work array with a row stride of 7.
04332    */
04333 
04334   inptr = coef_block;
04335   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
04336   wsptr = workspace;
04337   for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
04338     /* Even part */
          /* Uses coefficient rows 0, 4, 2 and 6; the even-part results end up
           * in tmp20..tmp26.
           */
04339 
04340     z1 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
04341     z1 <<= CONST_BITS;
04342     /* Add fudge factor here for final descale. */
04343     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
04344     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
04345     z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
04346     z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
04347     z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
04348 
04349     tmp10 = z1 + z2;
04350     tmp11 = z1 + z3;
04351     tmp12 = z1 - z4;
04352 
          /* Unlike the other even-part results, tmp23 is descaled right here. */
04353     tmp23 = RIGHT_SHIFT(z1 - ((z2 + z3 - z4) << 1), /* c0 = (c4+c12-c8)*2 */
04354             CONST_BITS-PASS1_BITS);
04355 
04356     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
04357     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
04358 
04359     z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
04360 
04361     tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
04362     tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
04363     tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
04364         MULTIPLY(z2, FIX(1.378756276));      /* c2 */
04365 
04366     tmp20 = tmp10 + tmp13;
04367     tmp26 = tmp10 - tmp13;
04368     tmp21 = tmp11 + tmp14;
04369     tmp25 = tmp11 - tmp14;
04370     tmp22 = tmp12 + tmp15;
04371     tmp24 = tmp12 - tmp15;
04372 
04373     /* Odd part */
          /* Uses coefficient rows 1, 3, 5 and 7.  tmp10..tmp16 are reused for
           * the odd-part results; the statement order matters because z1 and
           * z4 are modified along the way.
           */
04374 
04375     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
04376     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
04377     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
04378     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
04379     tmp13 = z4 << CONST_BITS;
04380 
04381     tmp14 = z1 + z3;
04382     tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
04383     tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
04384     tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
04385     tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
04386     tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
04387     z1    -= z2;
04388     tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
04389     tmp16 += tmp15;
04390     z1    += z4;
04391     z4    = MULTIPLY(z2 + z3, - FIX(0.158341681)) - tmp13; /* -c13 */
04392     tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
04393     tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
04394     z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
          /* NOTE(review): the constant below is written with ten decimals while
           * every other FIX() argument here uses nine -- presumably
           * intentional; confirm against the reference tables.
           */
04395     tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
04396     tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
04397 
          /* At this point z1 holds (row1 - row3 + row7) after the in-place
           * updates above, so tmp13 becomes (row1 - row3 - row5 + row7)
           * scaled up by PASS1_BITS.
           */
04398     tmp13 = (z1 - z3) << PASS1_BITS;
04399 
04400     /* Final output stage */
          /* Work-array rows are written in mirrored pairs (k and 13-k).  Rows 3
           * and 10 are stored without a shift because tmp23 was already shifted
           * right above and tmp13 was built directly at PASS1_BITS scale.
           */
04401 
04402     wsptr[7*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
04403     wsptr[7*13] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
04404     wsptr[7*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
04405     wsptr[7*12] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
04406     wsptr[7*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
04407     wsptr[7*11] = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
04408     wsptr[7*3]  = (int) (tmp23 + tmp13);
04409     wsptr[7*10] = (int) (tmp23 - tmp13);
04410     wsptr[7*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
04411     wsptr[7*9]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
04412     wsptr[7*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
04413     wsptr[7*8]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
04414     wsptr[7*6]  = (int) RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS-PASS1_BITS);
04415     wsptr[7*7]  = (int) RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS-PASS1_BITS);
04416   }
04417 
04418   /* Pass 2: process 14 rows from work array, store into output array.
04419    * 7-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
04420    */
04421 
04422   wsptr = workspace;
04423   for (ctr = 0; ctr < 14; ctr++) {
04424     outptr = output_buf[ctr] + output_col;
04425 
04426     /* Even part */
          /* Uses wsptr[0], [2], [4] and [6]; results end up in tmp20..tmp23. */
04427 
04428     /* Add range center and fudge factor for final descale and range-limit. */
04429     tmp23 = (INT32) wsptr[0] +
04430           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
04431            (ONE << (PASS1_BITS+2)));
04432     tmp23 <<= CONST_BITS;
04433 
04434     z1 = (INT32) wsptr[2];
04435     z2 = (INT32) wsptr[4];
04436     z3 = (INT32) wsptr[6];
04437 
04438     tmp20 = MULTIPLY(z2 - z3, FIX(0.881747734));       /* c4 */
04439     tmp22 = MULTIPLY(z1 - z2, FIX(0.314692123));       /* c6 */
04440     tmp21 = tmp20 + tmp22 + tmp23 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
04441     tmp10 = z1 + z3;
04442     z2 -= tmp10;
04443     tmp10 = MULTIPLY(tmp10, FIX(1.274162392)) + tmp23; /* c2 */
04444     tmp20 += tmp10 - MULTIPLY(z3, FIX(0.077722536));   /* c2-c4-c6 */
04445     tmp22 += tmp10 - MULTIPLY(z1, FIX(2.470602249));   /* c2+c4+c6 */
04446     tmp23 += MULTIPLY(z2, FIX(1.414213562));           /* c0 */
04447 
04448     /* Odd part */
          /* Uses wsptr[1], [3] and [5]; results end up in tmp10..tmp12. */
04449 
04450     z1 = (INT32) wsptr[1];
04451     z2 = (INT32) wsptr[3];
04452     z3 = (INT32) wsptr[5];
04453 
04454     tmp11 = MULTIPLY(z1 + z2, FIX(0.935414347));       /* (c3+c1-c5)/2 */
04455     tmp12 = MULTIPLY(z1 - z2, FIX(0.170262339));       /* (c3+c5-c1)/2 */
04456     tmp10 = tmp11 - tmp12;
04457     tmp11 += tmp12;
04458     tmp12 = MULTIPLY(z2 + z3, - FIX(1.378756276));     /* -c1 */
04459     tmp11 += tmp12;
04460     z2 = MULTIPLY(z1 + z3, FIX(0.613604268));          /* c5 */
04461     tmp10 += z2;
04462     tmp12 += z2 + MULTIPLY(z3, FIX(1.870828693));      /* c3+c1-c5 */
04463 
04464     /* Final output stage */
          /* Outputs 0/6, 1/5 and 2/4 are mirrored sum/difference pairs; the
           * centre sample (3) is taken from the even part alone.  Each value is
           * shifted right by CONST_BITS+PASS1_BITS+3, masked with RANGE_MASK and
           * used as an index into range_limit.
           */
04465 
04466     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
04467                           CONST_BITS+PASS1_BITS+3)
04468                 & RANGE_MASK];
04469     outptr[6] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
04470                           CONST_BITS+PASS1_BITS+3)
04471                 & RANGE_MASK];
04472     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
04473                           CONST_BITS+PASS1_BITS+3)
04474                 & RANGE_MASK];
04475     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
04476                           CONST_BITS+PASS1_BITS+3)
04477                 & RANGE_MASK];
04478     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
04479                           CONST_BITS+PASS1_BITS+3)
04480                 & RANGE_MASK];
04481     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
04482                           CONST_BITS+PASS1_BITS+3)
04483                 & RANGE_MASK];
04484     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp23,
04485                           CONST_BITS+PASS1_BITS+3)
04486                 & RANGE_MASK];
04487 
04488     wsptr += 7;     /* advance pointer to next row */
04489   }
04490 }
04491 
04492 
04493 /*
04494  * Perform dequantization and inverse DCT on one block of coefficients,
04495  * producing a 6x12 output block.
04496  *
04497  * 12-point IDCT in pass 1 (columns), 6-point in pass 2 (rows).
       *
       * Input:  rows 0..7 of the first 6 columns of coef_block (row stride
       *         DCTSIZE); each coefficient is combined with the entry at the
       *         same offset of compptr->dct_table through DEQUANTIZE.
       * Output: 6 samples in each of output_buf[0] .. output_buf[11], starting
       *         at column output_col.  Every sample is read from the table
       *         returned by IDCT_range_limit(cinfo), indexed with the descaled
       *         result masked by RANGE_MASK.
       *
       * NOTE(review): signed values are left-shifted in several places (for
       * example "z3 <<= CONST_BITS").  ISO C leaves "<<" undefined for negative
       * signed operands, so this presumably relies on two's-complement
       * behavior of the compiler -- confirm for the target toolchain.
04498  */
04499 
04500 GLOBAL(void)
04501 jpeg_idct_6x12 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
04502         JCOEFPTR coef_block,
04503         JSAMPARRAY output_buf, JDIMENSION output_col)
04504 {
04505   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
04506   INT32 tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
04507   INT32 z1, z2, z3, z4;
04508   JCOEFPTR inptr;
04509   ISLOW_MULT_TYPE * quantptr;
04510   int * wsptr;
04511   JSAMPROW outptr;
04512   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
04513   int ctr;
04514   int workspace[6*12];  /* buffers data between passes */
04515   SHIFT_TEMPS
04516 
04517   /* Pass 1: process columns from input, store into work array.
04518    * 12-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
04519    */
04520 
04521   inptr = coef_block;
04522   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
04523   wsptr = workspace;
        /* One iteration per input column; all three pointers step one column. */
04524   for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
04525     /* Even part */
04526 
04527     z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
04528     z3 <<= CONST_BITS;
04529     /* Add fudge factor here for final descale. */
04530     z3 += ONE << (CONST_BITS-PASS1_BITS-1);
04531 
04532     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
04533     z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
04534 
04535     tmp10 = z3 + z4;
04536     tmp11 = z3 - z4;
04537 
04538     z1 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
          /* z4 is reused: from here on it holds the c2 product of row 2. */
04539     z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
04540     z1 <<= CONST_BITS;
04541     z2 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
04542     z2 <<= CONST_BITS;
04543 
          /* tmp12 is scratch: it is assigned three times below, each value
           * feeding one mirrored pair of even-part terms
           * (tmp21/tmp24, then tmp20/tmp25, then tmp22/tmp23).
           */
04544     tmp12 = z1 - z2;
04545 
04546     tmp21 = z3 + tmp12;
04547     tmp24 = z3 - tmp12;
04548 
04549     tmp12 = z4 + z2;
04550 
04551     tmp20 = tmp10 + tmp12;
04552     tmp25 = tmp10 - tmp12;
04553 
04554     tmp12 = z4 - z1 - z2;
04555 
04556     tmp22 = tmp11 + tmp12;
04557     tmp23 = tmp11 - tmp12;
04558 
04559     /* Odd part */
04560 
          /* z1..z4 now hold the dequantized coefficients of rows 1, 3, 5, 7. */
04561     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
04562     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
04563     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
04564     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
04565 
          /* tmp11 and tmp14 are only partial products here; they are folded
           * into tmp10/tmp13 and tmp12/tmp15 below and then recomputed.
           */
04566     tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
04567     tmp14 = MULTIPLY(z2, - FIX_0_541196100);                 /* -c9 */
04568 
04569     tmp10 = z1 + z3;
04570     tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
04571     tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
04572     tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
04573     tmp13 = MULTIPLY(z3 + z4, - FIX(1.045510580));           /* -(c7+c11) */
04574     tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
04575     tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
04576     tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
04577          MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
04578 
          /* The inputs z1, z2, z3 are overwritten from here on; tmp11 and
           * tmp14 receive their final values, replacing the partial products.
           */
04579     z1 -= z4;
04580     z2 -= z3;
04581     z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
04582     tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
04583     tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
04584 
04585     /* Final output stage */
04586 
          /* wsptr[6*k] is element k of this column (workspace row stride 6).
           * Elements k and 11-k share the even term tmp2x and differ only in
           * the sign of the odd term tmp1x.
           */
04587     wsptr[6*0]  = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
04588     wsptr[6*11] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
04589     wsptr[6*1]  = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
04590     wsptr[6*10] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
04591     wsptr[6*2]  = (int) RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS-PASS1_BITS);
04592     wsptr[6*9]  = (int) RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS-PASS1_BITS);
04593     wsptr[6*3]  = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
04594     wsptr[6*8]  = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
04595     wsptr[6*4]  = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
04596     wsptr[6*7]  = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
04597     wsptr[6*5]  = (int) RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS-PASS1_BITS);
04598     wsptr[6*6]  = (int) RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS-PASS1_BITS);
04599   }
04600 
04601   /* Pass 2: process 12 rows from work array, store into output array.
04602    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
04603    */
04604 
04605   wsptr = workspace;
04606   for (ctr = 0; ctr < 12; ctr++) {
          /* First of the 6 samples written for output row ctr. */
04607     outptr = output_buf[ctr] + output_col;
04608 
04609     /* Even part */
04610 
04611     /* Add range center and fudge factor for final descale and range-limit. */
04612     tmp10 = (INT32) wsptr[0] +
04613           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
04614            (ONE << (PASS1_BITS+2)));
04615     tmp10 <<= CONST_BITS;
04616     tmp12 = (INT32) wsptr[4];
04617     tmp20 = MULTIPLY(tmp12, FIX(0.707106781));   /* c4 */
04618     tmp11 = tmp10 + tmp20;
          /* i.e. tmp10 - 2*tmp20 */
04619     tmp21 = tmp10 - tmp20 - tmp20;
          /* tmp20 and tmp10 are reused as scratch for the wsptr[2] term. */
04620     tmp20 = (INT32) wsptr[2];
04621     tmp10 = MULTIPLY(tmp20, FIX(1.224744871));   /* c2 */
04622     tmp20 = tmp11 + tmp10;
04623     tmp22 = tmp11 - tmp10;
04624 
04625     /* Odd part */
04626 
04627     z1 = (INT32) wsptr[1];
04628     z2 = (INT32) wsptr[3];
04629     z3 = (INT32) wsptr[5];
          /* tmp10..tmp12 are reassigned here and hold the odd-part terms for
           * the output stage; the plain sums are shifted by CONST_BITS before
           * being combined with the MULTIPLY() product (presumably to match
           * its scale).
           */
04630     tmp11 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
04631     tmp10 = tmp11 + ((z1 + z2) << CONST_BITS);
04632     tmp12 = tmp11 + ((z3 - z2) << CONST_BITS);
04633     tmp11 = (z1 - z2 - z3) << CONST_BITS;
04634 
04635     /* Final output stage */
04636 
          /* Samples k and 5-k are even term +/- odd term, descaled by
           * CONST_BITS+PASS1_BITS+3 bits and mapped through range_limit.
           */
04637     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp20 + tmp10,
04638                           CONST_BITS+PASS1_BITS+3)
04639                 & RANGE_MASK];
04640     outptr[5] = range_limit[(int) RIGHT_SHIFT(tmp20 - tmp10,
04641                           CONST_BITS+PASS1_BITS+3)
04642                 & RANGE_MASK];
04643     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp21 + tmp11,
04644                           CONST_BITS+PASS1_BITS+3)
04645                 & RANGE_MASK];
04646     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp21 - tmp11,
04647                           CONST_BITS+PASS1_BITS+3)
04648                 & RANGE_MASK];
04649     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp22 + tmp12,
04650                           CONST_BITS+PASS1_BITS+3)
04651                 & RANGE_MASK];
04652     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp22 - tmp12,
04653                           CONST_BITS+PASS1_BITS+3)
04654                 & RANGE_MASK];
04655 
04656     wsptr += 6;     /* advance pointer to next row */
04657   }
04658 }
04659 
04660 
04661 /*
04662  * Perform dequantization and inverse DCT on one block of coefficients,
04663  * producing a 5x10 output block.
04664  *
04665  * 10-point IDCT in pass 1 (columns), 5-point in pass 2 (rows).
       *
       * Input:  rows 0..7 of the first 5 columns of coef_block (row stride
       *         DCTSIZE); each coefficient is combined with the entry at the
       *         same offset of compptr->dct_table through DEQUANTIZE.
       * Output: 5 samples in each of output_buf[0] .. output_buf[9], starting
       *         at column output_col.  Every sample is read from the table
       *         returned by IDCT_range_limit(cinfo), indexed with the descaled
       *         result masked by RANGE_MASK.
       *
       * NOTE(review): signed values are left-shifted in several places (for
       * example "z3 <<= CONST_BITS").  ISO C leaves "<<" undefined for negative
       * signed operands, so this presumably relies on two's-complement
       * behavior of the compiler -- confirm for the target toolchain.
04666  */
04667 
04668 GLOBAL(void)
04669 jpeg_idct_5x10 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
04670         JCOEFPTR coef_block,
04671         JSAMPARRAY output_buf, JDIMENSION output_col)
04672 {
04673   INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
04674   INT32 tmp20, tmp21, tmp22, tmp23, tmp24;
04675   INT32 z1, z2, z3, z4, z5;
04676   JCOEFPTR inptr;
04677   ISLOW_MULT_TYPE * quantptr;
04678   int * wsptr;
04679   JSAMPROW outptr;
04680   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
04681   int ctr;
04682   int workspace[5*10];  /* buffers data between passes */
04683   SHIFT_TEMPS
04684 
04685   /* Pass 1: process columns from input, store into work array.
04686    * 10-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
04687    */
04688 
04689   inptr = coef_block;
04690   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
04691   wsptr = workspace;
        /* One iteration per input column; all three pointers step one column. */
04692   for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
04693     /* Even part */
04694 
04695     z3 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
04696     z3 <<= CONST_BITS;
04697     /* Add fudge factor here for final descale. */
04698     z3 += ONE << (CONST_BITS-PASS1_BITS-1);
04699     z4 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
04700     z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
04701     z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
04702     tmp10 = z3 + z1;
04703     tmp11 = z3 - z2;
04704 
          /* tmp22 is descaled right here, unlike the other even terms; it is
           * stored in the output stage without a further RIGHT_SHIFT.
           */
04705     tmp22 = RIGHT_SHIFT(z3 - ((z1 - z2) << 1),   /* c0 = (c4-c8)*2 */
04706             CONST_BITS-PASS1_BITS);
04707 
          /* z2 and z3 are reused for the coefficients of rows 2 and 6. */
04708     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
04709     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
04710 
04711     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
04712     tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
04713     tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
04714 
04715     tmp20 = tmp10 + tmp12;
04716     tmp24 = tmp10 - tmp12;
04717     tmp21 = tmp11 + tmp13;
04718     tmp23 = tmp11 - tmp13;
04719 
04720     /* Odd part */
04721 
          /* z1..z4 now hold the dequantized coefficients of rows 1, 3, 5, 7. */
04722     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
04723     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
04724     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
04725     z4 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
04726 
04727     tmp11 = z2 + z4;
04728     tmp13 = z2 - z4;
04729 
04730     tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
04731     z5 = z3 << CONST_BITS;
04732 
          /* z2 and z4 are scratch from here on; both are assigned twice, once
           * for the tmp10/tmp14 pair and once for the tmp11/tmp13 pair.
           */
04733     z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
04734     z4 = z5 + tmp12;
04735 
04736     tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
04737     tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
04738 
04739     z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
04740     z4 = z5 - tmp12 - (tmp13 << (CONST_BITS - 1));
04741 
          /* tmp12 is reassigned to the odd term paired with tmp22.  It is
           * shifted by PASS1_BITS only, which is why the output stage adds it
           * to the already descaled tmp22 without a RIGHT_SHIFT.
           */
04742     tmp12 = (z1 - tmp13 - z3) << PASS1_BITS;
04743 
          /* tmp11 and tmp13 are overwritten with their final odd-part values;
           * the old values (z2+z4 and z2-z4 of rows 3 and 7) are no longer
           * needed after this point.
           */
04744     tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
04745     tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
04746 
04747     /* Final output stage */
04748 
          /* wsptr[5*k] is element k of this column (workspace row stride 5).
           * Elements k and 9-k share the even term tmp2x and differ only in
           * the sign of the odd term tmp1x.
           */
04749     wsptr[5*0] = (int) RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS-PASS1_BITS);
04750     wsptr[5*9] = (int) RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS-PASS1_BITS);
04751     wsptr[5*1] = (int) RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS-PASS1_BITS);
04752     wsptr[5*8] = (int) RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS-PASS1_BITS);
04753     wsptr[5*2] = (int) (tmp22 + tmp12);
04754     wsptr[5*7] = (int) (tmp22 - tmp12);
04755     wsptr[5*3] = (int) RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS-PASS1_BITS);
04756     wsptr[5*6] = (int) RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS-PASS1_BITS);
04757     wsptr[5*4] = (int) RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS-PASS1_BITS);
04758     wsptr[5*5] = (int) RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS-PASS1_BITS);
04759   }
04760 
04761   /* Pass 2: process 10 rows from work array, store into output array.
04762    * 5-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
04763    */
04764 
04765   wsptr = workspace;
04766   for (ctr = 0; ctr < 10; ctr++) {
          /* First of the 5 samples written for output row ctr. */
04767     outptr = output_buf[ctr] + output_col;
04768 
04769     /* Even part */
04770 
04771     /* Add range center and fudge factor for final descale and range-limit. */
04772     tmp12 = (INT32) wsptr[0] +
04773           ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
04774            (ONE << (PASS1_BITS+2)));
04775     tmp12 <<= CONST_BITS;
04776     tmp13 = (INT32) wsptr[2];
04777     tmp14 = (INT32) wsptr[4];
04778     z1 = MULTIPLY(tmp13 + tmp14, FIX(0.790569415)); /* (c2+c4)/2 */
04779     z2 = MULTIPLY(tmp13 - tmp14, FIX(0.353553391)); /* (c2-c4)/2 */
04780     z3 = tmp12 + z2;
04781     tmp10 = z3 + z1;
04782     tmp11 = z3 - z1;
          /* Middle sample: subtracts 4*z2 from the DC term. */
04783     tmp12 -= z2 << 2;
04784 
04785     /* Odd part */
04786 
          /* z2, z3, z1, tmp13 and tmp14 are all reused for the odd inputs. */
04787     z2 = (INT32) wsptr[1];
04788     z3 = (INT32) wsptr[3];
04789 
04790     z1 = MULTIPLY(z2 + z3, FIX(0.831253876));       /* c3 */
04791     tmp13 = z1 + MULTIPLY(z2, FIX(0.513743148));    /* c1-c3 */
04792     tmp14 = z1 - MULTIPLY(z3, FIX(2.176250899));    /* c1+c3 */
04793 
04794     /* Final output stage */
04795 
          /* Samples k and 4-k are even term +/- odd term; the middle sample
           * outptr[2] has no odd contribution.
           */
04796     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp13,
04797                           CONST_BITS+PASS1_BITS+3)
04798                 & RANGE_MASK];
04799     outptr[4] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp13,
04800                           CONST_BITS+PASS1_BITS+3)
04801                 & RANGE_MASK];
04802     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp11 + tmp14,
04803                           CONST_BITS+PASS1_BITS+3)
04804                 & RANGE_MASK];
04805     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp11 - tmp14,
04806                           CONST_BITS+PASS1_BITS+3)
04807                 & RANGE_MASK];
04808     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12,
04809                           CONST_BITS+PASS1_BITS+3)
04810                 & RANGE_MASK];
04811 
04812     wsptr += 5;     /* advance pointer to next row */
04813   }
04814 }
04815 
04816 
04817 /*
04818  * Perform dequantization and inverse DCT on one block of coefficients,
04819  * producing a 4x8 output block.
04820  *
04821  * 8-point IDCT in pass 1 (columns), 4-point in pass 2 (rows).
       *
       * Input:  rows 0..7 of the first 4 columns of coef_block (row stride
       *         DCTSIZE); each coefficient is combined with the entry at the
       *         same offset of compptr->dct_table through DEQUANTIZE.
       * Output: 4 samples in each of output_buf[0] .. output_buf[7], starting
       *         at column output_col.  Every sample is read from the table
       *         returned by IDCT_range_limit(cinfo), indexed with the descaled
       *         result masked by RANGE_MASK.
       *
       * NOTE(review): signed values are left-shifted in several places (for
       * example "z2 <<= CONST_BITS" and the dcval computation).  ISO C leaves
       * "<<" undefined for negative signed operands, so this presumably relies
       * on two's-complement behavior of the compiler -- confirm for the target
       * toolchain.
04822  */
04823 
04824 GLOBAL(void)
04825 jpeg_idct_4x8 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
04826            JCOEFPTR coef_block,
04827            JSAMPARRAY output_buf, JDIMENSION output_col)
04828 {
04829   INT32 tmp0, tmp1, tmp2, tmp3;
04830   INT32 tmp10, tmp11, tmp12, tmp13;
04831   INT32 z1, z2, z3;
04832   JCOEFPTR inptr;
04833   ISLOW_MULT_TYPE * quantptr;
04834   int * wsptr;
04835   JSAMPROW outptr;
04836   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
04837   int ctr;
04838   int workspace[4*8];   /* buffers data between passes */
04839   SHIFT_TEMPS
04840 
04841   /* Pass 1: process columns from input, store into work array.
04842    * Note results are scaled up by sqrt(8) compared to a true IDCT;
04843    * furthermore, we scale the results by 2**PASS1_BITS.
04844    * 8-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
04845    */
04846 
04847   inptr = coef_block;
04848   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
04849   wsptr = workspace;
        /* Counts down over 4 columns.  The pointers are advanced inside the
         * body (in both the shortcut and the full path) rather than in the
         * for-header, because the shortcut leaves the iteration via continue.
         */
04850   for (ctr = 4; ctr > 0; ctr--) {
04851     /* Due to quantization, we will usually find that many of the input
04852      * coefficients are zero, especially the AC terms.  We can exploit this
04853      * by short-circuiting the IDCT calculation for any column in which all
04854      * the AC terms are zero.  In that case each output is equal to the
04855      * DC coefficient (with scale factor as needed).
04856      * With typical images and quantization tables, half or more of the
04857      * column DCT calculations can be simplified this way.
04858      */
04859 
04860     if (inptr[DCTSIZE*1] == 0 && inptr[DCTSIZE*2] == 0 &&
04861     inptr[DCTSIZE*3] == 0 && inptr[DCTSIZE*4] == 0 &&
04862     inptr[DCTSIZE*5] == 0 && inptr[DCTSIZE*6] == 0 &&
04863     inptr[DCTSIZE*7] == 0) {
04864       /* AC terms all zero */
04865       int dcval = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]) << PASS1_BITS;
04866 
            /* All 8 elements of this workspace column get the same value. */
04867       wsptr[4*0] = dcval;
04868       wsptr[4*1] = dcval;
04869       wsptr[4*2] = dcval;
04870       wsptr[4*3] = dcval;
04871       wsptr[4*4] = dcval;
04872       wsptr[4*5] = dcval;
04873       wsptr[4*6] = dcval;
04874       wsptr[4*7] = dcval;
04875 
04876       inptr++;          /* advance pointers to next column */
04877       quantptr++;
04878       wsptr++;
04879       continue;
04880     }
04881 
04882     /* Even part: reverse the even part of the forward DCT.
04883      * The rotator is c(-6).
04884      */
04885 
04886     z2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
04887     z3 = DEQUANTIZE(inptr[DCTSIZE*6], quantptr[DCTSIZE*6]);
04888 
04889     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);       /* c6 */
04890     tmp2 = z1 + MULTIPLY(z2, FIX_0_765366865);     /* c2-c6 */
04891     tmp3 = z1 - MULTIPLY(z3, FIX_1_847759065);     /* c2+c6 */
04892 
          /* z2 and z3 are reused for the coefficients of rows 0 and 4. */
04893     z2 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
04894     z3 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
04895     z2 <<= CONST_BITS;
04896     z3 <<= CONST_BITS;
04897     /* Add fudge factor here for final descale. */
04898     z2 += ONE << (CONST_BITS-PASS1_BITS-1);
04899 
04900     tmp0 = z2 + z3;
04901     tmp1 = z2 - z3;
04902 
04903     tmp10 = tmp0 + tmp2;
04904     tmp13 = tmp0 - tmp2;
04905     tmp11 = tmp1 + tmp3;
04906     tmp12 = tmp1 - tmp3;
04907 
04908     /* Odd part per figure 8; the matrix is unitary and hence its
04909      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
04910      */
04911 
          /* tmp0..tmp3 are reused: the even part now lives in tmp10..tmp13. */
04912     tmp0 = DEQUANTIZE(inptr[DCTSIZE*7], quantptr[DCTSIZE*7]);
04913     tmp1 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
04914     tmp2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
04915     tmp3 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
04916 
04917     z2 = tmp0 + tmp2;
04918     z3 = tmp1 + tmp3;
04919 
04920     z1 = MULTIPLY(z2 + z3, FIX_1_175875602);       /*  c3 */
04921     z2 = MULTIPLY(z2, - FIX_1_961570560);          /* -c3-c5 */
04922     z3 = MULTIPLY(z3, - FIX_0_390180644);          /* -c3+c5 */
04923     z2 += z1;
04924     z3 += z1;
04925 
          /* z1 is reassigned for each of the two cross terms below; the
           * sum inside MULTIPLY() must be taken before tmp0/tmp3 (and later
           * tmp1/tmp2) are overwritten with their own products.
           */
04926     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223); /* -c3+c7 */
04927     tmp0 = MULTIPLY(tmp0, FIX_0_298631336);        /* -c1+c3+c5-c7 */
04928     tmp3 = MULTIPLY(tmp3, FIX_1_501321110);        /*  c1+c3-c5-c7 */
04929     tmp0 += z1 + z2;
04930     tmp3 += z1 + z3;
04931 
04932     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447); /* -c1-c3 */
04933     tmp1 = MULTIPLY(tmp1, FIX_2_053119869);        /*  c1+c3-c5+c7 */
04934     tmp2 = MULTIPLY(tmp2, FIX_3_072711026);        /*  c1+c3+c5-c7 */
04935     tmp1 += z1 + z3;
04936     tmp2 += z1 + z2;
04937 
04938     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
04939 
          /* wsptr[4*k] is element k of this column (workspace row stride 4);
           * elements k and 7-k are even term +/- odd term.
           */
04940     wsptr[4*0] = (int) RIGHT_SHIFT(tmp10 + tmp3, CONST_BITS-PASS1_BITS);
04941     wsptr[4*7] = (int) RIGHT_SHIFT(tmp10 - tmp3, CONST_BITS-PASS1_BITS);
04942     wsptr[4*1] = (int) RIGHT_SHIFT(tmp11 + tmp2, CONST_BITS-PASS1_BITS);
04943     wsptr[4*6] = (int) RIGHT_SHIFT(tmp11 - tmp2, CONST_BITS-PASS1_BITS);
04944     wsptr[4*2] = (int) RIGHT_SHIFT(tmp12 + tmp1, CONST_BITS-PASS1_BITS);
04945     wsptr[4*5] = (int) RIGHT_SHIFT(tmp12 - tmp1, CONST_BITS-PASS1_BITS);
04946     wsptr[4*3] = (int) RIGHT_SHIFT(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
04947     wsptr[4*4] = (int) RIGHT_SHIFT(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
04948 
04949     inptr++;            /* advance pointers to next column */
04950     quantptr++;
04951     wsptr++;
04952   }
04953 
04954   /* Pass 2: process 8 rows from work array, store into output array.
04955    * 4-point IDCT kernel,
04956    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
04957    */
04958 
04959   wsptr = workspace;
04960   for (ctr = 0; ctr < 8; ctr++) {
          /* First of the 4 samples written for output row ctr. */
04961     outptr = output_buf[ctr] + output_col;
04962 
04963     /* Even part */
04964 
04965     /* Add range center and fudge factor for final descale and range-limit. */
04966     tmp0 = (INT32) wsptr[0] +
04967          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
04968           (ONE << (PASS1_BITS+2)));
04969     tmp2 = (INT32) wsptr[2];
04970 
          /* The even terms need no multiplication; the sum and difference are
           * shifted by CONST_BITS before being combined with the MULTIPLY()
           * products of the odd part.
           */
04971     tmp10 = (tmp0 + tmp2) << CONST_BITS;
04972     tmp12 = (tmp0 - tmp2) << CONST_BITS;
04973 
04974     /* Odd part */
04975     /* Same rotation as in the even part of the 8x8 LL&M IDCT */
04976 
04977     z2 = (INT32) wsptr[1];
04978     z3 = (INT32) wsptr[3];
04979 
          /* tmp0 and tmp2 are reused for the odd-part terms. */
04980     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
04981     tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
04982     tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
04983 
04984     /* Final output stage */
04985 
          /* Samples k and 3-k are even term +/- odd term. */
04986     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
04987                           CONST_BITS+PASS1_BITS+3)
04988                 & RANGE_MASK];
04989     outptr[3] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
04990                           CONST_BITS+PASS1_BITS+3)
04991                 & RANGE_MASK];
04992     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp12 + tmp2,
04993                           CONST_BITS+PASS1_BITS+3)
04994                 & RANGE_MASK];
04995     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp12 - tmp2,
04996                           CONST_BITS+PASS1_BITS+3)
04997                 & RANGE_MASK];
04998 
04999     wsptr += 4;     /* advance pointer to next row */
05000   }
05001 }
05002 
05003 
05004 /*
05005  * Perform dequantization and inverse DCT on one block of coefficients,
05006  * producing a reduced-size 3x6 output block.
05007  *
05008  * 6-point IDCT in pass 1 (columns), 3-point in pass 2 (rows).
       *
       * Input:  rows 0..5 of the first 3 columns of coef_block (row stride
       *         DCTSIZE); each coefficient is combined with the entry at the
       *         same offset of compptr->dct_table through DEQUANTIZE.
       * Output: 3 samples in each of output_buf[0] .. output_buf[5], starting
       *         at column output_col.  Every sample is read from the table
       *         returned by IDCT_range_limit(cinfo), indexed with the descaled
       *         result masked by RANGE_MASK.
       *
       * NOTE(review): signed values are left-shifted in several places (for
       * example "tmp0 <<= CONST_BITS").  ISO C leaves "<<" undefined for
       * negative signed operands, so this presumably relies on
       * two's-complement behavior of the compiler -- confirm for the target
       * toolchain.
05009  */
05010 
05011 GLOBAL(void)
05012 jpeg_idct_3x6 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
05013            JCOEFPTR coef_block,
05014            JSAMPARRAY output_buf, JDIMENSION output_col)
05015 {
05016   INT32 tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
05017   INT32 z1, z2, z3;
05018   JCOEFPTR inptr;
05019   ISLOW_MULT_TYPE * quantptr;
05020   int * wsptr;
05021   JSAMPROW outptr;
05022   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
05023   int ctr;
05024   int workspace[3*6];   /* buffers data between passes */
05025   SHIFT_TEMPS
05026 
05027   /* Pass 1: process columns from input, store into work array.
05028    * 6-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
05029    */
05030 
05031   inptr = coef_block;
05032   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
05033   wsptr = workspace;
        /* One iteration per input column; all three pointers step one column. */
05034   for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
05035     /* Even part */
05036 
05037     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
05038     tmp0 <<= CONST_BITS;
05039     /* Add fudge factor here for final descale. */
05040     tmp0 += ONE << (CONST_BITS-PASS1_BITS-1);
05041     tmp2 = DEQUANTIZE(inptr[DCTSIZE*4], quantptr[DCTSIZE*4]);
05042     tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
05043     tmp1 = tmp0 + tmp10;
          /* tmp11 (= tmp0 - 2*tmp10) is descaled right here; it is stored in
           * the output stage without a further RIGHT_SHIFT.
           */
05044     tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS-PASS1_BITS);
          /* tmp10 and tmp0 are reused as scratch for the row 2 term. */
05045     tmp10 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
05046     tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
05047     tmp10 = tmp1 + tmp0;
05048     tmp12 = tmp1 - tmp0;
05049 
05050     /* Odd part */
05051 
05052     z1 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
05053     z2 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
05054     z3 = DEQUANTIZE(inptr[DCTSIZE*5], quantptr[DCTSIZE*5]);
          /* tmp0..tmp2 are reassigned here and hold the odd-part terms.  tmp1
           * is first the shared c5 product, then (last line) the term paired
           * with tmp11; that one is shifted by PASS1_BITS only, matching the
           * already descaled tmp11.
           */
05055     tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
05056     tmp0 = tmp1 + ((z1 + z2) << CONST_BITS);
05057     tmp2 = tmp1 + ((z3 - z2) << CONST_BITS);
05058     tmp1 = (z1 - z2 - z3) << PASS1_BITS;
05059 
05060     /* Final output stage */
05061 
          /* wsptr[3*k] is element k of this column (workspace row stride 3);
           * elements k and 5-k are even term +/- odd term.
           */
05062     wsptr[3*0] = (int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS-PASS1_BITS);
05063     wsptr[3*5] = (int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS-PASS1_BITS);
05064     wsptr[3*1] = (int) (tmp11 + tmp1);
05065     wsptr[3*4] = (int) (tmp11 - tmp1);
05066     wsptr[3*2] = (int) RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS-PASS1_BITS);
05067     wsptr[3*3] = (int) RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS-PASS1_BITS);
05068   }
05069 
05070   /* Pass 2: process 6 rows from work array, store into output array.
05071    * 3-point IDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
05072    */
05073 
05074   wsptr = workspace;
05075   for (ctr = 0; ctr < 6; ctr++) {
          /* First of the 3 samples written for output row ctr. */
05076     outptr = output_buf[ctr] + output_col;
05077 
05078     /* Even part */
05079 
05080     /* Add range center and fudge factor for final descale and range-limit. */
05081     tmp0 = (INT32) wsptr[0] +
05082          ((((INT32) RANGE_CENTER) << (PASS1_BITS+3)) +
05083           (ONE << (PASS1_BITS+2)));
05084     tmp0 <<= CONST_BITS;
05085     tmp2 = (INT32) wsptr[2];
05086     tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
05087     tmp10 = tmp0 + tmp12;
          /* tmp2 is reused for the middle sample: tmp0 - 2*tmp12. */
05088     tmp2 = tmp0 - tmp12 - tmp12;
05089 
05090     /* Odd part */
05091 
          /* tmp12 and tmp0 are reused for the single odd input wsptr[1]. */
05092     tmp12 = (INT32) wsptr[1];
05093     tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
05094 
05095     /* Final output stage */
05096 
          /* Outer samples are even term +/- odd term; the middle sample
           * outptr[1] has no odd contribution.
           */
05097     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0,
05098                           CONST_BITS+PASS1_BITS+3)
05099                 & RANGE_MASK];
05100     outptr[2] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0,
05101                           CONST_BITS+PASS1_BITS+3)
05102                 & RANGE_MASK];
05103     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp2,
05104                           CONST_BITS+PASS1_BITS+3)
05105                 & RANGE_MASK];
05106 
05107     wsptr += 3;     /* advance pointer to next row */
05108   }
05109 }
05110 
05111 
05112 /*
05113  * Perform dequantization and inverse DCT on one block of coefficients,
05114  * producing a 2x4 output block.
05115  *
05116  * 4-point IDCT in pass 1 (columns), 2-point in pass 2 (rows).
       *
       * Input:  rows 0..3 of the first 2 columns of coef_block (row stride
       *         DCTSIZE); each coefficient is combined with the entry at the
       *         same offset of compptr->dct_table through DEQUANTIZE.
       * Output: 2 samples in each of output_buf[0] .. output_buf[3], starting
       *         at column output_col.  Every sample is read from the table
       *         returned by IDCT_range_limit(cinfo), indexed with the descaled
       *         result masked by RANGE_MASK.
       *
       * Unlike the larger kernels in this file, pass 1 stores its results
       * without any RIGHT_SHIFT, so the workspace is INT32 and pass 2 uses
       * CONST_BITS (not PASS1_BITS) for the range center, the rounding term
       * and the final shift.
       *
       * NOTE(review): "(tmp0 + tmp2) << CONST_BITS" and its sibling left-shift
       * signed values.  ISO C leaves "<<" undefined for negative signed
       * operands, so this presumably relies on two's-complement behavior of
       * the compiler -- confirm for the target toolchain.
05117  */
05118 
05119 GLOBAL(void)
05120 jpeg_idct_2x4 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
05121            JCOEFPTR coef_block,
05122            JSAMPARRAY output_buf, JDIMENSION output_col)
05123 {
05124   INT32 tmp0, tmp2, tmp10, tmp12;
05125   INT32 z1, z2, z3;
05126   JCOEFPTR inptr;
05127   ISLOW_MULT_TYPE * quantptr;
05128   INT32 * wsptr;
05129   JSAMPROW outptr;
05130   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
05131   int ctr;
05132   INT32 workspace[2*4]; /* buffers data between passes */
05133   SHIFT_TEMPS
05134 
05135   /* Pass 1: process columns from input, store into work array.
05136    * 4-point IDCT kernel,
05137    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point IDCT].
05138    */
05139 
05140   inptr = coef_block;
05141   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
05142   wsptr = workspace;
        /* One iteration per input column; all three pointers step one column. */
05143   for (ctr = 0; ctr < 2; ctr++, inptr++, quantptr++, wsptr++) {
05144     /* Even part */
05145 
05146     tmp0 = DEQUANTIZE(inptr[DCTSIZE*0], quantptr[DCTSIZE*0]);
05147     tmp2 = DEQUANTIZE(inptr[DCTSIZE*2], quantptr[DCTSIZE*2]);
05148 
05149     tmp10 = (tmp0 + tmp2) << CONST_BITS;
05150     tmp12 = (tmp0 - tmp2) << CONST_BITS;
05151 
05152     /* Odd part */
05153     /* Same rotation as in the even part of the 8x8 LL&M IDCT */
05154 
05155     z2 = DEQUANTIZE(inptr[DCTSIZE*1], quantptr[DCTSIZE*1]);
05156     z3 = DEQUANTIZE(inptr[DCTSIZE*3], quantptr[DCTSIZE*3]);
05157 
          /* tmp0 and tmp2 are reused for the odd-part terms. */
05158     z1 = MULTIPLY(z2 + z3, FIX_0_541196100);   /* c6 */
05159     tmp0 = z1 + MULTIPLY(z2, FIX_0_765366865); /* c2-c6 */
05160     tmp2 = z1 - MULTIPLY(z3, FIX_1_847759065); /* c2+c6 */
05161 
05162     /* Final output stage */
05163 
          /* Stored at full precision (no descale, no rounding term);
           * wsptr[2*k] is element k of this column (workspace row stride 2).
           */
05164     wsptr[2*0] = tmp10 + tmp0;
05165     wsptr[2*3] = tmp10 - tmp0;
05166     wsptr[2*1] = tmp12 + tmp2;
05167     wsptr[2*2] = tmp12 - tmp2;
05168   }
05169 
05170   /* Pass 2: process 4 rows from work array, store into output array. */
05171 
05172   wsptr = workspace;
05173   for (ctr = 0; ctr < 4; ctr++) {
          /* First of the 2 samples written for output row ctr. */
05174     outptr = output_buf[ctr] + output_col;
05175 
05176     /* Even part */
05177 
05178     /* Add range center and fudge factor for final descale and range-limit. */
          /* Both terms are expressed at CONST_BITS+3 because the workspace
           * values were not descaled in pass 1.
           */
05179     tmp10 = wsptr[0] +
05180           ((((INT32) RANGE_CENTER) << (CONST_BITS+3)) +
05181            (ONE << (CONST_BITS+2)));
05182 
05183     /* Odd part */
05184 
05185     tmp0 = wsptr[1];
05186 
05187     /* Final output stage */
05188 
          /* The two samples of the row are the sum and the difference of the
           * two workspace values.
           */
05189     outptr[0] = range_limit[(int) RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS+3)
05190                 & RANGE_MASK];
05191     outptr[1] = range_limit[(int) RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS+3)
05192                 & RANGE_MASK];
05193 
05194     wsptr += 2;     /* advance pointer to next row */
05195   }
05196 }
05197 
05198 
05199 /*
05200  * Perform dequantization and inverse DCT on one block of coefficients,
05201  * producing a 1x2 output block.
05202  *
05203  * 2-point IDCT in pass 1 (columns), 1-point in pass 2 (rows).
       *
       * Input:  only coef_block[DCTSIZE*0] and coef_block[DCTSIZE*1] (rows 0
       *         and 1 of column 0) are read, each combined with the entry at
       *         the same offset of compptr->dct_table through DEQUANTIZE.
       * Output: one sample in output_buf[0] and one in output_buf[1], both at
       *         column output_col, read from the table returned by
       *         IDCT_range_limit(cinfo) at index (descaled value & RANGE_MASK).
       *
       * No workspace is needed: the whole computation is one sum and one
       * difference.  This routine works in DCTELEM with ISHIFT_TEMPS /
       * IRIGHT_SHIFT, where the larger kernels in this file use INT32 with
       * SHIFT_TEMPS / RIGHT_SHIFT.
05204  */
05205 
05206 GLOBAL(void)
05207 jpeg_idct_1x2 (j_decompress_ptr cinfo, jpeg_component_info * compptr,
05208            JCOEFPTR coef_block,
05209            JSAMPARRAY output_buf, JDIMENSION output_col)
05210 {
05211   DCTELEM tmp0, tmp1;
05212   ISLOW_MULT_TYPE * quantptr;
05213   JSAMPLE *range_limit = IDCT_range_limit(cinfo);
05214   ISHIFT_TEMPS
05215 
05216   /* Process 1 column from input, store into output array. */
05217 
05218   quantptr = (ISLOW_MULT_TYPE *) compptr->dct_table;
05219 
05220   /* Even part */
05221 
05222   tmp0 = DEQUANTIZE(coef_block[DCTSIZE*0], quantptr[DCTSIZE*0]);
05223   /* Add range center and fudge factor for final descale and range-limit. */
        /* Both terms are pre-scaled for the shift by 3 applied below:
         * RANGE_CENTER << 3, plus 1 << 2 as the rounding term.
         */
05224   tmp0 += (((DCTELEM) RANGE_CENTER) << 3) + (1 << 2);
05225 
05226   /* Odd part */
05227 
05228   tmp1 = DEQUANTIZE(coef_block[DCTSIZE*1], quantptr[DCTSIZE*1]);
05229 
05230   /* Final output stage */
05231 
        /* Row 0 gets the sum, row 1 the difference of the two terms. */
05232   output_buf[0][output_col] =
05233     range_limit[(int) IRIGHT_SHIFT(tmp0 + tmp1, 3) & RANGE_MASK];
05234   output_buf[1][output_col] =
05235     range_limit[(int) IRIGHT_SHIFT(tmp0 - tmp1, 3) & RANGE_MASK];
05236 }
05237 
05238 #endif /* IDCT_SCALING_SUPPORTED */
05239 #endif /* DCT_ISLOW_SUPPORTED */