Shoaib Ahmed / Mbed 2 deprecated uzairkhan

Dependencies:   uzair Camera_LS_Y201 F7_Ethernet LCD_DISCO_F746NG NetworkAPI SDFileSystem mbed

Embed: (wiki syntax)

« Back to documentation index

Show/hide line numbers jfdctint.c Source File

jfdctint.c

00001 /*
00002  * jfdctint.c
00003  *
00004  * Copyright (C) 1991-1996, Thomas G. Lane.
00005  * Modification developed 2003-2015 by Guido Vollbeding.
00006  * This file is part of the Independent JPEG Group's software.
00007  * For conditions of distribution and use, see the accompanying README file.
00008  *
00009  * This file contains a slow-but-accurate integer implementation of the
00010  * forward DCT (Discrete Cosine Transform).
00011  *
00012  * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
00013  * on each column.  Direct algorithms are also available, but they are
00014  * much more complex and seem not to be any faster when reduced to code.
00015  *
00016  * This implementation is based on an algorithm described in
00017  *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
00018  *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
00019  *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
00020  * The primary algorithm described there uses 11 multiplies and 29 adds.
00021  * We use their alternate method with 12 multiplies and 32 adds.
00022  * The advantage of this method is that no data path contains more than one
00023  * multiplication; this allows a very simple and accurate implementation in
00024  * scaled fixed-point arithmetic, with a minimal number of shifts.
00025  *
00026  * We also provide FDCT routines with various input sample block sizes for
00027  * direct resolution reduction or enlargement and for directly resolving the
00028  * common 2x1 and 1x2 subsampling cases without additional resampling: NxN
00029  * (N=1...16), 2NxN, and Nx2N (N=1...8) pixels for one 8x8 output DCT block.
00030  *
00031  * For N<8 we fill the remaining block coefficients with zero.
00032  * For N>8 we apply a partial N-point FDCT on the input samples, computing
00033  * just the lower 8 frequency coefficients and discarding the rest.
00034  *
00035  * We must scale the output coefficients of the N-point FDCT appropriately
00036  * to the standard 8-point FDCT level by 8/N per 1-D pass.  This scaling
00037  * is folded into the constant multipliers (pass 2) and/or final/initial
00038  * shifting.
00039  *
00040  * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
00041  * since there would be too many additional constants to pre-calculate.
00042  */
00043 
00044 #define JPEG_INTERNALS
00045 #include "jinclude.h"
00046 #include "jpeglib.h"
00047 #include "jdct.h"       /* Private declarations for DCT subsystem */
00048 
00049 #ifdef DCT_ISLOW_SUPPORTED
00050 
00051 
00052 /*
00053  * This module is specialized to the case DCTSIZE = 8.
00054  */
00055 
00056 #if DCTSIZE != 8
00057   Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
00058 #endif
00059 
00060 
00061 /*
00062  * The poop on this scaling stuff is as follows:
00063  *
00064  * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
00065  * larger than the true DCT outputs.  The final outputs are therefore
00066  * a factor of N larger than desired; since N=8 this can be cured by
00067  * a simple right shift at the end of the algorithm.  The advantage of
00068  * this arrangement is that we save two multiplications per 1-D DCT,
00069  * because the y0 and y4 outputs need not be divided by sqrt(N).
00070  * In the IJG code, this factor of 8 is removed by the quantization step
00071  * (in jcdctmgr.c), NOT in this module.
00072  *
00073  * We have to do addition and subtraction of the integer inputs, which
00074  * is no problem, and multiplication by fractional constants, which is
00075  * a problem to do in integer arithmetic.  We multiply all the constants
00076  * by CONST_SCALE and convert them to integer constants (thus retaining
00077  * CONST_BITS bits of precision in the constants).  After doing a
00078  * multiplication we have to divide the product by CONST_SCALE, with proper
00079  * rounding, to produce the correct output.  This division can be done
00080  * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
00081  * as long as possible so that partial sums can be added together with
00082  * full fractional precision.
00083  *
00084  * The outputs of the first pass are scaled up by PASS1_BITS bits so that
00085  * they are represented to better-than-integral precision.  These outputs
00086  * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
00087  * with the recommended scaling.  (For 12-bit sample data, the intermediate
00088  * array is INT32 anyway.)
00089  *
00090  * To avoid overflow of the 32-bit intermediate results in pass 2, we must
00091  * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
00092  * shows that the values given below are the most effective.
00093  */
00094 
00095 #if BITS_IN_JSAMPLE == 8
00096 #define CONST_BITS  13      /* fractional bits kept in the fixed-point multiplier constants */
00097 #define PASS1_BITS  2       /* extra fractional bits carried by the pass 1 outputs */
00098 #else
00099 #define CONST_BITS  13      /* same constant precision as the 8-bit case */
00100 #define PASS1_BITS  1       /* lose a little precision to avoid overflow */
00101 #endif
00102 
00103 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
00104  * causing a lot of useless floating-point operations at run time.
00105  * To get around this we use the following pre-calculated constants.
00106  * If you change CONST_BITS you may want to add appropriate values.
00107  * (With a reasonable C compiler, you can just rely on the FIX() macro...)
00108  */
00109 
00110 #if CONST_BITS == 13
      /* Pre-calculated table: each integer is the named constant multiplied by
       * 2**13 (= 8192) and rounded to the nearest integer.
       */
00111 #define FIX_0_298631336  ((INT32)  2446)    /* FIX(0.298631336) */
00112 #define FIX_0_390180644  ((INT32)  3196)    /* FIX(0.390180644) */
00113 #define FIX_0_541196100  ((INT32)  4433)    /* FIX(0.541196100) */
00114 #define FIX_0_765366865  ((INT32)  6270)    /* FIX(0.765366865) */
00115 #define FIX_0_899976223  ((INT32)  7373)    /* FIX(0.899976223) */
00116 #define FIX_1_175875602  ((INT32)  9633)    /* FIX(1.175875602) */
00117 #define FIX_1_501321110  ((INT32)  12299)   /* FIX(1.501321110) */
00118 #define FIX_1_847759065  ((INT32)  15137)   /* FIX(1.847759065) */
00119 #define FIX_1_961570560  ((INT32)  16069)   /* FIX(1.961570560) */
00120 #define FIX_2_053119869  ((INT32)  16819)   /* FIX(2.053119869) */
00121 #define FIX_2_562915447  ((INT32)  20995)   /* FIX(2.562915447) */
00122 #define FIX_3_072711026  ((INT32)  25172)   /* FIX(3.072711026) */
00123 #else
      /* Any other CONST_BITS: fall back to the FIX() macro (defined outside
       * this file) and let the compiler fold the constants.
       */
00124 #define FIX_0_298631336  FIX(0.298631336)
00125 #define FIX_0_390180644  FIX(0.390180644)
00126 #define FIX_0_541196100  FIX(0.541196100)
00127 #define FIX_0_765366865  FIX(0.765366865)
00128 #define FIX_0_899976223  FIX(0.899976223)
00129 #define FIX_1_175875602  FIX(1.175875602)
00130 #define FIX_1_501321110  FIX(1.501321110)
00131 #define FIX_1_847759065  FIX(1.847759065)
00132 #define FIX_1_961570560  FIX(1.961570560)
00133 #define FIX_2_053119869  FIX(2.053119869)
00134 #define FIX_2_562915447  FIX(2.562915447)
00135 #define FIX_3_072711026  FIX(3.072711026)
00136 #endif
00137 
00138 
00139 /* Multiply an INT32 variable by an INT32 constant to yield an INT32 result.
00140  * For 8-bit samples with the recommended scaling, all the variable
00141  * and constant values involved are no more than 16 bits wide, so a
00142  * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
00143  * For 12-bit samples, a full 32-bit multiplication will be needed.
00144  */
00145 
00146 #if BITS_IN_JSAMPLE == 8
00147 #define MULTIPLY(var,const)  MULTIPLY16C16(var,const)   /* 16x16->32 multiply; MULTIPLY16C16 is defined outside this file */
00148 #else
00149 #define MULTIPLY(var,const)  ((var) * (const))          /* plain full-width multiply */
00150 #endif
00151 
00152 
00153 /*
00154  * Perform the forward DCT on one block of samples.
       *
       * data        - output: receives the 8x8 coefficients, stored row by row
       *               (DCTSIZE elements per row).  It also serves as the
       *               workspace holding the pass 1 results that pass 2 reads
       *               back and overwrites in place.
       * sample_data - input: array of sample rows; rows 0..7 are read.
       * start_col   - index of the first of the 8 samples read from each row.
       *
       * The coefficients are left scaled up by an overall factor of 8 (see the
       * pass 2 comment below); nothing is returned.
00155  */
00156 
00157 GLOBAL(void)
00158 jpeg_fdct_islow (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00159 {
00160   INT32 tmp0, tmp1, tmp2, tmp3;       /* butterfly sums, then reused for the differences */
00161   INT32 tmp10, tmp11, tmp12, tmp13;   /* second-stage even terms, partly reused in the odd part */
00162   INT32 z1;                           /* shared rotation product */
00163   DCTELEM *dataptr;                   /* current output row (pass 1) or column (pass 2) */
00164   JSAMPROW elemptr;                   /* first sample of the current input row */
00165   int ctr;
00166   SHIFT_TEMPS                         /* macro from outside this file; presumably support for RIGHT_SHIFT */
00167 
00168   /* Pass 1: process rows.
00169    * Note results are scaled up by sqrt(8) compared to a true DCT;
00170    * furthermore, we scale the results by 2**PASS1_BITS.
00171    * cK represents sqrt(2) * cos(K*pi/16).
00172    */
00173 
00174   dataptr = data;
00175   for (ctr = 0; ctr < DCTSIZE; ctr++) {
00176     elemptr = sample_data[ctr] + start_col;
00177 
00178     /* Even part per LL&M figure 1 --- note that published figure is faulty;
00179      * rotator "c1" should be "c6".
00180      */
00181 
00182     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
00183     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
00184     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
00185     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
00186 
00187     tmp10 = tmp0 + tmp3;
00188     tmp12 = tmp0 - tmp3;
00189     tmp11 = tmp1 + tmp2;
00190     tmp13 = tmp1 - tmp2;
00191 
          /* tmp0..tmp3 are now overwritten with the mirrored differences that
           * feed the odd part; the even part only needs tmp10..tmp13.
           */
00192     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
00193     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
00194     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
00195     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
00196 
00197     /* Apply unsigned->signed conversion. */
          /* Only the DC term needs the 8 * CENTERJSAMPLE offset: every other
           * output in this pass is built from differences of samples.
           * NOTE(review): when the eight samples sum to less than
           * 8 * CENTERJSAMPLE the left operand of << is negative, which ISO C
           * leaves undefined; presumably all supported compilers shift
           * arithmetically here -- confirm.
           */
00198     dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
00199     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
00200 
00201     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
00202     /* Add fudge factor here for final descale. */
          /* The term added is half of 2**(CONST_BITS-PASS1_BITS), the divisor
           * applied by the shifts below (assuming ONE is the constant 1).
           */
00203     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
00204 
00205     dataptr[2] = (DCTELEM)
00206       RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
00207           CONST_BITS-PASS1_BITS);
00208     dataptr[6] = (DCTELEM)
00209       RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
00210           CONST_BITS-PASS1_BITS);
00211 
00212     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
00213      * i0..i3 in the paper are tmp0..tmp3 here.
00214      */
00215 
00216     tmp12 = tmp0 + tmp2;
00217     tmp13 = tmp1 + tmp3;
00218 
00219     z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
00220     /* Add fudge factor here for final descale. */
          /* Added once to z1; it reaches all four odd outputs through tmp12
           * and tmp13 below.
           */
00221     z1 += ONE << (CONST_BITS-PASS1_BITS-1);
00222 
00223     tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
00224     tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
00225     tmp12 += z1;
00226     tmp13 += z1;
00227 
00228     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
00229     tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
00230     tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
00231     tmp0 += z1 + tmp12;
00232     tmp3 += z1 + tmp13;
00233 
00234     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
00235     tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
00236     tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
00237     tmp1 += z1 + tmp13;
00238     tmp2 += z1 + tmp12;
00239 
00240     dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS);
00241     dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS);
00242     dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS);
00243     dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS);
00244 
00245     dataptr += DCTSIZE;     /* advance pointer to next row */
00246   }
00247 
00248   /* Pass 2: process columns.
00249    * We remove the PASS1_BITS scaling, but leave the results scaled up
00250    * by an overall factor of 8.
00251    * cK represents sqrt(2) * cos(K*pi/16).
00252    */
00253 
00254   dataptr = data;
00255   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
00256     /* Even part per LL&M figure 1 --- note that published figure is faulty;
00257      * rotator "c1" should be "c6".
00258      */
00259 
00260     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
00261     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
00262     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
00263     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
00264 
00265     /* Add fudge factor here for final descale. */
          /* Folded into tmp10 only, so it reaches both outputs 0 and 4, which
           * are shifted right by PASS1_BITS below.
           */
00266     tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
00267     tmp12 = tmp0 - tmp3;
00268     tmp11 = tmp1 + tmp2;
00269     tmp13 = tmp1 - tmp2;
00270 
          /* All column inputs must be read before any element of this column
           * is overwritten below.
           */
00271     tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
00272     tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
00273     tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
00274     tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
00275 
00276     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
00277     dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
00278 
00279     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
00280     /* Add fudge factor here for final descale. */
00281     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
00282 
00283     dataptr[DCTSIZE*2] = (DCTELEM)
00284       RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
00285           CONST_BITS+PASS1_BITS);
00286     dataptr[DCTSIZE*6] = (DCTELEM)
00287       RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
00288           CONST_BITS+PASS1_BITS);
00289 
00290     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
00291      * i0..i3 in the paper are tmp0..tmp3 here.
00292      */
00293 
00294     tmp12 = tmp0 + tmp2;
00295     tmp13 = tmp1 + tmp3;
00296 
00297     z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
00298     /* Add fudge factor here for final descale. */
00299     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
00300 
00301     tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
00302     tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
00303     tmp12 += z1;
00304     tmp13 += z1;
00305 
00306     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
00307     tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
00308     tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
00309     tmp0 += z1 + tmp12;
00310     tmp3 += z1 + tmp13;
00311 
00312     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
00313     tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
00314     tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
00315     tmp1 += z1 + tmp13;
00316     tmp2 += z1 + tmp12;
00317 
00318     dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
00319     dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
00320     dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
00321     dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
00322 
00323     dataptr++;          /* advance pointer to next column */
00324   }
00325 }
00326 
00327 #ifdef DCT_SCALING_SUPPORTED
00328 
00329 
00330 /*
00331  * Perform the forward DCT on a 7x7 sample block.
       *
       * data        - output: 8x8 coefficient block (DCTSIZE elements per row).
       *               It is zeroed first and only the entries in rows 0..6,
       *               columns 0..6 are then written, so row 7 and column 7
       *               keep the pre-zeroed value.
       * sample_data - input: array of sample rows; rows 0..6 are read.
       * start_col   - index of the first of the 7 samples read from each row.
       *
       * Nothing is returned; the extra (8/7)**2 output scaling is folded into
       * the pass 2 constants.
00332  */
00333 
00334 GLOBAL(void)
00335 jpeg_fdct_7x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00336 {
00337   INT32 tmp0, tmp1, tmp2, tmp3;   /* mirrored sums (tmp3 = centre element), reused in the odd part */
00338   INT32 tmp10, tmp11, tmp12;      /* mirrored differences feeding the odd part */
00339   INT32 z1, z2, z3;               /* shared products of the even part */
00340   DCTELEM *dataptr;               /* current output row (pass 1) or column (pass 2) */
00341   JSAMPROW elemptr;               /* first sample of the current input row */
00342   int ctr;
00343   SHIFT_TEMPS                     /* macro from outside this file; presumably support for the shift macros */
00344 
00345   /* Pre-zero output coefficient block. */
00346   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
00347 
00348   /* Pass 1: process rows.
00349    * Note results are scaled up by sqrt(8) compared to a true DCT;
00350    * furthermore, we scale the results by 2**PASS1_BITS.
00351    * cK represents sqrt(2) * cos(K*pi/14).
00352    */
00353 
00354   dataptr = data;
00355   for (ctr = 0; ctr < 7; ctr++) {
00356     elemptr = sample_data[ctr] + start_col;
00357 
00358     /* Even part */
00359 
00360     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
00361     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
00362     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
00363     tmp3 = GETJSAMPLE(elemptr[3]);
00364 
00365     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
00366     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
00367     tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
00368 
00369     z1 = tmp0 + tmp2;
00370     /* Apply unsigned->signed conversion. */
          /* NOTE(review): the left operand of << is negative when the seven
           * samples sum to less than 7 * CENTERJSAMPLE; ISO C leaves that
           * shift undefined -- confirm the supported compilers cope.
           */
00371     dataptr[0] = (DCTELEM)
00372       ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
00373     tmp3 += tmp3;                                       /* tmp3 = 2 * centre sample from here on */
00374     z1 -= tmp3;
00375     z1 -= tmp3;                                         /* z1 = tmp0 + tmp2 - 4 * centre sample */
00376     z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
00377     z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
00378     z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
00379     dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
00380     z1 -= z2;
00381     z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
00382     dataptr[4] = (DCTELEM)
00383       DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
00384           CONST_BITS-PASS1_BITS);
00385     dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
00386 
00387     /* Odd part */
00388 
          /* tmp0..tmp3 are recycled as accumulators for outputs 1, 3 and 5. */
00389     tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
00390     tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
00391     tmp0 = tmp1 - tmp2;
00392     tmp1 += tmp2;
00393     tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
00394     tmp1 += tmp2;
00395     tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
00396     tmp0 += tmp3;
00397     tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
00398 
00399     dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
00400     dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
00401     dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
00402 
00403     dataptr += DCTSIZE;     /* advance pointer to next row */
00404   }
00405 
00406   /* Pass 2: process columns.
00407    * We remove the PASS1_BITS scaling, but leave the results scaled up
00408    * by an overall factor of 8.
00409    * We must also scale the output by (8/7)**2 = 64/49, which we fold
00410    * into the constant multipliers:
00411    * cK now represents sqrt(2) * cos(K*pi/14) * 64/49.
00412    */
00413 
00414   dataptr = data;
00415   for (ctr = 0; ctr < 7; ctr++) {
00416     /* Even part */
00417 
          /* All seven column inputs are read before any element of this
           * column is overwritten below.
           */
00418     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
00419     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
00420     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
00421     tmp3 = dataptr[DCTSIZE*3];
00422 
00423     tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
00424     tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
00425     tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
00426 
00427     z1 = tmp0 + tmp2;
00428     dataptr[DCTSIZE*0] = (DCTELEM)
00429       DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
00430           CONST_BITS+PASS1_BITS);
00431     tmp3 += tmp3;                                       /* tmp3 = 2 * centre element from here on */
00432     z1 -= tmp3;
00433     z1 -= tmp3;                                         /* z1 = tmp0 + tmp2 - 4 * centre element */
00434     z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
00435     z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
00436     z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
00437     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS);
00438     z1 -= z2;
00439     z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
00440     dataptr[DCTSIZE*4] = (DCTELEM)
00441       DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
00442           CONST_BITS+PASS1_BITS);
00443     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS);
00444 
00445     /* Odd part */
00446 
00447     tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
00448     tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
00449     tmp0 = tmp1 - tmp2;
00450     tmp1 += tmp2;
00451     tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
00452     tmp1 += tmp2;
00453     tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
00454     tmp0 += tmp3;
00455     tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
00456 
00457     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS);
00458     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS);
00459     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS);
00460 
00461     dataptr++;          /* advance pointer to next column */
00462   }
00463 }
00464 
00465 
00466 /*
00467  * Perform the forward DCT on a 6x6 sample block.
       *
       * data        - output: 8x8 coefficient block (DCTSIZE elements per row).
       *               It is zeroed first and only the entries in rows 0..5,
       *               columns 0..5 are then written.
       * sample_data - input: array of sample rows; rows 0..5 are read.
       * start_col   - index of the first of the 6 samples read from each row.
       *
       * Nothing is returned; the extra (8/6)**2 output scaling is folded into
       * the pass 2 constants.
00468  */
00469 
00470 GLOBAL(void)
00471 jpeg_fdct_6x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00472 {
00473   INT32 tmp0, tmp1, tmp2;         /* mirrored sums, then reused for the differences */
00474   INT32 tmp10, tmp11, tmp12;      /* even-part terms; tmp10 is reused in the odd part */
00475   DCTELEM *dataptr;               /* current output row (pass 1) or column (pass 2) */
00476   JSAMPROW elemptr;               /* first sample of the current input row */
00477   int ctr;
00478   SHIFT_TEMPS                     /* macro from outside this file; presumably support for the shift macros */
00479 
00480   /* Pre-zero output coefficient block. */
00481   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
00482 
00483   /* Pass 1: process rows.
00484    * Note results are scaled up by sqrt(8) compared to a true DCT;
00485    * furthermore, we scale the results by 2**PASS1_BITS.
00486    * cK represents sqrt(2) * cos(K*pi/12).
00487    */
00488 
00489   dataptr = data;
00490   for (ctr = 0; ctr < 6; ctr++) {
00491     elemptr = sample_data[ctr] + start_col;
00492 
00493     /* Even part */
00494 
00495     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
00496     tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
00497     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
00498 
00499     tmp10 = tmp0 + tmp2;
00500     tmp12 = tmp0 - tmp2;
00501 
          /* tmp0..tmp2 are now overwritten with the mirrored differences used
           * by the odd part.
           */
00502     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
00503     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
00504     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
00505 
00506     /* Apply unsigned->signed conversion. */
          /* NOTE(review): the left operand of << is negative when the six
           * samples sum to less than 6 * CENTERJSAMPLE; ISO C leaves that
           * shift undefined -- confirm the supported compilers cope.
           */
00507     dataptr[0] = (DCTELEM)
00508       ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
00509     dataptr[2] = (DCTELEM)
00510       DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
00511           CONST_BITS-PASS1_BITS);
00512     dataptr[4] = (DCTELEM)
00513       DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
00514           CONST_BITS-PASS1_BITS);
00515 
00516     /* Odd part */
00517 
          /* tmp10 is descaled right away, so it can be added directly to the
           * terms shifted left by PASS1_BITS below.
           */
00518     tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
00519             CONST_BITS-PASS1_BITS);
00520 
00521     dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
00522     dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
00523     dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
00524 
00525     dataptr += DCTSIZE;     /* advance pointer to next row */
00526   }
00527 
00528   /* Pass 2: process columns.
00529    * We remove the PASS1_BITS scaling, but leave the results scaled up
00530    * by an overall factor of 8.
00531    * We must also scale the output by (8/6)**2 = 16/9, which we fold
00532    * into the constant multipliers:
00533    * cK now represents sqrt(2) * cos(K*pi/12) * 16/9.
00534    */
00535 
00536   dataptr = data;
00537   for (ctr = 0; ctr < 6; ctr++) {
00538     /* Even part */
00539 
          /* All six column inputs are read before any element of this column
           * is overwritten below.
           */
00540     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
00541     tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
00542     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
00543 
00544     tmp10 = tmp0 + tmp2;
00545     tmp12 = tmp0 - tmp2;
00546 
00547     tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
00548     tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
00549     tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
00550 
00551     dataptr[DCTSIZE*0] = (DCTELEM)
00552       DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
00553           CONST_BITS+PASS1_BITS);
00554     dataptr[DCTSIZE*2] = (DCTELEM)
00555       DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
00556           CONST_BITS+PASS1_BITS);
00557     dataptr[DCTSIZE*4] = (DCTELEM)
00558       DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
00559           CONST_BITS+PASS1_BITS);
00560 
00561     /* Odd part */
00562 
          /* Unlike pass 1, tmp10 stays at full precision here; the plain terms
           * are multiplied by 16/9 instead of shifted, and one DESCALE is
           * applied per output.
           */
00563     tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
00564 
00565     dataptr[DCTSIZE*1] = (DCTELEM)
00566       DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
00567           CONST_BITS+PASS1_BITS);
00568     dataptr[DCTSIZE*3] = (DCTELEM)
00569       DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
00570           CONST_BITS+PASS1_BITS);
00571     dataptr[DCTSIZE*5] = (DCTELEM)
00572       DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
00573           CONST_BITS+PASS1_BITS);
00574 
00575     dataptr++;          /* advance pointer to next column */
00576   }
00577 }
00578 
00579 
00580 /*
00581  * Perform the forward DCT on a 5x5 sample block.
       *
       * data        - output: 8x8 coefficient block (DCTSIZE elements per row).
       *               It is zeroed first and only the entries in rows 0..4,
       *               columns 0..4 are then written.
       * sample_data - input: array of sample rows; rows 0..4 are read.
       * start_col   - index of the first of the 5 samples read from each row.
       *
       * Nothing is returned; the extra (8/5)**2 output scaling is split between
       * a factor of 2 in pass 1 and the 32/25 folded into the pass 2 constants.
00582  */
00583 
00584 GLOBAL(void)
00585 jpeg_fdct_5x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00586 {
00587   INT32 tmp0, tmp1, tmp2;         /* mirrored sums (tmp2 = centre element); tmp0/tmp1 reused for differences */
00588   INT32 tmp10, tmp11;             /* even-part terms; tmp10 is reused in the odd part */
00589   DCTELEM *dataptr;               /* current output row (pass 1) or column (pass 2) */
00590   JSAMPROW elemptr;               /* first sample of the current input row */
00591   int ctr;
00592   SHIFT_TEMPS                     /* macro from outside this file; presumably support for the shift macros */
00593 
00594   /* Pre-zero output coefficient block. */
00595   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
00596 
00597   /* Pass 1: process rows.
00598    * Note results are scaled up by sqrt(8) compared to a true DCT;
00599    * furthermore, we scale the results by 2**PASS1_BITS.
00600    * We scale the results further by 2 as part of output adaption
00601    * scaling for different DCT size.
00602    * cK represents sqrt(2) * cos(K*pi/10).
00603    */
00604 
00605   dataptr = data;
00606   for (ctr = 0; ctr < 5; ctr++) {
00607     elemptr = sample_data[ctr] + start_col;
00608 
00609     /* Even part */
00610 
00611     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
00612     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
00613     tmp2 = GETJSAMPLE(elemptr[2]);
00614 
00615     tmp10 = tmp0 + tmp1;
00616     tmp11 = tmp0 - tmp1;
00617 
          /* tmp0 and tmp1 are now overwritten with the mirrored differences
           * used by the odd part.
           */
00618     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
00619     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
00620 
00621     /* Apply unsigned->signed conversion. */
          /* The extra factor of 2 shows up as PASS1_BITS+1 here and as the
           * "-1" in every DESCALE count of this pass.
           * NOTE(review): the left operand of << is negative when the five
           * samples sum to less than 5 * CENTERJSAMPLE; ISO C leaves that
           * shift undefined -- confirm the supported compilers cope.
           */
00622     dataptr[0] = (DCTELEM)
00623       ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << (PASS1_BITS+1));
00624     tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
00625     tmp10 -= tmp2 << 2;                                 /* tmp10 = tmp0 + tmp1 - 4 * centre sample */
00626     tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
00627     dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS-1);
00628     dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS-1);
00629 
00630     /* Odd part */
00631 
00632     tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
00633 
00634     dataptr[1] = (DCTELEM)
00635       DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
00636           CONST_BITS-PASS1_BITS-1);
00637     dataptr[3] = (DCTELEM)
00638       DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
00639           CONST_BITS-PASS1_BITS-1);
00640 
00641     dataptr += DCTSIZE;     /* advance pointer to next row */
00642   }
00643 
00644   /* Pass 2: process columns.
00645    * We remove the PASS1_BITS scaling, but leave the results scaled up
00646    * by an overall factor of 8.
00647    * We must also scale the output by (8/5)**2 = 64/25, which we partially
00648    * fold into the constant multipliers (other part was done in pass 1):
00649    * cK now represents sqrt(2) * cos(K*pi/10) * 32/25.
00650    */
00651 
00652   dataptr = data;
00653   for (ctr = 0; ctr < 5; ctr++) {
00654     /* Even part */
00655 
          /* All five column inputs are read before any element of this column
           * is overwritten below.
           */
00656     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
00657     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
00658     tmp2 = dataptr[DCTSIZE*2];
00659 
00660     tmp10 = tmp0 + tmp1;
00661     tmp11 = tmp0 - tmp1;
00662 
00663     tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
00664     tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
00665 
00666     dataptr[DCTSIZE*0] = (DCTELEM)
00667       DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
00668           CONST_BITS+PASS1_BITS);
00669     tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
00670     tmp10 -= tmp2 << 2;                                 /* tmp10 = tmp0 + tmp1 - 4 * centre element */
00671     tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
00672     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
00673     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
00674 
00675     /* Odd part */
00676 
00677     tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
00678 
00679     dataptr[DCTSIZE*1] = (DCTELEM)
00680       DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
00681           CONST_BITS+PASS1_BITS);
00682     dataptr[DCTSIZE*3] = (DCTELEM)
00683       DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
00684           CONST_BITS+PASS1_BITS);
00685 
00686     dataptr++;          /* advance pointer to next column */
00687   }
00688 }
00689 
00690 
00691 /*
00692  * Perform the forward DCT on a 4x4 sample block.
       *
       * data        - output: 8x8 coefficient block (DCTSIZE elements per row).
       *               It is zeroed first and only the entries in rows 0..3,
       *               columns 0..3 are then written.
       * sample_data - input: array of sample rows; rows 0..3 are read.
       * start_col   - index of the first of the 4 samples read from each row.
       *
       * Nothing is returned; the extra (8/4)**2 = 4 output scaling is applied
       * entirely in pass 1 through two additional bits of left shift.
00693  */
00694 
00695 GLOBAL(void)
00696 jpeg_fdct_4x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00697 {
00698   INT32 tmp0, tmp1;               /* mirrored sums; tmp0 is reused as the odd-part accumulator */
00699   INT32 tmp10, tmp11;             /* mirrored differences feeding the odd part */
00700   DCTELEM *dataptr;               /* current output row (pass 1) or column (pass 2) */
00701   JSAMPROW elemptr;               /* first sample of the current input row */
00702   int ctr;
00703   SHIFT_TEMPS                     /* macro from outside this file; presumably support for RIGHT_SHIFT */
00704 
00705   /* Pre-zero output coefficient block. */
00706   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
00707 
00708   /* Pass 1: process rows.
00709    * Note results are scaled up by sqrt(8) compared to a true DCT;
00710    * furthermore, we scale the results by 2**PASS1_BITS.
00711    * We must also scale the output by (8/4)**2 = 2**2, which we add here.
00712    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
00713    */
00714 
00715   dataptr = data;
00716   for (ctr = 0; ctr < 4; ctr++) {
00717     elemptr = sample_data[ctr] + start_col;
00718 
00719     /* Even part */
00720 
00721     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
00722     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
00723 
00724     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
00725     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
00726 
00727     /* Apply unsigned->signed conversion. */
          /* NOTE(review): the left operand of << is negative when the four
           * samples sum to less than 4 * CENTERJSAMPLE (and whenever
           * tmp0 < tmp1 on the next line); ISO C leaves that shift
           * undefined -- confirm the supported compilers cope.
           */
00728     dataptr[0] = (DCTELEM)
00729       ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+2));
00730     dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+2));
00731 
00732     /* Odd part */
00733 
00734     tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
00735     /* Add fudge factor here for final descale. */
          /* The shift count below is CONST_BITS-PASS1_BITS-2, so the term
           * added is half of that divisor (assuming ONE is the constant 1).
           */
00736     tmp0 += ONE << (CONST_BITS-PASS1_BITS-3);
00737 
00738     dataptr[1] = (DCTELEM)
00739       RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
00740           CONST_BITS-PASS1_BITS-2);
00741     dataptr[3] = (DCTELEM)
00742       RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
00743           CONST_BITS-PASS1_BITS-2);
00744 
00745     dataptr += DCTSIZE;     /* advance pointer to next row */
00746   }
00747 
00748   /* Pass 2: process columns.
00749    * We remove the PASS1_BITS scaling, but leave the results scaled up
00750    * by an overall factor of 8.
00751    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
00752    */
00753 
00754   dataptr = data;
00755   for (ctr = 0; ctr < 4; ctr++) {
00756     /* Even part */
00757 
00758     /* Add fudge factor here for final descale. */
          /* Folded into tmp0 only, so it reaches both outputs 0 and 2, which
           * are shifted right by PASS1_BITS below.
           */
00759     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
00760     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
00761 
          /* The differences must be read before the column elements are
           * overwritten below.
           */
00762     tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
00763     tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
00764 
00765     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
00766     dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
00767 
00768     /* Odd part */
00769 
00770     tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
00771     /* Add fudge factor here for final descale. */
00772     tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
00773 
00774     dataptr[DCTSIZE*1] = (DCTELEM)
00775       RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
00776           CONST_BITS+PASS1_BITS);
00777     dataptr[DCTSIZE*3] = (DCTELEM)
00778       RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
00779           CONST_BITS+PASS1_BITS);
00780 
00781     dataptr++;          /* advance pointer to next column */
00782   }
00783 }
00784 
00785 
00786 /*
00787  * Perform the forward DCT on a 3x3 sample block.
00788  */
00789 
00790 GLOBAL(void)
00791 jpeg_fdct_3x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00792 {
00793   INT32 tmp0, tmp1, tmp2;
00794   DCTELEM *dataptr;
00795   JSAMPROW elemptr;
00796   int ctr;
00797   SHIFT_TEMPS
00798 
00799   /* Pre-zero output coefficient block. */
00800   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
00801 
00802   /* Pass 1: process rows.
00803    * Note results are scaled up by sqrt(8) compared to a true DCT;
00804    * furthermore, we scale the results by 2**PASS1_BITS.
00805    * We scale the results further by 2**2 as part of output adaption
00806    * scaling for different DCT size.
00807    * cK represents sqrt(2) * cos(K*pi/6).
00808    */
00809 
00810   dataptr = data;
00811   for (ctr = 0; ctr < 3; ctr++) {
00812     elemptr = sample_data[ctr] + start_col;
00813 
00814     /* Even part */
00815 
00816     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
00817     tmp1 = GETJSAMPLE(elemptr[1]);
00818 
00819     tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
00820 
00821     /* Apply unsigned->signed conversion. */
00822     dataptr[0] = (DCTELEM)
00823       ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+2));
00824     dataptr[2] = (DCTELEM)
00825       DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
00826           CONST_BITS-PASS1_BITS-2);
00827 
00828     /* Odd part */
00829 
00830     dataptr[1] = (DCTELEM)
00831       DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
00832           CONST_BITS-PASS1_BITS-2);
00833 
00834     dataptr += DCTSIZE;     /* advance pointer to next row */
00835   }
00836 
00837   /* Pass 2: process columns.
00838    * We remove the PASS1_BITS scaling, but leave the results scaled up
00839    * by an overall factor of 8.
00840    * We must also scale the output by (8/3)**2 = 64/9, which we partially
00841    * fold into the constant multipliers (other part was done in pass 1):
00842    * cK now represents sqrt(2) * cos(K*pi/6) * 16/9.
00843    */
00844 
00845   dataptr = data;
00846   for (ctr = 0; ctr < 3; ctr++) {
00847     /* Even part */
00848 
00849     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
00850     tmp1 = dataptr[DCTSIZE*1];
00851 
00852     tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
00853 
00854     dataptr[DCTSIZE*0] = (DCTELEM)
00855       DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
00856           CONST_BITS+PASS1_BITS);
00857     dataptr[DCTSIZE*2] = (DCTELEM)
00858       DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
00859           CONST_BITS+PASS1_BITS);
00860 
00861     /* Odd part */
00862 
00863     dataptr[DCTSIZE*1] = (DCTELEM)
00864       DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
00865           CONST_BITS+PASS1_BITS);
00866 
00867     dataptr++;          /* advance pointer to next column */
00868   }
00869 }
00870 
00871 
00872 /*
00873  * Perform the forward DCT on a 2x2 sample block.
00874  */
00875 
00876 GLOBAL(void)
00877 jpeg_fdct_2x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00878 {
00879   DCTELEM tmp0, tmp1, tmp2, tmp3;
00880   JSAMPROW elemptr;
00881 
00882   /* Pre-zero output coefficient block. */
00883   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
00884 
00885   /* Pass 1: process rows.
00886    * Note results are scaled up by sqrt(8) compared to a true DCT.
00887    */
00888 
00889   /* Row 0 */
00890   elemptr = sample_data[0] + start_col;
00891 
00892   tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
00893   tmp1 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
00894 
00895   /* Row 1 */
00896   elemptr = sample_data[1] + start_col;
00897 
00898   tmp2 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[1]);
00899   tmp3 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[1]);
00900 
00901   /* Pass 2: process columns.
00902    * We leave the results scaled up by an overall factor of 8.
00903    * We must also scale the output by (8/2)**2 = 2**4.
00904    */
00905 
00906   /* Column 0 */
00907   /* Apply unsigned->signed conversion. */
00908   data[DCTSIZE*0] = (tmp0 + tmp2 - 4 * CENTERJSAMPLE) << 4;
00909   data[DCTSIZE*1] = (tmp0 - tmp2) << 4;
00910 
00911   /* Column 1 */
00912   data[DCTSIZE*0+1] = (tmp1 + tmp3) << 4;
00913   data[DCTSIZE*1+1] = (tmp1 - tmp3) << 4;
00914 }
00915 
00916 
00917 /*
00918  * Perform the forward DCT on a 1x1 sample block.
00919  */
00920 
00921 GLOBAL(void)
00922 jpeg_fdct_1x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00923 {
00924   DCTELEM dcval;
00925 
00926   /* Pre-zero output coefficient block. */
00927   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
00928 
00929   dcval = GETJSAMPLE(sample_data[0][start_col]);
00930 
00931   /* We leave the result scaled up by an overall factor of 8. */
00932   /* We must also scale the output by (8/1)**2 = 2**6. */
00933   /* Apply unsigned->signed conversion. */
00934   data[0] = (dcval - CENTERJSAMPLE) << 6;
00935 }
00936 
00937 
00938 /*
00939  * Perform the forward DCT on a 9x9 sample block.
00940  */
00941 
00942 GLOBAL(void)
00943 jpeg_fdct_9x9 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
00944 {
00945   INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
00946   INT32 tmp10, tmp11, tmp12, tmp13;
00947   INT32 z1, z2;
00948   DCTELEM workspace[8];
00949   DCTELEM *dataptr;
00950   DCTELEM *wsptr;
00951   JSAMPROW elemptr;
00952   int ctr;
00953   SHIFT_TEMPS
00954 
00955   /* Pass 1: process rows.
00956    * Note results are scaled up by sqrt(8) compared to a true DCT;
00957    * we scale the results further by 2 as part of output adaption
00958    * scaling for different DCT size.
00959    * cK represents sqrt(2) * cos(K*pi/18).
00960    */
00961 
00962   dataptr = data;
00963   ctr = 0;
00964   for (;;) {
00965     elemptr = sample_data[ctr] + start_col;
00966 
00967     /* Even part */
00968 
00969     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[8]);
00970     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[7]);
00971     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[6]);
00972     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[5]);
00973     tmp4 = GETJSAMPLE(elemptr[4]);
00974 
00975     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[8]);
00976     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[7]);
00977     tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[6]);
00978     tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[5]);
00979 
00980     z1 = tmp0 + tmp2 + tmp3;
00981     z2 = tmp1 + tmp4;
00982     /* Apply unsigned->signed conversion. */
00983     dataptr[0] = (DCTELEM) ((z1 + z2 - 9 * CENTERJSAMPLE) << 1);
00984     dataptr[6] = (DCTELEM)
00985       DESCALE(MULTIPLY(z1 - z2 - z2, FIX(0.707106781)),  /* c6 */
00986           CONST_BITS-1);
00987     z1 = MULTIPLY(tmp0 - tmp2, FIX(1.328926049));        /* c2 */
00988     z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(0.707106781)); /* c6 */
00989     dataptr[2] = (DCTELEM)
00990       DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.083350441))    /* c4 */
00991           + z1 + z2, CONST_BITS-1);
00992     dataptr[4] = (DCTELEM)
00993       DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.245575608))    /* c8 */
00994           + z1 - z2, CONST_BITS-1);
00995 
00996     /* Odd part */
00997 
00998     dataptr[3] = (DCTELEM)
00999       DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.224744871)), /* c3 */
01000           CONST_BITS-1);
01001 
01002     tmp11 = MULTIPLY(tmp11, FIX(1.224744871));        /* c3 */
01003     tmp0 = MULTIPLY(tmp10 + tmp12, FIX(0.909038955)); /* c5 */
01004     tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.483689525)); /* c7 */
01005 
01006     dataptr[1] = (DCTELEM) DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS-1);
01007 
01008     tmp2 = MULTIPLY(tmp12 - tmp13, FIX(1.392728481)); /* c1 */
01009 
01010     dataptr[5] = (DCTELEM) DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS-1);
01011     dataptr[7] = (DCTELEM) DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS-1);
01012 
01013     ctr++;
01014 
01015     if (ctr != DCTSIZE) {
01016       if (ctr == 9)
01017     break;          /* Done. */
01018       dataptr += DCTSIZE;   /* advance pointer to next row */
01019     } else
01020       dataptr = workspace;  /* switch pointer to extended workspace */
01021   }
01022 
01023   /* Pass 2: process columns.
01024    * We leave the results scaled up by an overall factor of 8.
01025    * We must also scale the output by (8/9)**2 = 64/81, which we partially
01026    * fold into the constant multipliers and final/initial shifting:
01027    * cK now represents sqrt(2) * cos(K*pi/18) * 128/81.
01028    */
01029 
01030   dataptr = data;
01031   wsptr = workspace;
01032   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
01033     /* Even part */
01034 
01035     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*0];
01036     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*7];
01037     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*6];
01038     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*5];
01039     tmp4 = dataptr[DCTSIZE*4];
01040 
01041     tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*0];
01042     tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*7];
01043     tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*6];
01044     tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*5];
01045 
01046     z1 = tmp0 + tmp2 + tmp3;
01047     z2 = tmp1 + tmp4;
01048     dataptr[DCTSIZE*0] = (DCTELEM)
01049       DESCALE(MULTIPLY(z1 + z2, FIX(1.580246914)),       /* 128/81 */
01050           CONST_BITS+2);
01051     dataptr[DCTSIZE*6] = (DCTELEM)
01052       DESCALE(MULTIPLY(z1 - z2 - z2, FIX(1.117403309)),  /* c6 */
01053           CONST_BITS+2);
01054     z1 = MULTIPLY(tmp0 - tmp2, FIX(2.100031287));        /* c2 */
01055     z2 = MULTIPLY(tmp1 - tmp4 - tmp4, FIX(1.117403309)); /* c6 */
01056     dataptr[DCTSIZE*2] = (DCTELEM)
01057       DESCALE(MULTIPLY(tmp2 - tmp3, FIX(1.711961190))    /* c4 */
01058           + z1 + z2, CONST_BITS+2);
01059     dataptr[DCTSIZE*4] = (DCTELEM)
01060       DESCALE(MULTIPLY(tmp3 - tmp0, FIX(0.388070096))    /* c8 */
01061           + z1 - z2, CONST_BITS+2);
01062 
01063     /* Odd part */
01064 
01065     dataptr[DCTSIZE*3] = (DCTELEM)
01066       DESCALE(MULTIPLY(tmp10 - tmp12 - tmp13, FIX(1.935399303)), /* c3 */
01067           CONST_BITS+2);
01068 
01069     tmp11 = MULTIPLY(tmp11, FIX(1.935399303));        /* c3 */
01070     tmp0 = MULTIPLY(tmp10 + tmp12, FIX(1.436506004)); /* c5 */
01071     tmp1 = MULTIPLY(tmp10 + tmp13, FIX(0.764348879)); /* c7 */
01072 
01073     dataptr[DCTSIZE*1] = (DCTELEM)
01074       DESCALE(tmp11 + tmp0 + tmp1, CONST_BITS+2);
01075 
01076     tmp2 = MULTIPLY(tmp12 - tmp13, FIX(2.200854883)); /* c1 */
01077 
01078     dataptr[DCTSIZE*5] = (DCTELEM)
01079       DESCALE(tmp0 - tmp11 - tmp2, CONST_BITS+2);
01080     dataptr[DCTSIZE*7] = (DCTELEM)
01081       DESCALE(tmp1 - tmp11 + tmp2, CONST_BITS+2);
01082 
01083     dataptr++;          /* advance pointer to next column */
01084     wsptr++;            /* advance pointer to next column */
01085   }
01086 }
01087 
01088 
01089 /*
01090  * Perform the forward DCT on a 10x10 sample block.
01091  */
01092 
01093 GLOBAL(void)
01094 jpeg_fdct_10x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
01095 {
01096   INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
01097   INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
01098   DCTELEM workspace[8*2];
01099   DCTELEM *dataptr;
01100   DCTELEM *wsptr;
01101   JSAMPROW elemptr;
01102   int ctr;
01103   SHIFT_TEMPS
01104 
01105   /* Pass 1: process rows.
01106    * Note results are scaled up by sqrt(8) compared to a true DCT;
01107    * we scale the results further by 2 as part of output adaption
01108    * scaling for different DCT size.
01109    * cK represents sqrt(2) * cos(K*pi/20).
01110    */
01111 
01112   dataptr = data;
01113   ctr = 0;
01114   for (;;) {
01115     elemptr = sample_data[ctr] + start_col;
01116 
01117     /* Even part */
01118 
01119     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
01120     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
01121     tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
01122     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
01123     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
01124 
01125     tmp10 = tmp0 + tmp4;
01126     tmp13 = tmp0 - tmp4;
01127     tmp11 = tmp1 + tmp3;
01128     tmp14 = tmp1 - tmp3;
01129 
01130     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
01131     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
01132     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
01133     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
01134     tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
01135 
01136     /* Apply unsigned->signed conversion. */
01137     dataptr[0] = (DCTELEM)
01138       ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << 1);
01139     tmp12 += tmp12;
01140     dataptr[4] = (DCTELEM)
01141       DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
01142           MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
01143           CONST_BITS-1);
01144     tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
01145     dataptr[2] = (DCTELEM)
01146       DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
01147           CONST_BITS-1);
01148     dataptr[6] = (DCTELEM)
01149       DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
01150           CONST_BITS-1);
01151 
01152     /* Odd part */
01153 
01154     tmp10 = tmp0 + tmp4;
01155     tmp11 = tmp1 - tmp3;
01156     dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << 1);
01157     tmp2 <<= CONST_BITS;
01158     dataptr[1] = (DCTELEM)
01159       DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
01160           MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
01161           MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
01162           MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
01163           CONST_BITS-1);
01164     tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
01165         MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
01166     tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
01167         (tmp11 << (CONST_BITS - 1)) - tmp2;
01168     dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-1);
01169     dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-1);
01170 
01171     ctr++;
01172 
01173     if (ctr != DCTSIZE) {
01174       if (ctr == 10)
01175     break;          /* Done. */
01176       dataptr += DCTSIZE;   /* advance pointer to next row */
01177     } else
01178       dataptr = workspace;  /* switch pointer to extended workspace */
01179   }
01180 
01181   /* Pass 2: process columns.
01182    * We leave the results scaled up by an overall factor of 8.
01183    * We must also scale the output by (8/10)**2 = 16/25, which we partially
01184    * fold into the constant multipliers and final/initial shifting:
01185    * cK now represents sqrt(2) * cos(K*pi/20) * 32/25.
01186    */
01187 
01188   dataptr = data;
01189   wsptr = workspace;
01190   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
01191     /* Even part */
01192 
01193     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
01194     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
01195     tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
01196     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
01197     tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
01198 
01199     tmp10 = tmp0 + tmp4;
01200     tmp13 = tmp0 - tmp4;
01201     tmp11 = tmp1 + tmp3;
01202     tmp14 = tmp1 - tmp3;
01203 
01204     tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
01205     tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
01206     tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
01207     tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
01208     tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
01209 
01210     dataptr[DCTSIZE*0] = (DCTELEM)
01211       DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
01212           CONST_BITS+2);
01213     tmp12 += tmp12;
01214     dataptr[DCTSIZE*4] = (DCTELEM)
01215       DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
01216           MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
01217           CONST_BITS+2);
01218     tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
01219     dataptr[DCTSIZE*2] = (DCTELEM)
01220       DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
01221           CONST_BITS+2);
01222     dataptr[DCTSIZE*6] = (DCTELEM)
01223       DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
01224           CONST_BITS+2);
01225 
01226     /* Odd part */
01227 
01228     tmp10 = tmp0 + tmp4;
01229     tmp11 = tmp1 - tmp3;
01230     dataptr[DCTSIZE*5] = (DCTELEM)
01231       DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
01232           CONST_BITS+2);
01233     tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
01234     dataptr[DCTSIZE*1] = (DCTELEM)
01235       DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
01236           MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
01237           MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
01238           MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
01239           CONST_BITS+2);
01240     tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
01241         MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
01242     tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
01243         MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
01244     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+2);
01245     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+2);
01246 
01247     dataptr++;          /* advance pointer to next column */
01248     wsptr++;            /* advance pointer to next column */
01249   }
01250 }
01251 
01252 
01253 /*
01254  * Perform the forward DCT on an 11x11 sample block.
01255  */
01256 
01257 GLOBAL(void)
01258 jpeg_fdct_11x11 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
01259 {
01260   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
01261   INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
01262   INT32 z1, z2, z3;
01263   DCTELEM workspace[8*3];
01264   DCTELEM *dataptr;
01265   DCTELEM *wsptr;
01266   JSAMPROW elemptr;
01267   int ctr;
01268   SHIFT_TEMPS
01269 
01270   /* Pass 1: process rows.
01271    * Note results are scaled up by sqrt(8) compared to a true DCT;
01272    * we scale the results further by 2 as part of output adaption
01273    * scaling for different DCT size.
01274    * cK represents sqrt(2) * cos(K*pi/22).
01275    */
01276 
01277   dataptr = data;
01278   ctr = 0;
01279   for (;;) {
01280     elemptr = sample_data[ctr] + start_col;
01281 
01282     /* Even part */
01283 
01284     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[10]);
01285     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[9]);
01286     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[8]);
01287     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[7]);
01288     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[6]);
01289     tmp5 = GETJSAMPLE(elemptr[5]);
01290 
01291     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[10]);
01292     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[9]);
01293     tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[8]);
01294     tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[7]);
01295     tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[6]);
01296 
01297     /* Apply unsigned->signed conversion. */
01298     dataptr[0] = (DCTELEM)
01299       ((tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 - 11 * CENTERJSAMPLE) << 1);
01300     tmp5 += tmp5;
01301     tmp0 -= tmp5;
01302     tmp1 -= tmp5;
01303     tmp2 -= tmp5;
01304     tmp3 -= tmp5;
01305     tmp4 -= tmp5;
01306     z1 = MULTIPLY(tmp0 + tmp3, FIX(1.356927976)) +       /* c2 */
01307      MULTIPLY(tmp2 + tmp4, FIX(0.201263574));        /* c10 */
01308     z2 = MULTIPLY(tmp1 - tmp3, FIX(0.926112931));        /* c6 */
01309     z3 = MULTIPLY(tmp0 - tmp1, FIX(1.189712156));        /* c4 */
01310     dataptr[2] = (DCTELEM)
01311       DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.018300590)) /* c2+c8-c6 */
01312           - MULTIPLY(tmp4, FIX(1.390975730)),        /* c4+c10 */
01313           CONST_BITS-1);
01314     dataptr[4] = (DCTELEM)
01315       DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.062335650)) /* c4-c6-c10 */
01316           - MULTIPLY(tmp2, FIX(1.356927976))         /* c2 */
01317           + MULTIPLY(tmp4, FIX(0.587485545)),        /* c8 */
01318           CONST_BITS-1);
01319     dataptr[6] = (DCTELEM)
01320       DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.620527200)) /* c2+c4-c6 */
01321           - MULTIPLY(tmp2, FIX(0.788749120)),        /* c8+c10 */
01322           CONST_BITS-1);
01323 
01324     /* Odd part */
01325 
01326     tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.286413905));    /* c3 */
01327     tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.068791298));    /* c5 */
01328     tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.764581576));    /* c7 */
01329     tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.719967871)) /* c7+c5+c3-c1 */
01330        + MULTIPLY(tmp14, FIX(0.398430003));          /* c9 */
01331     tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.764581576));  /* -c7 */
01332     tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.399818907));  /* -c1 */
01333     tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.276416582)) /* c9+c7+c1-c3 */
01334         - MULTIPLY(tmp14, FIX(1.068791298));         /* c5 */
01335     tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.398430003));   /* c9 */
01336     tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(1.989053629)) /* c9+c5+c3-c7 */
01337         + MULTIPLY(tmp14, FIX(1.399818907));         /* c1 */
01338     tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.305598626)) /* c1+c5-c9-c7 */
01339         - MULTIPLY(tmp14, FIX(1.286413905));         /* c3 */
01340 
01341     dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-1);
01342     dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-1);
01343     dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-1);
01344     dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-1);
01345 
01346     ctr++;
01347 
01348     if (ctr != DCTSIZE) {
01349       if (ctr == 11)
01350     break;          /* Done. */
01351       dataptr += DCTSIZE;   /* advance pointer to next row */
01352     } else
01353       dataptr = workspace;  /* switch pointer to extended workspace */
01354   }
01355 
01356   /* Pass 2: process columns.
01357    * We leave the results scaled up by an overall factor of 8.
01358    * We must also scale the output by (8/11)**2 = 64/121, which we partially
01359    * fold into the constant multipliers and final/initial shifting:
01360    * cK now represents sqrt(2) * cos(K*pi/22) * 128/121.
01361    */
01362 
01363   dataptr = data;
01364   wsptr = workspace;
01365   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
01366     /* Even part */
01367 
01368     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*2];
01369     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*1];
01370     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*0];
01371     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*7];
01372     tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*6];
01373     tmp5 = dataptr[DCTSIZE*5];
01374 
01375     tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*2];
01376     tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*1];
01377     tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*0];
01378     tmp13 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*7];
01379     tmp14 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*6];
01380 
01381     dataptr[DCTSIZE*0] = (DCTELEM)
01382       DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5,
01383                FIX(1.057851240)),                /* 128/121 */
01384           CONST_BITS+2);
01385     tmp5 += tmp5;
01386     tmp0 -= tmp5;
01387     tmp1 -= tmp5;
01388     tmp2 -= tmp5;
01389     tmp3 -= tmp5;
01390     tmp4 -= tmp5;
01391     z1 = MULTIPLY(tmp0 + tmp3, FIX(1.435427942)) +       /* c2 */
01392      MULTIPLY(tmp2 + tmp4, FIX(0.212906922));        /* c10 */
01393     z2 = MULTIPLY(tmp1 - tmp3, FIX(0.979689713));        /* c6 */
01394     z3 = MULTIPLY(tmp0 - tmp1, FIX(1.258538479));        /* c4 */
01395     dataptr[DCTSIZE*2] = (DCTELEM)
01396       DESCALE(z1 + z2 - MULTIPLY(tmp3, FIX(1.077210542)) /* c2+c8-c6 */
01397           - MULTIPLY(tmp4, FIX(1.471445400)),        /* c4+c10 */
01398           CONST_BITS+2);
01399     dataptr[DCTSIZE*4] = (DCTELEM)
01400       DESCALE(z2 + z3 + MULTIPLY(tmp1, FIX(0.065941844)) /* c4-c6-c10 */
01401           - MULTIPLY(tmp2, FIX(1.435427942))         /* c2 */
01402           + MULTIPLY(tmp4, FIX(0.621472312)),        /* c8 */
01403           CONST_BITS+2);
01404     dataptr[DCTSIZE*6] = (DCTELEM)
01405       DESCALE(z1 + z3 - MULTIPLY(tmp0, FIX(1.714276708)) /* c2+c4-c6 */
01406           - MULTIPLY(tmp2, FIX(0.834379234)),        /* c8+c10 */
01407           CONST_BITS+2);
01408 
01409     /* Odd part */
01410 
01411     tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.360834544));    /* c3 */
01412     tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.130622199));    /* c5 */
01413     tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.808813568));    /* c7 */
01414     tmp0 = tmp1 + tmp2 + tmp3 - MULTIPLY(tmp10, FIX(1.819470145)) /* c7+c5+c3-c1 */
01415        + MULTIPLY(tmp14, FIX(0.421479672));          /* c9 */
01416     tmp4 = MULTIPLY(tmp11 + tmp12, - FIX(0.808813568));  /* -c7 */
01417     tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.480800167));  /* -c1 */
01418     tmp1 += tmp4 + tmp5 + MULTIPLY(tmp11, FIX(1.350258864)) /* c9+c7+c1-c3 */
01419         - MULTIPLY(tmp14, FIX(1.130622199));         /* c5 */
01420     tmp10 = MULTIPLY(tmp12 + tmp13, FIX(0.421479672));   /* c9 */
01421     tmp2 += tmp4 + tmp10 - MULTIPLY(tmp12, FIX(2.104122847)) /* c9+c5+c3-c7 */
01422         + MULTIPLY(tmp14, FIX(1.480800167));         /* c1 */
01423     tmp3 += tmp5 + tmp10 + MULTIPLY(tmp13, FIX(1.381129125)) /* c1+c5-c9-c7 */
01424         - MULTIPLY(tmp14, FIX(1.360834544));         /* c3 */
01425 
01426     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
01427     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
01428     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
01429     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
01430 
01431     dataptr++;          /* advance pointer to next column */
01432     wsptr++;            /* advance pointer to next column */
01433   }
01434 }
01435 
01436 
01437 /*
01438  * Perform the forward DCT on a 12x12 sample block.
01439  */
01440 
01441 GLOBAL(void)
01442 jpeg_fdct_12x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
01443 {
01444   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
01445   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
01446   DCTELEM workspace[8*4];
01447   DCTELEM *dataptr;
01448   DCTELEM *wsptr;
01449   JSAMPROW elemptr;
01450   int ctr;
01451   SHIFT_TEMPS
01452 
01453   /* Pass 1: process rows.
01454    * Note results are scaled up by sqrt(8) compared to a true DCT.
01455    * cK represents sqrt(2) * cos(K*pi/24).
01456    */
01457 
01458   dataptr = data;
01459   ctr = 0;
01460   for (;;) {
01461     elemptr = sample_data[ctr] + start_col;
01462 
01463     /* Even part */
01464 
01465     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
01466     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
01467     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
01468     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
01469     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
01470     tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
01471 
01472     tmp10 = tmp0 + tmp5;
01473     tmp13 = tmp0 - tmp5;
01474     tmp11 = tmp1 + tmp4;
01475     tmp14 = tmp1 - tmp4;
01476     tmp12 = tmp2 + tmp3;
01477     tmp15 = tmp2 - tmp3;
01478 
01479     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
01480     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
01481     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
01482     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
01483     tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
01484     tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
01485 
01486     /* Apply unsigned->signed conversion. */
01487     dataptr[0] = (DCTELEM) (tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE);
01488     dataptr[6] = (DCTELEM) (tmp13 - tmp14 - tmp15);
01489     dataptr[4] = (DCTELEM)
01490       DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
01491           CONST_BITS);
01492     dataptr[2] = (DCTELEM)
01493       DESCALE(tmp14 - tmp15 + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
01494           CONST_BITS);
01495 
01496     /* Odd part */
01497 
01498     tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
01499     tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
01500     tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
01501     tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
01502     tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
01503     tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
01504         + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
01505     tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
01506     tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
01507         + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
01508     tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
01509         - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
01510     tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
01511         - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
01512 
01513     dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS);
01514     dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS);
01515     dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS);
01516     dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS);
01517 
01518     ctr++;
01519 
01520     if (ctr != DCTSIZE) {
01521       if (ctr == 12)
01522     break;          /* Done. */
01523       dataptr += DCTSIZE;   /* advance pointer to next row */
01524     } else
01525       dataptr = workspace;  /* switch pointer to extended workspace */
01526   }
01527 
01528   /* Pass 2: process columns.
01529    * We leave the results scaled up by an overall factor of 8.
01530    * We must also scale the output by (8/12)**2 = 4/9, which we partially
01531    * fold into the constant multipliers and final shifting:
01532    * cK now represents sqrt(2) * cos(K*pi/24) * 8/9.
01533    */
01534 
01535   dataptr = data;
01536   wsptr = workspace;
01537   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
01538     /* Even part */
01539 
01540     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
01541     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
01542     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
01543     tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
01544     tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
01545     tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
01546 
01547     tmp10 = tmp0 + tmp5;
01548     tmp13 = tmp0 - tmp5;
01549     tmp11 = tmp1 + tmp4;
01550     tmp14 = tmp1 - tmp4;
01551     tmp12 = tmp2 + tmp3;
01552     tmp15 = tmp2 - tmp3;
01553 
01554     tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
01555     tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
01556     tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
01557     tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
01558     tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
01559     tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
01560 
01561     dataptr[DCTSIZE*0] = (DCTELEM)
01562       DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
01563           CONST_BITS+1);
01564     dataptr[DCTSIZE*6] = (DCTELEM)
01565       DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
01566           CONST_BITS+1);
01567     dataptr[DCTSIZE*4] = (DCTELEM)
01568       DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
01569           CONST_BITS+1);
01570     dataptr[DCTSIZE*2] = (DCTELEM)
01571       DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
01572           MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
01573           CONST_BITS+1);
01574 
01575     /* Odd part */
01576 
01577     tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
01578     tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
01579     tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
01580     tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
01581     tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
01582     tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
01583         + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
01584     tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
01585     tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
01586         + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
01587     tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
01588         - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
01589     tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
01590         - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
01591 
01592     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+1);
01593     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+1);
01594     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+1);
01595     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+1);
01596 
01597     dataptr++;          /* advance pointer to next column */
01598     wsptr++;            /* advance pointer to next column */
01599   }
01600 }
01601 
01602 
01603 /*
01604  * Perform the forward DCT on a 13x13 sample block.
 *
 * Reads 13 rows (sample_data[0] .. sample_data[12]) of 13 samples each,
 * starting at column start_col, and leaves the result in data[].
 * Only output indices 0..7 are computed in each direction, so the
 * 13x13 input is reduced to a single coefficient block.
 *
 * data        - receives the coefficients; it is also used as scratch
 *               storage for the first DCTSIZE row results between passes.
 * sample_data - array of sample row pointers; rows 0..12 are read.
 * start_col   - index of the first sample column read in every row.
 *
 * Pass 1 runs a 13-point row transform on each input row.  The first
 * DCTSIZE row results are stored in data[], the remaining ones in the
 * local workspace[] (declared as 8*5 elements, so this layout relies on
 * DCTSIZE being 8).  Pass 2 runs the same transform down each of the
 * DCTSIZE columns, reading from data[] and workspace[] and writing the
 * final values back into data[].
01605  */
01606 
01607 GLOBAL(void)
01608 jpeg_fdct_13x13 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
01609 {
01610   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
01611   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
01612   INT32 z1, z2;
        /* Holds the pass 1 results of input rows DCTSIZE..12. */
01613   DCTELEM workspace[8*5];
01614   DCTELEM *dataptr;
01615   DCTELEM *wsptr;
01616   JSAMPROW elemptr;
01617   int ctr;
01618   SHIFT_TEMPS
01619 
01620   /* Pass 1: process rows.
01621    * Note results are scaled up by sqrt(8) compared to a true DCT.
01622    * cK represents sqrt(2) * cos(K*pi/26).
01623    */
01624 
01625   dataptr = data;
01626   ctr = 0;
01627   for (;;) {
01628     elemptr = sample_data[ctr] + start_col;
01629 
01630     /* Even part */
01631 
          /* tmp0..tmp5: sums of the mirrored sample pairs (k, 12-k);
           * tmp6: the unpaired centre sample.
           */
01632     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[12]);
01633     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[11]);
01634     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[10]);
01635     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[9]);
01636     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[8]);
01637     tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[7]);
01638     tmp6 = GETJSAMPLE(elemptr[6]);
01639 
          /* tmp10..tmp15: differences of the same pairs, used by the odd part. */
01640     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[12]);
01641     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[11]);
01642     tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[10]);
01643     tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[9]);
01644     tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[8]);
01645     tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[7]);
01646 
01647     /* Apply unsigned->signed conversion. */
01648     dataptr[0] = (DCTELEM)
01649       (tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6 - 13 * CENTERJSAMPLE);
          /* Subtract twice the centre sample from every pair sum.  The
           * resulting terms (like the pair differences above) do not change
           * when a constant is added to all samples, so dataptr[0] is the
           * only output that needs the explicit CENTERJSAMPLE offset.
           */
01650     tmp6 += tmp6;
01651     tmp0 -= tmp6;
01652     tmp1 -= tmp6;
01653     tmp2 -= tmp6;
01654     tmp3 -= tmp6;
01655     tmp4 -= tmp6;
01656     tmp5 -= tmp6;
01657     dataptr[2] = (DCTELEM)
01658       DESCALE(MULTIPLY(tmp0, FIX(1.373119086)) +   /* c2 */
01659           MULTIPLY(tmp1, FIX(1.058554052)) +   /* c6 */
01660           MULTIPLY(tmp2, FIX(0.501487041)) -   /* c10 */
01661           MULTIPLY(tmp3, FIX(0.170464608)) -   /* c12 */
01662           MULTIPLY(tmp4, FIX(0.803364869)) -   /* c8 */
01663           MULTIPLY(tmp5, FIX(1.252223920)),    /* c4 */
01664           CONST_BITS);
          /* Outputs 4 and 6 share the two partial sums z1 and z2. */
01665     z1 = MULTIPLY(tmp0 - tmp2, FIX(1.155388986)) - /* (c4+c6)/2 */
01666      MULTIPLY(tmp3 - tmp4, FIX(0.435816023)) - /* (c2-c10)/2 */
01667      MULTIPLY(tmp1 - tmp5, FIX(0.316450131));  /* (c8-c12)/2 */
01668     z2 = MULTIPLY(tmp0 + tmp2, FIX(0.096834934)) - /* (c4-c6)/2 */
01669      MULTIPLY(tmp3 + tmp4, FIX(0.937303064)) + /* (c2+c10)/2 */
01670      MULTIPLY(tmp1 + tmp5, FIX(0.486914739));  /* (c8+c12)/2 */
01671 
01672     dataptr[4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS);
01673     dataptr[6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS);
01674 
01675     /* Odd part */
01676 
          /* From here on tmp0..tmp6 are reused as scratch for the odd
           * outputs; the even-part values they held are no longer needed.
           */
01677     tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.322312651));   /* c3 */
01678     tmp2 = MULTIPLY(tmp10 + tmp12, FIX(1.163874945));   /* c5 */
01679     tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.937797057)) +  /* c7 */
01680        MULTIPLY(tmp14 + tmp15, FIX(0.338443458));   /* c11 */
01681     tmp0 = tmp1 + tmp2 + tmp3 -
01682        MULTIPLY(tmp10, FIX(2.020082300)) +          /* c3+c5+c7-c1 */
01683        MULTIPLY(tmp14, FIX(0.318774355));           /* c9-c11 */
01684     tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.937797057)) -  /* c7 */
01685        MULTIPLY(tmp11 + tmp12, FIX(0.338443458));   /* c11 */
01686     tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(1.163874945)); /* -c5 */
01687     tmp1 += tmp4 + tmp5 +
01688         MULTIPLY(tmp11, FIX(0.837223564)) -         /* c5+c9+c11-c3 */
01689         MULTIPLY(tmp14, FIX(2.341699410));          /* c1+c7 */
01690     tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.657217813)); /* -c9 */
01691     tmp2 += tmp4 + tmp6 -
01692         MULTIPLY(tmp12, FIX(1.572116027)) +         /* c1+c5-c9-c11 */
01693         MULTIPLY(tmp15, FIX(2.260109708));          /* c3+c7 */
01694     tmp3 += tmp5 + tmp6 +
01695         MULTIPLY(tmp13, FIX(2.205608352)) -         /* c3+c5+c9-c7 */
01696         MULTIPLY(tmp15, FIX(1.742345811));          /* c1+c11 */
01697 
01698     dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
01699     dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
01700     dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
01701     dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
01702 
01703     ctr++;
01704 
          /* Row results 0..DCTSIZE-1 go to data[]; once row DCTSIZE is
           * reached the output pointer moves to workspace[] and keeps
           * advancing there until all 13 rows are done.
           */
01705     if (ctr != DCTSIZE) {
01706       if (ctr == 13)
01707     break;          /* Done. */
01708       dataptr += DCTSIZE;   /* advance pointer to next row */
01709     } else
01710       dataptr = workspace;  /* switch pointer to extended workspace */
01711   }
01712 
01713   /* Pass 2: process columns.
01714    * We leave the results scaled up by an overall factor of 8.
01715    * We must also scale the output by (8/13)**2 = 64/169, which we partially
01716    * fold into the constant multipliers and final shifting:
01717    * cK now represents sqrt(2) * cos(K*pi/26) * 128/169.
01718    */
01719 
01720   dataptr = data;
01721   wsptr = workspace;
01722   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
01723     /* Even part */
01724 
          /* Same pairing as pass 1, applied to row results: workspace row j
           * was filled from input row DCTSIZE+j, so data rows 0..4 pair with
           * workspace rows 4..0, data row 5 pairs with data row 7, and data
           * row 6 is the unpaired centre.
           */
01725     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*4];
01726     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*3];
01727     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*2];
01728     tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*1];
01729     tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*0];
01730     tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*7];
01731     tmp6 = dataptr[DCTSIZE*6];
01732 
01733     tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*4];
01734     tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*3];
01735     tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*2];
01736     tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*1];
01737     tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*0];
01738     tmp15 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*7];
01739 
          /* All inputs of this column have been read into temporaries, so
           * the column of data[] can now be overwritten with final values.
           */
01740     dataptr[DCTSIZE*0] = (DCTELEM)
01741       DESCALE(MULTIPLY(tmp0 + tmp1 + tmp2 + tmp3 + tmp4 + tmp5 + tmp6,
01742                FIX(0.757396450)),          /* 128/169 */
01743           CONST_BITS+1);
          /* Same centre-term folding as in pass 1. */
01744     tmp6 += tmp6;
01745     tmp0 -= tmp6;
01746     tmp1 -= tmp6;
01747     tmp2 -= tmp6;
01748     tmp3 -= tmp6;
01749     tmp4 -= tmp6;
01750     tmp5 -= tmp6;
01751     dataptr[DCTSIZE*2] = (DCTELEM)
01752       DESCALE(MULTIPLY(tmp0, FIX(1.039995521)) +   /* c2 */
01753           MULTIPLY(tmp1, FIX(0.801745081)) +   /* c6 */
01754           MULTIPLY(tmp2, FIX(0.379824504)) -   /* c10 */
01755           MULTIPLY(tmp3, FIX(0.129109289)) -   /* c12 */
01756           MULTIPLY(tmp4, FIX(0.608465700)) -   /* c8 */
01757           MULTIPLY(tmp5, FIX(0.948429952)),    /* c4 */
01758           CONST_BITS+1);
01759     z1 = MULTIPLY(tmp0 - tmp2, FIX(0.875087516)) - /* (c4+c6)/2 */
01760      MULTIPLY(tmp3 - tmp4, FIX(0.330085509)) - /* (c2-c10)/2 */
01761      MULTIPLY(tmp1 - tmp5, FIX(0.239678205));  /* (c8-c12)/2 */
01762     z2 = MULTIPLY(tmp0 + tmp2, FIX(0.073342435)) - /* (c4-c6)/2 */
01763      MULTIPLY(tmp3 + tmp4, FIX(0.709910013)) + /* (c2+c10)/2 */
01764      MULTIPLY(tmp1 + tmp5, FIX(0.368787494));  /* (c8+c12)/2 */
01765 
01766     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+1);
01767     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 - z2, CONST_BITS+1);
01768 
01769     /* Odd part */
01770 
01771     tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.001514908));   /* c3 */
01772     tmp2 = MULTIPLY(tmp10 + tmp12, FIX(0.881514751));   /* c5 */
01773     tmp3 = MULTIPLY(tmp10 + tmp13, FIX(0.710284161)) +  /* c7 */
01774        MULTIPLY(tmp14 + tmp15, FIX(0.256335874));   /* c11 */
01775     tmp0 = tmp1 + tmp2 + tmp3 -
01776        MULTIPLY(tmp10, FIX(1.530003162)) +          /* c3+c5+c7-c1 */
01777        MULTIPLY(tmp14, FIX(0.241438564));           /* c9-c11 */
01778     tmp4 = MULTIPLY(tmp14 - tmp15, FIX(0.710284161)) -  /* c7 */
01779        MULTIPLY(tmp11 + tmp12, FIX(0.256335874));   /* c11 */
01780     tmp5 = MULTIPLY(tmp11 + tmp13, - FIX(0.881514751)); /* -c5 */
01781     tmp1 += tmp4 + tmp5 +
01782         MULTIPLY(tmp11, FIX(0.634110155)) -         /* c5+c9+c11-c3 */
01783         MULTIPLY(tmp14, FIX(1.773594819));          /* c1+c7 */
01784     tmp6 = MULTIPLY(tmp12 + tmp13, - FIX(0.497774438)); /* -c9 */
01785     tmp2 += tmp4 + tmp6 -
01786         MULTIPLY(tmp12, FIX(1.190715098)) +         /* c1+c5-c9-c11 */
01787         MULTIPLY(tmp15, FIX(1.711799069));          /* c3+c7 */
01788     tmp3 += tmp5 + tmp6 +
01789         MULTIPLY(tmp13, FIX(1.670519935)) -         /* c3+c5+c9-c7 */
01790         MULTIPLY(tmp15, FIX(1.319646532));          /* c1+c11 */
01791 
01792     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+1);
01793     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+1);
01794     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+1);
01795     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+1);
01796 
01797     dataptr++;          /* advance pointer to next column */
01798     wsptr++;            /* advance pointer to next column */
01799   }
01800 }
01801 
01802 
01803 /*
01804  * Perform the forward DCT on a 14x14 sample block.
 *
 * Reads 14 rows (sample_data[0] .. sample_data[13]) of 14 samples each,
 * starting at column start_col, and leaves the result in data[].
 * Only output indices 0..7 are computed in each direction, so the
 * 14x14 input is reduced to a single coefficient block.
 *
 * data        - receives the coefficients; it is also used as scratch
 *               storage for the first DCTSIZE row results between passes.
 * sample_data - array of sample row pointers; rows 0..13 are read.
 * start_col   - index of the first sample column read in every row.
 *
 * Pass 1 runs a 14-point row transform on each input row.  The first
 * DCTSIZE row results are stored in data[], the remaining ones in the
 * local workspace[] (declared as 8*6 elements, so this layout relies on
 * DCTSIZE being 8).  Pass 2 runs the same transform down each of the
 * DCTSIZE columns, reading from data[] and workspace[] and writing the
 * final values back into data[].
01805  */
01806 
01807 GLOBAL(void)
01808 jpeg_fdct_14x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
01809 {
01810   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
01811   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
        /* Holds the pass 1 results of input rows DCTSIZE..13. */
01812   DCTELEM workspace[8*6];
01813   DCTELEM *dataptr;
01814   DCTELEM *wsptr;
01815   JSAMPROW elemptr;
01816   int ctr;
01817   SHIFT_TEMPS
01818 
01819   /* Pass 1: process rows.
01820    * Note results are scaled up by sqrt(8) compared to a true DCT.
01821    * cK represents sqrt(2) * cos(K*pi/28).
01822    */
01823 
01824   dataptr = data;
01825   ctr = 0;
01826   for (;;) {
01827     elemptr = sample_data[ctr] + start_col;
01828 
01829     /* Even part */
01830 
          /* Sums of the mirrored sample pairs (k, 13-k).  The (3, 10) pair
           * goes straight into tmp13 because it has no partner in the
           * second folding stage below.
           */
01831     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
01832     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
01833     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
01834     tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
01835     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
01836     tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
01837     tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
01838 
          /* Second folding stage: sums and differences of the pair sums. */
01839     tmp10 = tmp0 + tmp6;
01840     tmp14 = tmp0 - tmp6;
01841     tmp11 = tmp1 + tmp5;
01842     tmp15 = tmp1 - tmp5;
01843     tmp12 = tmp2 + tmp4;
01844     tmp16 = tmp2 - tmp4;
01845 
          /* tmp0..tmp6 now become the pair differences used by the odd part. */
01846     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
01847     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
01848     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
01849     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
01850     tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
01851     tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
01852     tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
01853 
01854     /* Apply unsigned->signed conversion. */
01855     dataptr[0] = (DCTELEM)
01856       (tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE);
          /* tmp10..tmp12 each hold four samples, so subtracting twice the
           * two-sample sum tmp13 yields terms that do not change when a
           * constant is added to all samples; dataptr[0] is therefore the
           * only output that needs the explicit CENTERJSAMPLE offset.
           */
01857     tmp13 += tmp13;
01858     dataptr[4] = (DCTELEM)
01859       DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
01860           MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
01861           MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
01862           CONST_BITS);
01863 
01864     tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
01865 
01866     dataptr[2] = (DCTELEM)
01867       DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
01868           + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
01869           CONST_BITS);
01870     dataptr[6] = (DCTELEM)
01871       DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
01872           - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
01873           CONST_BITS);
01874 
01875     /* Odd part */
01876 
01877     tmp10 = tmp1 + tmp2;
01878     tmp11 = tmp5 - tmp4;
          /* c7 = sqrt(2) * cos(7*pi/28) = 1, so output 7 needs no multiply
           * and no descaling in this pass.
           */
01879     dataptr[7] = (DCTELEM) (tmp0 - tmp10 + tmp3 - tmp11 - tmp6);
          /* Bring tmp3 to the same scale as the MULTIPLY() products it is
           * combined with below.  tmp3 is a sample difference and may be
           * negative; left-shifting a negative value is undefined behaviour
           * in C, so the scaling is done by multiplication, which gives the
           * same value for every input.
           */
01880     tmp3 *= ((INT32) 1) << CONST_BITS;
01881     tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
01882     tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
01883     tmp10 += tmp11 - tmp3;
01884     tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
01885         MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
01886     dataptr[5] = (DCTELEM)
01887       DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
01888           + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
01889           CONST_BITS);
01890     tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
01891         MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
01892     dataptr[3] = (DCTELEM)
01893       DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
01894           - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
01895           CONST_BITS);
01896     dataptr[1] = (DCTELEM)
01897       DESCALE(tmp11 + tmp12 + tmp3 + tmp6 -
01898           MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
01899           CONST_BITS);
01900 
01901     ctr++;
01902 
          /* Row results 0..DCTSIZE-1 go to data[]; once row DCTSIZE is
           * reached the output pointer moves to workspace[] and keeps
           * advancing there until all 14 rows are done.
           */
01903     if (ctr != DCTSIZE) {
01904       if (ctr == 14)
01905     break;          /* Done. */
01906       dataptr += DCTSIZE;   /* advance pointer to next row */
01907     } else
01908       dataptr = workspace;  /* switch pointer to extended workspace */
01909   }
01910 
01911   /* Pass 2: process columns.
01912    * We leave the results scaled up by an overall factor of 8.
01913    * We must also scale the output by (8/14)**2 = 16/49, which we partially
01914    * fold into the constant multipliers and final shifting:
01915    * cK now represents sqrt(2) * cos(K*pi/28) * 32/49.
01916    */
01917 
01918   dataptr = data;
01919   wsptr = workspace;
01920   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
01921     /* Even part */
01922 
          /* Same pairing as pass 1, applied to row results: workspace row j
           * was filled from input row DCTSIZE+j, so data rows 0..5 pair with
           * workspace rows 5..0 and data row 6 pairs with data row 7.
           */
01923     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
01924     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
01925     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
01926     tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
01927     tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
01928     tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
01929     tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
01930 
01931     tmp10 = tmp0 + tmp6;
01932     tmp14 = tmp0 - tmp6;
01933     tmp11 = tmp1 + tmp5;
01934     tmp15 = tmp1 - tmp5;
01935     tmp12 = tmp2 + tmp4;
01936     tmp16 = tmp2 - tmp4;
01937 
01938     tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
01939     tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
01940     tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
01941     tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
01942     tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
01943     tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
01944     tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
01945 
          /* All inputs of this column have been read into temporaries, so
           * the column of data[] can now be overwritten with final values.
           */
01946     dataptr[DCTSIZE*0] = (DCTELEM)
01947       DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
01948                FIX(0.653061224)),                 /* 32/49 */
01949           CONST_BITS+1);
01950     tmp13 += tmp13;
01951     dataptr[DCTSIZE*4] = (DCTELEM)
01952       DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
01953           MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
01954           MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
01955           CONST_BITS+1);
01956 
01957     tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
01958 
01959     dataptr[DCTSIZE*2] = (DCTELEM)
01960       DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
01961           + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
01962           CONST_BITS+1);
01963     dataptr[DCTSIZE*6] = (DCTELEM)
01964       DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
01965           - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
01966           CONST_BITS+1);
01967 
01968     /* Odd part */
01969 
01970     tmp10 = tmp1 + tmp2;
01971     tmp11 = tmp5 - tmp4;
          /* Unlike pass 1, the unit coefficient here carries the 32/49 output
           * scale, so output 7 and tmp3 are produced with MULTIPLY().
           */
01972     dataptr[DCTSIZE*7] = (DCTELEM)
01973       DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
01974                FIX(0.653061224)),                 /* 32/49 */
01975           CONST_BITS+1);
01976     tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
01977     tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
01978     tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
01979     tmp10 += tmp11 - tmp3;
01980     tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
01981         MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
01982     dataptr[DCTSIZE*5] = (DCTELEM)
01983       DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
01984           + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
01985           CONST_BITS+1);
01986     tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
01987         MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
01988     dataptr[DCTSIZE*3] = (DCTELEM)
01989       DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
01990           - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
01991           CONST_BITS+1);
01992     dataptr[DCTSIZE*1] = (DCTELEM)
01993       DESCALE(tmp11 + tmp12 + tmp3
01994           - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
01995           - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
01996           CONST_BITS+1);
01997 
01998     dataptr++;          /* advance pointer to next column */
01999     wsptr++;            /* advance pointer to next column */
02000   }
02001 }
02002 
02003 
02004 /*
02005  * Perform the forward DCT on a 15x15 sample block.
02006  */
02007 
02008 GLOBAL(void)
02009 jpeg_fdct_15x15 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
02010 {
02011   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
02012   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
02013   INT32 z1, z2, z3;
02014   DCTELEM workspace[8*7];
02015   DCTELEM *dataptr;
02016   DCTELEM *wsptr;
02017   JSAMPROW elemptr;
02018   int ctr;
02019   SHIFT_TEMPS
02020 
02021   /* Pass 1: process rows.
02022    * Note results are scaled up by sqrt(8) compared to a true DCT.
02023    * cK represents sqrt(2) * cos(K*pi/30).
02024    */
02025 
02026   dataptr = data;
02027   ctr = 0;
02028   for (;;) {
02029     elemptr = sample_data[ctr] + start_col;
02030 
02031     /* Even part */
02032 
02033     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[14]);
02034     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[13]);
02035     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[12]);
02036     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[11]);
02037     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[10]);
02038     tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[9]);
02039     tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[8]);
02040     tmp7 = GETJSAMPLE(elemptr[7]);
02041 
02042     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[14]);
02043     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[13]);
02044     tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[12]);
02045     tmp13 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[11]);
02046     tmp14 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[10]);
02047     tmp15 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[9]);
02048     tmp16 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[8]);
02049 
02050     z1 = tmp0 + tmp4 + tmp5;
02051     z2 = tmp1 + tmp3 + tmp6;
02052     z3 = tmp2 + tmp7;
02053     /* Apply unsigned->signed conversion. */
02054     dataptr[0] = (DCTELEM) (z1 + z2 + z3 - 15 * CENTERJSAMPLE);
02055     z3 += z3;
02056     dataptr[6] = (DCTELEM)
02057       DESCALE(MULTIPLY(z1 - z3, FIX(1.144122806)) - /* c6 */
02058           MULTIPLY(z2 - z3, FIX(0.437016024)),  /* c12 */
02059           CONST_BITS);
02060     tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
02061     z1 = MULTIPLY(tmp3 - tmp2, FIX(1.531135173)) -  /* c2+c14 */
02062          MULTIPLY(tmp6 - tmp2, FIX(2.238241955));   /* c4+c8 */
02063     z2 = MULTIPLY(tmp5 - tmp2, FIX(0.798468008)) -  /* c8-c14 */
02064      MULTIPLY(tmp0 - tmp2, FIX(0.091361227));   /* c2-c4 */
02065     z3 = MULTIPLY(tmp0 - tmp3, FIX(1.383309603)) +  /* c2 */
02066      MULTIPLY(tmp6 - tmp5, FIX(0.946293579)) +  /* c8 */
02067      MULTIPLY(tmp1 - tmp4, FIX(0.790569415));   /* (c6+c12)/2 */
02068 
02069     dataptr[2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS);
02070     dataptr[4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS);
02071 
02072     /* Odd part */
02073 
02074     tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
02075             FIX(1.224744871));                         /* c5 */
02076     tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.344997024)) + /* c3 */
02077        MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.831253876));  /* c9 */
02078     tmp12 = MULTIPLY(tmp12, FIX(1.224744871));                 /* c5 */
02079     tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.406466353)) +         /* c1 */
02080        MULTIPLY(tmp11 + tmp14, FIX(1.344997024)) +         /* c3 */
02081        MULTIPLY(tmp13 + tmp15, FIX(0.575212477));          /* c11 */
02082     tmp0 = MULTIPLY(tmp13, FIX(0.475753014)) -                 /* c7-c11 */
02083        MULTIPLY(tmp14, FIX(0.513743148)) +                 /* c3-c9 */
02084        MULTIPLY(tmp16, FIX(1.700497885)) + tmp4 + tmp12;   /* c1+c13 */
02085     tmp3 = MULTIPLY(tmp10, - FIX(0.355500862)) -               /* -(c1-c7) */
02086        MULTIPLY(tmp11, FIX(2.176250899)) -                 /* c3+c9 */
02087        MULTIPLY(tmp15, FIX(0.869244010)) + tmp4 - tmp12;   /* c11+c13 */
02088 
02089     dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS);
02090     dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS);
02091     dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS);
02092     dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS);
02093 
02094     ctr++;
02095 
02096     if (ctr != DCTSIZE) {
02097       if (ctr == 15)
02098     break;          /* Done. */
02099       dataptr += DCTSIZE;   /* advance pointer to next row */
02100     } else
02101       dataptr = workspace;  /* switch pointer to extended workspace */
02102   }
02103 
02104   /* Pass 2: process columns.
02105    * We leave the results scaled up by an overall factor of 8.
02106    * We must also scale the output by (8/15)**2 = 64/225, which we partially
02107    * fold into the constant multipliers and final shifting:
02108    * cK now represents sqrt(2) * cos(K*pi/30) * 256/225.
02109    */
02110 
02111   dataptr = data;
02112   wsptr = workspace;
02113   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
02114     /* Even part */
02115 
02116     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*6];
02117     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*5];
02118     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*4];
02119     tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*3];
02120     tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*2];
02121     tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*1];
02122     tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*0];
02123     tmp7 = dataptr[DCTSIZE*7];
02124 
02125     tmp10 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*6];
02126     tmp11 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*5];
02127     tmp12 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*4];
02128     tmp13 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*3];
02129     tmp14 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*2];
02130     tmp15 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*1];
02131     tmp16 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*0];
02132 
02133     z1 = tmp0 + tmp4 + tmp5;
02134     z2 = tmp1 + tmp3 + tmp6;
02135     z3 = tmp2 + tmp7;
02136     dataptr[DCTSIZE*0] = (DCTELEM)
02137       DESCALE(MULTIPLY(z1 + z2 + z3, FIX(1.137777778)), /* 256/225 */
02138           CONST_BITS+2);
02139     z3 += z3;
02140     dataptr[DCTSIZE*6] = (DCTELEM)
02141       DESCALE(MULTIPLY(z1 - z3, FIX(1.301757503)) - /* c6 */
02142           MULTIPLY(z2 - z3, FIX(0.497227121)),  /* c12 */
02143           CONST_BITS+2);
02144     tmp2 += ((tmp1 + tmp4) >> 1) - tmp7 - tmp7;
02145     z1 = MULTIPLY(tmp3 - tmp2, FIX(1.742091575)) -  /* c2+c14 */
02146          MULTIPLY(tmp6 - tmp2, FIX(2.546621957));   /* c4+c8 */
02147     z2 = MULTIPLY(tmp5 - tmp2, FIX(0.908479156)) -  /* c8-c14 */
02148      MULTIPLY(tmp0 - tmp2, FIX(0.103948774));   /* c2-c4 */
02149     z3 = MULTIPLY(tmp0 - tmp3, FIX(1.573898926)) +  /* c2 */
02150      MULTIPLY(tmp6 - tmp5, FIX(1.076671805)) +  /* c8 */
02151      MULTIPLY(tmp1 - tmp4, FIX(0.899492312));   /* (c6+c12)/2 */
02152 
02153     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z3, CONST_BITS+2);
02154     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(z2 + z3, CONST_BITS+2);
02155 
02156     /* Odd part */
02157 
02158     tmp2 = MULTIPLY(tmp10 - tmp12 - tmp13 + tmp15 + tmp16,
02159             FIX(1.393487498));                         /* c5 */
02160     tmp1 = MULTIPLY(tmp10 - tmp14 - tmp15, FIX(1.530307725)) + /* c3 */
02161        MULTIPLY(tmp11 - tmp13 - tmp16, FIX(0.945782187));  /* c9 */
02162     tmp12 = MULTIPLY(tmp12, FIX(1.393487498));                 /* c5 */
02163     tmp4 = MULTIPLY(tmp10 - tmp16, FIX(1.600246161)) +         /* c1 */
02164        MULTIPLY(tmp11 + tmp14, FIX(1.530307725)) +         /* c3 */
02165        MULTIPLY(tmp13 + tmp15, FIX(0.654463974));          /* c11 */
02166     tmp0 = MULTIPLY(tmp13, FIX(0.541301207)) -                 /* c7-c11 */
02167        MULTIPLY(tmp14, FIX(0.584525538)) +                 /* c3-c9 */
02168        MULTIPLY(tmp16, FIX(1.934788705)) + tmp4 + tmp12;   /* c1+c13 */
02169     tmp3 = MULTIPLY(tmp10, - FIX(0.404480980)) -               /* -(c1-c7) */
02170        MULTIPLY(tmp11, FIX(2.476089912)) -                 /* c3+c9 */
02171        MULTIPLY(tmp15, FIX(0.989006518)) + tmp4 - tmp12;   /* c11+c13 */
02172 
02173     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+2);
02174     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+2);
02175     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+2);
02176     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+2);
02177 
02178     dataptr++;          /* advance pointer to next column */
02179     wsptr++;            /* advance pointer to next column */
02180   }
02181 }
02182 
02183 
02184 /*
02185  * Perform the forward DCT on a 16x16 sample block.
02186  */
02187 
02188 GLOBAL(void)
02189 jpeg_fdct_16x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
02190 {
02191   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
02192   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
02193   DCTELEM workspace[DCTSIZE2];
02194   DCTELEM *dataptr;
02195   DCTELEM *wsptr;
02196   JSAMPROW elemptr;
02197   int ctr;
02198   SHIFT_TEMPS
02199 
02200   /* Pass 1: process rows.
02201    * Note results are scaled up by sqrt(8) compared to a true DCT;
02202    * furthermore, we scale the results by 2**PASS1_BITS.
02203    * cK represents sqrt(2) * cos(K*pi/32).
02204    */
02205 
02206   dataptr = data;
02207   ctr = 0;
02208   for (;;) {
02209     elemptr = sample_data[ctr] + start_col;
02210 
02211     /* Even part */
02212 
02213     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
02214     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
02215     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
02216     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
02217     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
02218     tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
02219     tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
02220     tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
02221 
02222     tmp10 = tmp0 + tmp7;
02223     tmp14 = tmp0 - tmp7;
02224     tmp11 = tmp1 + tmp6;
02225     tmp15 = tmp1 - tmp6;
02226     tmp12 = tmp2 + tmp5;
02227     tmp16 = tmp2 - tmp5;
02228     tmp13 = tmp3 + tmp4;
02229     tmp17 = tmp3 - tmp4;
02230 
02231     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
02232     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
02233     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
02234     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
02235     tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
02236     tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
02237     tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
02238     tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
02239 
02240     /* Apply unsigned->signed conversion. */
02241     dataptr[0] = (DCTELEM)
02242       ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
02243     dataptr[4] = (DCTELEM)
02244       DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
02245           MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
02246           CONST_BITS-PASS1_BITS);
02247 
02248     tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
02249         MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
02250 
02251     dataptr[2] = (DCTELEM)
02252       DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
02253           + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
02254           CONST_BITS-PASS1_BITS);
02255     dataptr[6] = (DCTELEM)
02256       DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
02257           - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
02258           CONST_BITS-PASS1_BITS);
02259 
02260     /* Odd part */
02261 
02262     tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
02263         MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
02264     tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
02265         MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
02266     tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
02267         MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
02268     tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
02269         MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
02270     tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
02271         MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
02272     tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
02273         MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
02274     tmp10 = tmp11 + tmp12 + tmp13 -
02275         MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
02276         MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
02277     tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
02278          - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
02279     tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
02280          + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
02281     tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
02282          + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
02283 
02284     dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
02285     dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
02286     dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
02287     dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
02288 
02289     ctr++;
02290 
02291     if (ctr != DCTSIZE) {
02292       if (ctr == DCTSIZE * 2)
02293     break;          /* Done. */
02294       dataptr += DCTSIZE;   /* advance pointer to next row */
02295     } else
02296       dataptr = workspace;  /* switch pointer to extended workspace */
02297   }
02298 
02299   /* Pass 2: process columns.
02300    * We remove the PASS1_BITS scaling, but leave the results scaled up
02301    * by an overall factor of 8.
02302    * We must also scale the output by (8/16)**2 = 1/2**2.
02303    * cK represents sqrt(2) * cos(K*pi/32).
02304    */
02305 
02306   dataptr = data;
02307   wsptr = workspace;
02308   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
02309     /* Even part */
02310 
02311     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
02312     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
02313     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
02314     tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
02315     tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
02316     tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
02317     tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
02318     tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
02319 
02320     tmp10 = tmp0 + tmp7;
02321     tmp14 = tmp0 - tmp7;
02322     tmp11 = tmp1 + tmp6;
02323     tmp15 = tmp1 - tmp6;
02324     tmp12 = tmp2 + tmp5;
02325     tmp16 = tmp2 - tmp5;
02326     tmp13 = tmp3 + tmp4;
02327     tmp17 = tmp3 - tmp4;
02328 
02329     tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
02330     tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
02331     tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
02332     tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
02333     tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
02334     tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
02335     tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
02336     tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
02337 
02338     dataptr[DCTSIZE*0] = (DCTELEM)
02339       DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+2);
02340     dataptr[DCTSIZE*4] = (DCTELEM)
02341       DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
02342           MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
02343           CONST_BITS+PASS1_BITS+2);
02344 
02345     tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
02346         MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
02347 
02348     dataptr[DCTSIZE*2] = (DCTELEM)
02349       DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
02350           + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+10 */
02351           CONST_BITS+PASS1_BITS+2);
02352     dataptr[DCTSIZE*6] = (DCTELEM)
02353       DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
02354           - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
02355           CONST_BITS+PASS1_BITS+2);
02356 
02357     /* Odd part */
02358 
02359     tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
02360         MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
02361     tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
02362         MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
02363     tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
02364         MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
02365     tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
02366         MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
02367     tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
02368         MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
02369     tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
02370         MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
02371     tmp10 = tmp11 + tmp12 + tmp13 -
02372         MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
02373         MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
02374     tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
02375          - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
02376     tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
02377          + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
02378     tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
02379          + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
02380 
02381     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+2);
02382     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+2);
02383     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+2);
02384     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+2);
02385 
02386     dataptr++;          /* advance pointer to next column */
02387     wsptr++;            /* advance pointer to next column */
02388   }
02389 }
02390 
02391 
02392 /*
02393  * Perform the forward DCT on a 16x8 sample block.
02394  *
02395  * 16-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
       *
       * data        - coefficient block written by this routine: pass 1 stores
       *               8 values in each of DCTSIZE rows (row stride DCTSIZE) and
       *               pass 2 then transforms every column in place.
       * sample_data - array of sample rows; rows 0 .. DCTSIZE-1 are read.
       * start_col   - offset added to each row pointer; the 16 samples starting
       *               at that position are read from every row.
02396  */
02397 
02398 GLOBAL(void)
02399 jpeg_fdct_16x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
02400 {
02401   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
02402   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
02403   INT32 z1;
02404   DCTELEM *dataptr;
02405   JSAMPROW elemptr;
02406   int ctr;
02407   SHIFT_TEMPS
02408 
02409   /* Pass 1: process rows.
02410    * Note results are scaled up by sqrt(8) compared to a true DCT;
02411    * furthermore, we scale the results by 2**PASS1_BITS.
02412    * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
02413    */
02414 
02415   dataptr = data;
02417   for (ctr = 0; ctr < DCTSIZE; ctr++) {
02418     elemptr = sample_data[ctr] + start_col;
02419 
02420     /* Even part */
02421 
          /* Fold the row: tmp0..tmp7 are the sums of mirrored samples k and 15-k. */
02422     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[15]);
02423     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[14]);
02424     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[13]);
02425     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[12]);
02426     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[11]);
02427     tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[10]);
02428     tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[9]);
02429     tmp7 = GETJSAMPLE(elemptr[7]) + GETJSAMPLE(elemptr[8]);
02430 
          /* Sums (tmp10..tmp13) and differences (tmp14..tmp17) of the folded values. */
02431     tmp10 = tmp0 + tmp7;
02432     tmp14 = tmp0 - tmp7;
02433     tmp11 = tmp1 + tmp6;
02434     tmp15 = tmp1 - tmp6;
02435     tmp12 = tmp2 + tmp5;
02436     tmp16 = tmp2 - tmp5;
02437     tmp13 = tmp3 + tmp4;
02438     tmp17 = tmp3 - tmp4;
02439 
          /* tmp0..tmp7 are reused: from here on they hold the differences of the
           * mirrored samples (k minus 15-k), which feed the odd part below.
           */
02440     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[15]);
02441     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[14]);
02442     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[13]);
02443     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[12]);
02444     tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[11]);
02445     tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[10]);
02446     tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[9]);
02447     tmp7 = GETJSAMPLE(elemptr[7]) - GETJSAMPLE(elemptr[8]);
02448 
02449     /* Apply unsigned->signed conversion. */
          /* tmp10..tmp13 together add up all 16 samples of the row, hence
           * CENTERJSAMPLE is subtracted 16 times.
           */
02450     dataptr[0] = (DCTELEM)
02451       ((tmp10 + tmp11 + tmp12 + tmp13 - 16 * CENTERJSAMPLE) << PASS1_BITS);
02452     dataptr[4] = (DCTELEM)
02453       DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
02454           MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
02455           CONST_BITS-PASS1_BITS);
02456 
          /* tmp10 is reused as the term common to outputs 2 and 6. */
02457     tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
02458         MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
02459 
02460     dataptr[2] = (DCTELEM)
02461       DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
02462           + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
02463           CONST_BITS-PASS1_BITS);
02464     dataptr[6] = (DCTELEM)
02465       DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
02466           - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
02467           CONST_BITS-PASS1_BITS);
02468 
02469     /* Odd part */
02470 
          /* tmp11..tmp16 are first loaded with pairwise partial products; tmp10 is
           * then rebuilt and tmp11..tmp13 are updated in place, giving the values
           * stored to outputs 1, 3, 5 and 7.
           */
02471     tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
02472         MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
02473     tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
02474         MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
02475     tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
02476         MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
02477     tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
02478         MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
02479     tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
02480         MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
02481     tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
02482         MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
02483     tmp10 = tmp11 + tmp12 + tmp13 -
02484         MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
02485         MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
02486     tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
02487          - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
02488     tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
02489          + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
02490     tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
02491          + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
02492 
02493     dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
02494     dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
02495     dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
02496     dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
02497 
02498     dataptr += DCTSIZE;     /* advance pointer to next row */
02499   }
02500 
02501   /* Pass 2: process columns.
02502    * We remove the PASS1_BITS scaling, but leave the results scaled up
02503    * by an overall factor of 8.
02504    * We must also scale the output by 8/16 = 1/2.
02505    * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
02506    */
02507 
02508   dataptr = data;
        /* ctr only counts the DCTSIZE columns.  Each iteration handles the column
         * at dataptr and reads all eight of its entries before the first store,
         * so the transform can overwrite the column in place.
         */
02509   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
02510     /* Even part per LL&M figure 1 --- note that published figure is faulty;
02511      * rotator "c1" should be "c6".
02512      */
02513 
02514     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
02515     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
02516     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
02517     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
02518 
02519     tmp10 = tmp0 + tmp3;
02520     tmp12 = tmp0 - tmp3;
02521     tmp11 = tmp1 + tmp2;
02522     tmp13 = tmp1 - tmp2;
02523 
          /* tmp0..tmp3 are reused for the mirrored-row differences (odd part input). */
02524     tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
02525     tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
02526     tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
02527     tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
02528 
02529     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS+1);
02530     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS+1);
02531 
          /* z1 is the product shared by rows 2 and 6. */
02532     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
02533     dataptr[DCTSIZE*2] = (DCTELEM)
02534       DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
02535           CONST_BITS+PASS1_BITS+1);
02536     dataptr[DCTSIZE*6] = (DCTELEM)
02537       DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
02538           CONST_BITS+PASS1_BITS+1);
02539 
02540     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
02541      * i0..i3 in the paper are tmp0..tmp3 here.
02542      */
02543 
          /* tmp12/tmp13 and z1 are reused as scratch; tmp0..tmp3 are overwritten
           * step by step and end up as the values stored to rows 1, 3, 5 and 7.
           */
02544     tmp12 = tmp0 + tmp2;
02545     tmp13 = tmp1 + tmp3;
02546 
02547     z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
02548     tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
02549     tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
02550     tmp12 += z1;
02551     tmp13 += z1;
02552 
02553     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
02554     tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
02555     tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
02556     tmp0 += z1 + tmp12;
02557     tmp3 += z1 + tmp13;
02558 
02559     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
02560     tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
02561     tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
02562     tmp1 += z1 + tmp13;
02563     tmp2 += z1 + tmp12;
02564 
02565     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
02566     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
02567     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
02568     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp3, CONST_BITS+PASS1_BITS+1);
02569 
02570     dataptr++;          /* advance pointer to next column */
02571   }
02572 }
02573 
02574 
02575 /*
02576  * Perform the forward DCT on a 14x7 sample block.
02577  *
02578  * 14-point FDCT in pass 1 (rows), 7-point in pass 2 (columns).
       *
       * data        - coefficient block written by this routine: row 7 is zeroed
       *               first, pass 1 stores 8 values in each of rows 0 .. 6 (row
       *               stride DCTSIZE) and pass 2 transforms rows 0 .. 6 of every
       *               column in place.
       * sample_data - array of sample rows; rows 0 .. 6 are read.
       * start_col   - offset added to each row pointer; the 14 samples starting
       *               at that position are read from every row.
02579  */
02580 
02581 GLOBAL(void)
02582 jpeg_fdct_14x7 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
02583 {
02584   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
02585   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
02586   INT32 z1, z2, z3;
02587   DCTELEM *dataptr;
02588   JSAMPROW elemptr;
02589   int ctr;
02590   SHIFT_TEMPS
02591 
02592   /* Zero bottom row of output coefficient block. */
02593   MEMZERO(&data[DCTSIZE*7], SIZEOF(DCTELEM) * DCTSIZE);
02594 
02595   /* Pass 1: process rows.
02596    * Note results are scaled up by sqrt(8) compared to a true DCT;
02597    * furthermore, we scale the results by 2**PASS1_BITS.
02598    * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28).
02599    */
02600 
02601   dataptr = data;
02602   for (ctr = 0; ctr < 7; ctr++) {
02603     elemptr = sample_data[ctr] + start_col;
02604 
02605     /* Even part */
02606 
          /* Fold the row into sums of mirrored samples k and 13-k.  The middle
           * pair (3, 10) has no partner in the next stage and goes straight
           * into tmp13.
           */
02607     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[13]);
02608     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[12]);
02609     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[11]);
02610     tmp13 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[10]);
02611     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[9]);
02612     tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[8]);
02613     tmp6 = GETJSAMPLE(elemptr[6]) + GETJSAMPLE(elemptr[7]);
02614 
          /* Sums (tmp10..tmp12) and differences (tmp14..tmp16) of the outer folds. */
02615     tmp10 = tmp0 + tmp6;
02616     tmp14 = tmp0 - tmp6;
02617     tmp11 = tmp1 + tmp5;
02618     tmp15 = tmp1 - tmp5;
02619     tmp12 = tmp2 + tmp4;
02620     tmp16 = tmp2 - tmp4;
02621 
          /* tmp0..tmp6 are reused: from here on they hold the differences of the
           * mirrored samples (k minus 13-k), which feed the odd part below.
           */
02622     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[13]);
02623     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[12]);
02624     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[11]);
02625     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[10]);
02626     tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[9]);
02627     tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[8]);
02628     tmp6 = GETJSAMPLE(elemptr[6]) - GETJSAMPLE(elemptr[7]);
02629 
02630     /* Apply unsigned->signed conversion. */
          /* tmp10..tmp13 together add up all 14 samples of the row, hence
           * CENTERJSAMPLE is subtracted 14 times.
           */
02631     dataptr[0] = (DCTELEM)
02632       ((tmp10 + tmp11 + tmp12 + tmp13 - 14 * CENTERJSAMPLE) << PASS1_BITS);
          /* Double the middle sum; in this pass it is used only in the
           * dataptr[4] expression that follows.
           */
02633     tmp13 += tmp13;
02634     dataptr[4] = (DCTELEM)
02635       DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.274162392)) + /* c4 */
02636           MULTIPLY(tmp11 - tmp13, FIX(0.314692123)) - /* c12 */
02637           MULTIPLY(tmp12 - tmp13, FIX(0.881747734)),  /* c8 */
02638           CONST_BITS-PASS1_BITS);
02639 
          /* tmp10 is reused as the term common to outputs 2 and 6. */
02640     tmp10 = MULTIPLY(tmp14 + tmp15, FIX(1.105676686));    /* c6 */
02641 
02642     dataptr[2] = (DCTELEM)
02643       DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.273079590))   /* c2-c6 */
02644           + MULTIPLY(tmp16, FIX(0.613604268)),        /* c10 */
02645           CONST_BITS-PASS1_BITS);
02646     dataptr[6] = (DCTELEM)
02647       DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.719280954))   /* c6+c10 */
02648           - MULTIPLY(tmp16, FIX(1.378756276)),        /* c2 */
02649           CONST_BITS-PASS1_BITS);
02650 
02651     /* Odd part */
02652 
          /* Output 7 is a plain signed sum of the differences, so it is only
           * shifted up by PASS1_BITS.
           */
02653     tmp10 = tmp1 + tmp2;
02654     tmp11 = tmp5 - tmp4;
02655     dataptr[7] = (DCTELEM) ((tmp0 - tmp10 + tmp3 - tmp11 - tmp6) << PASS1_BITS);
          /* Unit-weight terms that are mixed with MULTIPLY() products must carry
           * the same 2**CONST_BITS scale before DESCALE; tmp3 is pre-shifted here
           * for that purpose.
           */
02656     tmp3 <<= CONST_BITS;
02657     tmp10 = MULTIPLY(tmp10, - FIX(0.158341681));          /* -c13 */
02658     tmp11 = MULTIPLY(tmp11, FIX(1.405321284));            /* c1 */
02659     tmp10 += tmp11 - tmp3;
02660     tmp11 = MULTIPLY(tmp0 + tmp2, FIX(1.197448846)) +     /* c5 */
02661         MULTIPLY(tmp4 + tmp6, FIX(0.752406978));      /* c9 */
02662     dataptr[5] = (DCTELEM)
02663       DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(2.373959773)) /* c3+c5-c13 */
02664           + MULTIPLY(tmp4, FIX(1.119999435)),         /* c1+c11-c9 */
02665           CONST_BITS-PASS1_BITS);
02666     tmp12 = MULTIPLY(tmp0 + tmp1, FIX(1.334852607)) +     /* c3 */
02667         MULTIPLY(tmp5 - tmp6, FIX(0.467085129));      /* c11 */
02668     dataptr[3] = (DCTELEM)
02669       DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.424103948)) /* c3-c9-c13 */
02670           - MULTIPLY(tmp5, FIX(3.069855259)),         /* c1+c5+c11 */
02671           CONST_BITS-PASS1_BITS);
          /* Output 1 needs tmp6 with an overall weight of c13.  The products in
           * tmp11, tmp12 and the MULTIPLY() below contribute
           * c9 - c11 - (c3+c5-c1) = c13 - 1, so tmp6 itself has to be added with
           * weight 1 at the 2**CONST_BITS scale, like the pre-shifted tmp3.
           * The shift of a possibly negative value follows the convention already
           * used for tmp3 above.
           */
02672     dataptr[1] = (DCTELEM)
02673       DESCALE(tmp11 + tmp12 + tmp3 + (tmp6 << CONST_BITS) -
02674           MULTIPLY(tmp0 + tmp6, FIX(1.126980169)),    /* c3+c5-c1 */
02675           CONST_BITS-PASS1_BITS);
02676 
02677     dataptr += DCTSIZE;     /* advance pointer to next row */
02678   }
02679 
02680   /* Pass 2: process columns.
02681    * We remove the PASS1_BITS scaling, but leave the results scaled up
02682    * by an overall factor of 8.
02683    * We must also scale the output by (8/14)*(8/7) = 32/49, which we
02684    * partially fold into the constant multipliers and final shifting:
02685    * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14) * 64/49.
02686    */
02687 
02688   dataptr = data;
        /* ctr only counts the DCTSIZE columns.  Each iteration reads rows 0 .. 6
         * of the column at dataptr before its first store and writes rows 0 .. 6
         * only, so row 7 keeps the zeros written at the top of the function.
         */
02689   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
02690     /* Even part */
02691 
02692     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*6];
02693     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*5];
02694     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*4];
02695     tmp3 = dataptr[DCTSIZE*3];
02696 
          /* Mirrored-row differences, consumed by the odd part. */
02697     tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*6];
02698     tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*5];
02699     tmp12 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*4];
02700 
02701     z1 = tmp0 + tmp2;
02702     dataptr[DCTSIZE*0] = (DCTELEM)
02703       DESCALE(MULTIPLY(z1 + tmp1 + tmp3, FIX(1.306122449)), /* 64/49 */
02704           CONST_BITS+PASS1_BITS+1);
          /* tmp3 is doubled and subtracted twice, so z1 becomes
           * tmp0 + tmp2 - 4 * (row 3 value); tmp3 stays doubled for the row 4
           * expression further down.
           */
02705     tmp3 += tmp3;
02706     z1 -= tmp3;
02707     z1 -= tmp3;
02708     z1 = MULTIPLY(z1, FIX(0.461784020));                /* (c2+c6-c4)/2 */
02709     z2 = MULTIPLY(tmp0 - tmp2, FIX(1.202428084));       /* (c2+c4-c6)/2 */
02710     z3 = MULTIPLY(tmp1 - tmp2, FIX(0.411026446));       /* c6 */
02711     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS+PASS1_BITS+1);
          /* z1 now carries (old z1 - old z2); z2 is reloaded and both are reused
           * for rows 4 and 6.
           */
02712     z1 -= z2;
02713     z2 = MULTIPLY(tmp0 - tmp1, FIX(1.151670509));       /* c4 */
02714     dataptr[DCTSIZE*4] = (DCTELEM)
02715       DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.923568041)), /* c2+c6-c4 */
02716           CONST_BITS+PASS1_BITS+1);
02717     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS+PASS1_BITS+1);
02718 
02719     /* Odd part */
02720 
          /* tmp0..tmp3 are reused as accumulators; tmp0, tmp1 and tmp2 end up as
           * the values stored to rows 1, 3 and 5.
           */
02721     tmp1 = MULTIPLY(tmp10 + tmp11, FIX(1.221765677));   /* (c3+c1-c5)/2 */
02722     tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.222383464));   /* (c3+c5-c1)/2 */
02723     tmp0 = tmp1 - tmp2;
02724     tmp1 += tmp2;
02725     tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.800824523)); /* -c1 */
02726     tmp1 += tmp2;
02727     tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.801442310));   /* c5 */
02728     tmp0 += tmp3;
02729     tmp2 += tmp3 + MULTIPLY(tmp12, FIX(2.443531355));   /* c3+c1-c5 */
02730 
02731     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp0, CONST_BITS+PASS1_BITS+1);
02732     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp1, CONST_BITS+PASS1_BITS+1);
02733     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp2, CONST_BITS+PASS1_BITS+1);
02734 
02735     dataptr++;          /* advance pointer to next column */
02736   }
02737 }
02738 
02739 
02740 /*
02741  * Perform the forward DCT on a 12x6 sample block.
02742  *
02743  * 12-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
       *
       * data        - coefficient block written by this routine: rows 6 and 7 are
       *               zeroed first, pass 1 stores 8 values in each of rows 0 .. 5
       *               (row stride DCTSIZE) and pass 2 transforms rows 0 .. 5 of
       *               every column in place.
       * sample_data - array of sample rows; rows 0 .. 5 are read.
       * start_col   - offset added to each row pointer; the 12 samples starting
       *               at that position are read from every row.
02744  */
02745 
02746 GLOBAL(void)
02747 jpeg_fdct_12x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
02748 {
02749   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
02750   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
02751   DCTELEM *dataptr;
02752   JSAMPROW elemptr;
02753   int ctr;
02754   SHIFT_TEMPS
02755 
02756   /* Zero 2 bottom rows of output coefficient block. */
02757   MEMZERO(&data[DCTSIZE*6], SIZEOF(DCTELEM) * DCTSIZE * 2);
02758 
02759   /* Pass 1: process rows.
02760    * Note results are scaled up by sqrt(8) compared to a true DCT;
02761    * furthermore, we scale the results by 2**PASS1_BITS.
02762    * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24).
02763    */
02764 
02765   dataptr = data;
02766   for (ctr = 0; ctr < 6; ctr++) {
02767     elemptr = sample_data[ctr] + start_col;
02768 
02769     /* Even part */
02770 
          /* Fold the row: tmp0..tmp5 are the sums of mirrored samples k and 11-k. */
02771     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[11]);
02772     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[10]);
02773     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[9]);
02774     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[8]);
02775     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[7]);
02776     tmp5 = GETJSAMPLE(elemptr[5]) + GETJSAMPLE(elemptr[6]);
02777 
          /* Sums (tmp10..tmp12) and differences (tmp13..tmp15) of the folded values. */
02778     tmp10 = tmp0 + tmp5;
02779     tmp13 = tmp0 - tmp5;
02780     tmp11 = tmp1 + tmp4;
02781     tmp14 = tmp1 - tmp4;
02782     tmp12 = tmp2 + tmp3;
02783     tmp15 = tmp2 - tmp3;
02784 
          /* tmp0..tmp5 are reused: from here on they hold the differences of the
           * mirrored samples (k minus 11-k), which feed the odd part below.
           */
02785     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[11]);
02786     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[10]);
02787     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[9]);
02788     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[8]);
02789     tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[7]);
02790     tmp5 = GETJSAMPLE(elemptr[5]) - GETJSAMPLE(elemptr[6]);
02791 
02792     /* Apply unsigned->signed conversion. */
          /* tmp10..tmp12 together add up all 12 samples of the row, hence
           * CENTERJSAMPLE is subtracted 12 times.
           */
02793     dataptr[0] = (DCTELEM)
02794       ((tmp10 + tmp11 + tmp12 - 12 * CENTERJSAMPLE) << PASS1_BITS);
          /* Output 6 is a plain signed sum, so it is only shifted up by PASS1_BITS. */
02795     dataptr[6] = (DCTELEM) ((tmp13 - tmp14 - tmp15) << PASS1_BITS);
02796     dataptr[4] = (DCTELEM)
02797       DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.224744871)), /* c4 */
02798           CONST_BITS-PASS1_BITS);
          /* Output 2 is c2*tmp13 + c6*tmp14 + c10*tmp15 with c6 = 1 and
           * c10 = c2 - 1, i.e. (tmp14 - tmp15) + c2*(tmp13 + tmp15).  The
           * unit-weight term is shifted up to the 2**CONST_BITS scale of the
           * MULTIPLY() product so that both survive the common DESCALE.  The
           * shift of a possibly negative value follows the convention used by
           * the other shifts in this routine.
           */
02799     dataptr[2] = (DCTELEM)
02800       DESCALE(((tmp14 - tmp15) << CONST_BITS) + MULTIPLY(tmp13 + tmp15, FIX(1.366025404)), /* c2 */
02801           CONST_BITS-PASS1_BITS);
02802 
02803     /* Odd part */
02804 
          /* tmp10..tmp15 are reused as scratch for partial products; tmp10..tmp13
           * end up as the values stored to outputs 1, 3, 5 and 7.
           */
02805     tmp10 = MULTIPLY(tmp1 + tmp4, FIX_0_541196100);    /* c9 */
02806     tmp14 = tmp10 + MULTIPLY(tmp1, FIX_0_765366865);   /* c3-c9 */
02807     tmp15 = tmp10 - MULTIPLY(tmp4, FIX_1_847759065);   /* c3+c9 */
02808     tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.121971054));   /* c5 */
02809     tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.860918669));   /* c7 */
02810     tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.580774953)) /* c5+c7-c1 */
02811         + MULTIPLY(tmp5, FIX(0.184591911));        /* c11 */
02812     tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.184591911)); /* -c11 */
02813     tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.339493912)) /* c1+c5-c11 */
02814         + MULTIPLY(tmp5, FIX(0.860918669));        /* c7 */
02815     tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.725788011)) /* c1+c11-c7 */
02816         - MULTIPLY(tmp5, FIX(1.121971054));        /* c5 */
02817     tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.306562965)) /* c3 */
02818         - MULTIPLY(tmp2 + tmp5, FIX_0_541196100);  /* c9 */
02819 
02820     dataptr[1] = (DCTELEM) DESCALE(tmp10, CONST_BITS-PASS1_BITS);
02821     dataptr[3] = (DCTELEM) DESCALE(tmp11, CONST_BITS-PASS1_BITS);
02822     dataptr[5] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
02823     dataptr[7] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
02824 
02825     dataptr += DCTSIZE;     /* advance pointer to next row */
02826   }
02827 
02828   /* Pass 2: process columns.
02829    * We remove the PASS1_BITS scaling, but leave the results scaled up
02830    * by an overall factor of 8.
02831    * We must also scale the output by (8/12)*(8/6) = 8/9, which we
02832    * partially fold into the constant multipliers and final shifting:
02833    * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
02834    */
02835 
02836   dataptr = data;
        /* ctr only counts the DCTSIZE columns.  Each iteration reads rows 0 .. 5
         * of the column at dataptr before its first store and writes rows 0 .. 5
         * only, so rows 6 and 7 keep the zeros written at the top of the function.
         */
02837   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
02838     /* Even part */
02839 
          /* The middle pair (rows 1 and 4) goes straight into tmp11. */
02840     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
02841     tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
02842     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
02843 
02844     tmp10 = tmp0 + tmp2;
02845     tmp12 = tmp0 - tmp2;
02846 
          /* tmp0..tmp2 are reused for the mirrored-row differences (odd part input). */
02847     tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
02848     tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
02849     tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
02850 
02851     dataptr[DCTSIZE*0] = (DCTELEM)
02852       DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
02853           CONST_BITS+PASS1_BITS+1);
02854     dataptr[DCTSIZE*2] = (DCTELEM)
02855       DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
02856           CONST_BITS+PASS1_BITS+1);
02857     dataptr[DCTSIZE*4] = (DCTELEM)
02858       DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
02859           CONST_BITS+PASS1_BITS+1);
02860 
02861     /* Odd part */
02862 
          /* tmp10 is reused as the term common to rows 1 and 5. */
02863     tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
02864 
02865     dataptr[DCTSIZE*1] = (DCTELEM)
02866       DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
02867           CONST_BITS+PASS1_BITS+1);
02868     dataptr[DCTSIZE*3] = (DCTELEM)
02869       DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
02870           CONST_BITS+PASS1_BITS+1);
02871     dataptr[DCTSIZE*5] = (DCTELEM)
02872       DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
02873           CONST_BITS+PASS1_BITS+1);
02874 
02875     dataptr++;          /* advance pointer to next column */
02876   }
02877 }
02878 
02879 
02880 /*
02881  * Perform the forward DCT on a 10x5 sample block.
02882  *
02883  * 10-point FDCT in pass 1 (rows), 5-point in pass 2 (columns).
02884  */
02885 
02886 GLOBAL(void)
02887 jpeg_fdct_10x5 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
02888 {
02889   INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
02890   INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
02891   DCTELEM *dataptr;
02892   JSAMPROW elemptr;
02893   int ctr;
02894   SHIFT_TEMPS
02895 
02896   /* Zero 3 bottom rows of output coefficient block. */
02897   MEMZERO(&data[DCTSIZE*5], SIZEOF(DCTELEM) * DCTSIZE * 3);
02898 
02899   /* Pass 1: process rows.
02900    * Note results are scaled up by sqrt(8) compared to a true DCT;
02901    * furthermore, we scale the results by 2**PASS1_BITS.
02902    * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20).
02903    */
02904 
02905   dataptr = data;
02906   for (ctr = 0; ctr < 5; ctr++) {
02907     elemptr = sample_data[ctr] + start_col;
02908 
02909     /* Even part */
02910 
02911     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[9]);
02912     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[8]);
02913     tmp12 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[7]);
02914     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[6]);
02915     tmp4 = GETJSAMPLE(elemptr[4]) + GETJSAMPLE(elemptr[5]);
02916 
02917     tmp10 = tmp0 + tmp4;
02918     tmp13 = tmp0 - tmp4;
02919     tmp11 = tmp1 + tmp3;
02920     tmp14 = tmp1 - tmp3;
02921 
02922     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[9]);
02923     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[8]);
02924     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[7]);
02925     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[6]);
02926     tmp4 = GETJSAMPLE(elemptr[4]) - GETJSAMPLE(elemptr[5]);
02927 
02928     /* Apply unsigned->signed conversion. */
02929     dataptr[0] = (DCTELEM)
02930       ((tmp10 + tmp11 + tmp12 - 10 * CENTERJSAMPLE) << PASS1_BITS);
02931     tmp12 += tmp12;
02932     dataptr[4] = (DCTELEM)
02933       DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.144122806)) - /* c4 */
02934           MULTIPLY(tmp11 - tmp12, FIX(0.437016024)),  /* c8 */
02935           CONST_BITS-PASS1_BITS);
02936     tmp10 = MULTIPLY(tmp13 + tmp14, FIX(0.831253876));    /* c6 */
02937     dataptr[2] = (DCTELEM)
02938       DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.513743148)),  /* c2-c6 */
02939           CONST_BITS-PASS1_BITS);
02940     dataptr[6] = (DCTELEM)
02941       DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.176250899)),  /* c2+c6 */
02942           CONST_BITS-PASS1_BITS);
02943 
02944     /* Odd part */
02945 
02946     tmp10 = tmp0 + tmp4;
02947     tmp11 = tmp1 - tmp3;
02948     dataptr[5] = (DCTELEM) ((tmp10 - tmp11 - tmp2) << PASS1_BITS);
02949     tmp2 <<= CONST_BITS;
02950     dataptr[1] = (DCTELEM)
02951       DESCALE(MULTIPLY(tmp0, FIX(1.396802247)) +          /* c1 */
02952           MULTIPLY(tmp1, FIX(1.260073511)) + tmp2 +   /* c3 */
02953           MULTIPLY(tmp3, FIX(0.642039522)) +          /* c7 */
02954           MULTIPLY(tmp4, FIX(0.221231742)),           /* c9 */
02955           CONST_BITS-PASS1_BITS);
02956     tmp12 = MULTIPLY(tmp0 - tmp4, FIX(0.951056516)) -     /* (c3+c7)/2 */
02957         MULTIPLY(tmp1 + tmp3, FIX(0.587785252));      /* (c1-c9)/2 */
02958     tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.309016994)) +   /* (c3-c7)/2 */
02959         (tmp11 << (CONST_BITS - 1)) - tmp2;
02960     dataptr[3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS-PASS1_BITS);
02961     dataptr[7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS-PASS1_BITS);
02962 
02963     dataptr += DCTSIZE;     /* advance pointer to next row */
02964   }
02965 
02966   /* Pass 2: process columns.
02967    * We remove the PASS1_BITS scaling, but leave the results scaled up
02968    * by an overall factor of 8.
02969    * We must also scale the output by (8/10)*(8/5) = 32/25, which we
02970    * fold into the constant multipliers:
02971    * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10) * 32/25.
02972    */
02973 
02974   dataptr = data;
02975   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
02976     /* Even part */
02977 
02978     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*4];
02979     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*3];
02980     tmp2 = dataptr[DCTSIZE*2];
02981 
02982     tmp10 = tmp0 + tmp1;
02983     tmp11 = tmp0 - tmp1;
02984 
02985     tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*4];
02986     tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*3];
02987 
02988     dataptr[DCTSIZE*0] = (DCTELEM)
02989       DESCALE(MULTIPLY(tmp10 + tmp2, FIX(1.28)),        /* 32/25 */
02990           CONST_BITS+PASS1_BITS);
02991     tmp11 = MULTIPLY(tmp11, FIX(1.011928851));          /* (c2+c4)/2 */
02992     tmp10 -= tmp2 << 2;
02993     tmp10 = MULTIPLY(tmp10, FIX(0.452548340));          /* (c2-c4)/2 */
02994     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS+PASS1_BITS);
02995     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS+PASS1_BITS);
02996 
02997     /* Odd part */
02998 
02999     tmp10 = MULTIPLY(tmp0 + tmp1, FIX(1.064004961));    /* c3 */
03000 
03001     dataptr[DCTSIZE*1] = (DCTELEM)
03002       DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.657591230)), /* c1-c3 */
03003           CONST_BITS+PASS1_BITS);
03004     dataptr[DCTSIZE*3] = (DCTELEM)
03005       DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.785601151)), /* c1+c3 */
03006           CONST_BITS+PASS1_BITS);
03007 
03008     dataptr++;          /* advance pointer to next column */
03009   }
03010 }
03011 
03012 
03013 /*
03014  * Perform the forward DCT on an 8x4 sample block.
03015  *
03016  * 8-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
03017  */
03018 
03019 GLOBAL(void)
03020 jpeg_fdct_8x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
03021 {
03022   INT32 tmp0, tmp1, tmp2, tmp3;
03023   INT32 tmp10, tmp11, tmp12, tmp13;
03024   INT32 z1;
03025   DCTELEM *dataptr;
03026   JSAMPROW elemptr;
03027   int ctr;
03028   SHIFT_TEMPS
03029 
03030   /* Zero 4 bottom rows of output coefficient block. */
03031   MEMZERO(&data[DCTSIZE*4], SIZEOF(DCTELEM) * DCTSIZE * 4);
03032 
03033   /* Pass 1: process rows.
03034    * Note results are scaled up by sqrt(8) compared to a true DCT;
03035    * furthermore, we scale the results by 2**PASS1_BITS.
03036    * We must also scale the output by 8/4 = 2, which we add here.
03037    * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
03038    */
03039 
03040   dataptr = data;
03041   for (ctr = 0; ctr < 4; ctr++) {
03042     elemptr = sample_data[ctr] + start_col;
03043 
03044     /* Even part per LL&M figure 1 --- note that published figure is faulty;
03045      * rotator "c1" should be "c6".
03046      */
03047 
03048     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
03049     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
03050     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
03051     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
03052 
03053     tmp10 = tmp0 + tmp3;
03054     tmp12 = tmp0 - tmp3;
03055     tmp11 = tmp1 + tmp2;
03056     tmp13 = tmp1 - tmp2;
03057 
03058     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
03059     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
03060     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
03061     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
03062 
03063     /* Apply unsigned->signed conversion. */
03064     dataptr[0] = (DCTELEM)
03065       ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << (PASS1_BITS+1));
03066     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << (PASS1_BITS+1));
03067 
03068     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
03069     /* Add fudge factor here for final descale. */
03070     z1 += ONE << (CONST_BITS-PASS1_BITS-2);
03071 
03072     dataptr[2] = (DCTELEM)
03073       RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
03074           CONST_BITS-PASS1_BITS-1);
03075     dataptr[6] = (DCTELEM)
03076       RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
03077           CONST_BITS-PASS1_BITS-1);
03078 
03079     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
03080      * i0..i3 in the paper are tmp0..tmp3 here.
03081      */
03082 
03083     tmp12 = tmp0 + tmp2;
03084     tmp13 = tmp1 + tmp3;
03085 
03086     z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
03087     /* Add fudge factor here for final descale. */
03088     z1 += ONE << (CONST_BITS-PASS1_BITS-2);
03089 
03090     tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
03091     tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
03092     tmp12 += z1;
03093     tmp13 += z1;
03094 
03095     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
03096     tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
03097     tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
03098     tmp0 += z1 + tmp12;
03099     tmp3 += z1 + tmp13;
03100 
03101     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
03102     tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
03103     tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
03104     tmp1 += z1 + tmp13;
03105     tmp2 += z1 + tmp12;
03106 
03107     dataptr[1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS-PASS1_BITS-1);
03108     dataptr[3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS-PASS1_BITS-1);
03109     dataptr[5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS-PASS1_BITS-1);
03110     dataptr[7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS-PASS1_BITS-1);
03111 
03112     dataptr += DCTSIZE;     /* advance pointer to next row */
03113   }
03114 
03115   /* Pass 2: process columns.
03116    * We remove the PASS1_BITS scaling, but leave the results scaled up
03117    * by an overall factor of 8.
03118    * 4-point FDCT kernel,
03119    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
03120    */
03121 
03122   dataptr = data;
03123   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
03124     /* Even part */
03125 
03126     /* Add fudge factor here for final descale. */
03127     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3] + (ONE << (PASS1_BITS-1));
03128     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
03129 
03130     tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
03131     tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
03132 
03133     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
03134     dataptr[DCTSIZE*2] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
03135 
03136     /* Odd part */
03137 
03138     tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
03139     /* Add fudge factor here for final descale. */
03140     tmp0 += ONE << (CONST_BITS+PASS1_BITS-1);
03141 
03142     dataptr[DCTSIZE*1] = (DCTELEM)
03143       RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
03144           CONST_BITS+PASS1_BITS);
03145     dataptr[DCTSIZE*3] = (DCTELEM)
03146       RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
03147           CONST_BITS+PASS1_BITS);
03148 
03149     dataptr++;          /* advance pointer to next column */
03150   }
03151 }
03152 
03153 
03154 /*
03155  * Perform the forward DCT on a 6x3 sample block.
03156  *
03157  * 6-point FDCT in pass 1 (rows), 3-point in pass 2 (columns).
03158  */
03159 
03160 GLOBAL(void)
03161 jpeg_fdct_6x3 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
03162 {
03163   INT32 tmp0, tmp1, tmp2;
03164   INT32 tmp10, tmp11, tmp12;
03165   DCTELEM *dataptr;
03166   JSAMPROW elemptr;
03167   int ctr;
03168   SHIFT_TEMPS
03169 
03170   /* Pre-zero output coefficient block. */
03171   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
03172 
03173   /* Pass 1: process rows.
03174    * Note results are scaled up by sqrt(8) compared to a true DCT;
03175    * furthermore, we scale the results by 2**PASS1_BITS.
03176    * We scale the results further by 2 as part of output adaption
03177    * scaling for different DCT size.
03178    * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
03179    */
03180 
03181   dataptr = data;
03182   for (ctr = 0; ctr < 3; ctr++) {
03183     elemptr = sample_data[ctr] + start_col;
03184 
03185     /* Even part */
03186 
03187     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
03188     tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
03189     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
03190 
03191     tmp10 = tmp0 + tmp2;
03192     tmp12 = tmp0 - tmp2;
03193 
03194     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
03195     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
03196     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
03197 
03198     /* Apply unsigned->signed conversion. */
03199     dataptr[0] = (DCTELEM)
03200       ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << (PASS1_BITS+1));
03201     dataptr[2] = (DCTELEM)
03202       DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
03203           CONST_BITS-PASS1_BITS-1);
03204     dataptr[4] = (DCTELEM)
03205       DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
03206           CONST_BITS-PASS1_BITS-1);
03207 
03208     /* Odd part */
03209 
03210     tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
03211             CONST_BITS-PASS1_BITS-1);
03212 
03213     dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << (PASS1_BITS+1)));
03214     dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << (PASS1_BITS+1));
03215     dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << (PASS1_BITS+1)));
03216 
03217     dataptr += DCTSIZE;     /* advance pointer to next row */
03218   }
03219 
03220   /* Pass 2: process columns.
03221    * We remove the PASS1_BITS scaling, but leave the results scaled up
03222    * by an overall factor of 8.
03223    * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
03224    * fold into the constant multipliers (other part was done in pass 1):
03225    * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6) * 16/9.
03226    */
03227 
03228   dataptr = data;
03229   for (ctr = 0; ctr < 6; ctr++) {
03230     /* Even part */
03231 
03232     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*2];
03233     tmp1 = dataptr[DCTSIZE*1];
03234 
03235     tmp2 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*2];
03236 
03237     dataptr[DCTSIZE*0] = (DCTELEM)
03238       DESCALE(MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),        /* 16/9 */
03239           CONST_BITS+PASS1_BITS);
03240     dataptr[DCTSIZE*2] = (DCTELEM)
03241       DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(1.257078722)), /* c2 */
03242           CONST_BITS+PASS1_BITS);
03243 
03244     /* Odd part */
03245 
03246     dataptr[DCTSIZE*1] = (DCTELEM)
03247       DESCALE(MULTIPLY(tmp2, FIX(2.177324216)),               /* c1 */
03248           CONST_BITS+PASS1_BITS);
03249 
03250     dataptr++;          /* advance pointer to next column */
03251   }
03252 }
03253 
03254 
03255 /*
03256  * Perform the forward DCT on a 4x2 sample block.
03257  *
03258  * 4-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
03259  */
03260 
03261 GLOBAL(void)
03262 jpeg_fdct_4x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
03263 {
03264   INT32 tmp0, tmp1;
03265   INT32 tmp10, tmp11;
03266   DCTELEM *dataptr;
03267   JSAMPROW elemptr;
03268   int ctr;
03269   SHIFT_TEMPS
03270 
03271   /* Pre-zero output coefficient block. */
03272   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
03273 
03274   /* Pass 1: process rows.
03275    * Note results are scaled up by sqrt(8) compared to a true DCT;
03276    * furthermore, we scale the results by 2**PASS1_BITS.
03277    * We must also scale the output by (8/4)*(8/2) = 2**3, which we add here.
03278    * 4-point FDCT kernel,
03279    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
03280    */
03281 
03282   dataptr = data;
03283   for (ctr = 0; ctr < 2; ctr++) {
03284     elemptr = sample_data[ctr] + start_col;
03285 
03286     /* Even part */
03287 
03288     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
03289     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
03290 
03291     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
03292     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
03293 
03294     /* Apply unsigned->signed conversion. */
03295     dataptr[0] = (DCTELEM)
03296       ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+3));
03297     dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+3));
03298 
03299     /* Odd part */
03300 
03301     tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
03302     /* Add fudge factor here for final descale. */
03303     tmp0 += ONE << (CONST_BITS-PASS1_BITS-4);
03304 
03305     dataptr[1] = (DCTELEM)
03306       RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
03307           CONST_BITS-PASS1_BITS-3);
03308     dataptr[3] = (DCTELEM)
03309       RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
03310           CONST_BITS-PASS1_BITS-3);
03311 
03312     dataptr += DCTSIZE;     /* advance pointer to next row */
03313   }
03314 
03315   /* Pass 2: process columns.
03316    * We remove the PASS1_BITS scaling, but leave the results scaled up
03317    * by an overall factor of 8.
03318    */
03319 
03320   dataptr = data;
03321   for (ctr = 0; ctr < 4; ctr++) {
03322     /* Even part */
03323 
03324     /* Add fudge factor here for final descale. */
03325     tmp0 = dataptr[DCTSIZE*0] + (ONE << (PASS1_BITS-1));
03326     tmp1 = dataptr[DCTSIZE*1];
03327 
03328     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp0 + tmp1, PASS1_BITS);
03329 
03330     /* Odd part */
03331 
03332     dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0 - tmp1, PASS1_BITS);
03333 
03334     dataptr++;          /* advance pointer to next column */
03335   }
03336 }
03337 
03338 
03339 /*
03340  * Perform the forward DCT on a 2x1 sample block.
03341  *
03342  * 2-point FDCT in pass 1 (rows), 1-point in pass 2 (columns).
03343  */
03344 
03345 GLOBAL(void)
03346 jpeg_fdct_2x1 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
03347 {
03348   DCTELEM tmp0, tmp1;
03349   JSAMPROW elemptr;
03350 
03351   /* Pre-zero output coefficient block. */
03352   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
03353 
03354   elemptr = sample_data[0] + start_col;
03355 
03356   tmp0 = GETJSAMPLE(elemptr[0]);
03357   tmp1 = GETJSAMPLE(elemptr[1]);
03358 
03359   /* We leave the results scaled up by an overall factor of 8.
03360    * We must also scale the output by (8/2)*(8/1) = 2**5.
03361    */
03362 
03363   /* Even part */
03364 
03365   /* Apply unsigned->signed conversion. */
03366   data[0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
03367 
03368   /* Odd part */
03369 
03370   data[1] = (tmp0 - tmp1) << 5;
03371 }
03372 
03373 
03374 /*
03375  * Perform the forward DCT on an 8x16 sample block.
03376  *
03377  * 8-point FDCT in pass 1 (rows), 16-point in pass 2 (columns).
03378  */
03379 
03380 GLOBAL(void)
03381 jpeg_fdct_8x16 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
03382 {
03383   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
03384   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16, tmp17;
03385   INT32 z1;
03386   DCTELEM workspace[DCTSIZE2];
03387   DCTELEM *dataptr;
03388   DCTELEM *wsptr;
03389   JSAMPROW elemptr;
03390   int ctr;
03391   SHIFT_TEMPS
03392 
03393   /* Pass 1: process rows.
03394    * Note results are scaled up by sqrt(8) compared to a true DCT;
03395    * furthermore, we scale the results by 2**PASS1_BITS.
03396    * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
03397    */
03398 
03399   dataptr = data;
03400   ctr = 0;
03401   for (;;) {
03402     elemptr = sample_data[ctr] + start_col;
03403 
03404     /* Even part per LL&M figure 1 --- note that published figure is faulty;
03405      * rotator "c1" should be "c6".
03406      */
03407 
03408     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[7]);
03409     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[6]);
03410     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[5]);
03411     tmp3 = GETJSAMPLE(elemptr[3]) + GETJSAMPLE(elemptr[4]);
03412 
03413     tmp10 = tmp0 + tmp3;
03414     tmp12 = tmp0 - tmp3;
03415     tmp11 = tmp1 + tmp2;
03416     tmp13 = tmp1 - tmp2;
03417 
03418     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[7]);
03419     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[6]);
03420     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[5]);
03421     tmp3 = GETJSAMPLE(elemptr[3]) - GETJSAMPLE(elemptr[4]);
03422 
03423     /* Apply unsigned->signed conversion. */
03424     dataptr[0] = (DCTELEM) ((tmp10 + tmp11 - 8 * CENTERJSAMPLE) << PASS1_BITS);
03425     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
03426 
03427     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);   /* c6 */
03428     dataptr[2] = (DCTELEM)
03429       DESCALE(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
03430           CONST_BITS-PASS1_BITS);
03431     dataptr[6] = (DCTELEM)
03432       DESCALE(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
03433           CONST_BITS-PASS1_BITS);
03434 
03435     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
03436      * i0..i3 in the paper are tmp0..tmp3 here.
03437      */
03438 
03439     tmp12 = tmp0 + tmp2;
03440     tmp13 = tmp1 + tmp3;
03441 
03442     z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);   /*  c3 */
03443     tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);      /* -c3+c5 */
03444     tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);      /* -c3-c5 */
03445     tmp12 += z1;
03446     tmp13 += z1;
03447 
03448     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);   /* -c3+c7 */
03449     tmp0 = MULTIPLY(tmp0, FIX_1_501321110);          /*  c1+c3-c5-c7 */
03450     tmp3 = MULTIPLY(tmp3, FIX_0_298631336);          /* -c1+c3+c5-c7 */
03451     tmp0 += z1 + tmp12;
03452     tmp3 += z1 + tmp13;
03453 
03454     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);   /* -c1-c3 */
03455     tmp1 = MULTIPLY(tmp1, FIX_3_072711026);          /*  c1+c3+c5-c7 */
03456     tmp2 = MULTIPLY(tmp2, FIX_2_053119869);          /*  c1+c3-c5+c7 */
03457     tmp1 += z1 + tmp13;
03458     tmp2 += z1 + tmp12;
03459 
03460     dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
03461     dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
03462     dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
03463     dataptr[7] = (DCTELEM) DESCALE(tmp3, CONST_BITS-PASS1_BITS);
03464 
03465     ctr++;
03466 
03467     if (ctr != DCTSIZE) {
03468       if (ctr == DCTSIZE * 2)
03469     break;          /* Done. */
03470       dataptr += DCTSIZE;   /* advance pointer to next row */
03471     } else
03472       dataptr = workspace;  /* switch pointer to extended workspace */
03473   }
03474 
03475   /* Pass 2: process columns.
03476    * We remove the PASS1_BITS scaling, but leave the results scaled up
03477    * by an overall factor of 8.
03478    * We must also scale the output by 8/16 = 1/2.
03479    * 16-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/32).
03480    */
03481 
03482   dataptr = data;
03483   wsptr = workspace;
03484   for (ctr = DCTSIZE-1; ctr >= 0; ctr--) {
03485     /* Even part */
03486 
03487     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*7];
03488     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*6];
03489     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*5];
03490     tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*4];
03491     tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*3];
03492     tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*2];
03493     tmp6 = dataptr[DCTSIZE*6] + wsptr[DCTSIZE*1];
03494     tmp7 = dataptr[DCTSIZE*7] + wsptr[DCTSIZE*0];
03495 
03496     tmp10 = tmp0 + tmp7;
03497     tmp14 = tmp0 - tmp7;
03498     tmp11 = tmp1 + tmp6;
03499     tmp15 = tmp1 - tmp6;
03500     tmp12 = tmp2 + tmp5;
03501     tmp16 = tmp2 - tmp5;
03502     tmp13 = tmp3 + tmp4;
03503     tmp17 = tmp3 - tmp4;
03504 
03505     tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*7];
03506     tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*6];
03507     tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*5];
03508     tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*4];
03509     tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*3];
03510     tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*2];
03511     tmp6 = dataptr[DCTSIZE*6] - wsptr[DCTSIZE*1];
03512     tmp7 = dataptr[DCTSIZE*7] - wsptr[DCTSIZE*0];
03513 
03514     dataptr[DCTSIZE*0] = (DCTELEM)
03515       DESCALE(tmp10 + tmp11 + tmp12 + tmp13, PASS1_BITS+1);
03516     dataptr[DCTSIZE*4] = (DCTELEM)
03517       DESCALE(MULTIPLY(tmp10 - tmp13, FIX(1.306562965)) + /* c4[16] = c2[8] */
03518           MULTIPLY(tmp11 - tmp12, FIX_0_541196100),   /* c12[16] = c6[8] */
03519           CONST_BITS+PASS1_BITS+1);
03520 
03521     tmp10 = MULTIPLY(tmp17 - tmp15, FIX(0.275899379)) +   /* c14[16] = c7[8] */
03522         MULTIPLY(tmp14 - tmp16, FIX(1.387039845));    /* c2[16] = c1[8] */
03523 
03524     dataptr[DCTSIZE*2] = (DCTELEM)
03525       DESCALE(tmp10 + MULTIPLY(tmp15, FIX(1.451774982))   /* c6+c14 */
03526           + MULTIPLY(tmp16, FIX(2.172734804)),        /* c2+c10 */
03527           CONST_BITS+PASS1_BITS+1);
03528     dataptr[DCTSIZE*6] = (DCTELEM)
03529       DESCALE(tmp10 - MULTIPLY(tmp14, FIX(0.211164243))   /* c2-c6 */
03530           - MULTIPLY(tmp17, FIX(1.061594338)),        /* c10+c14 */
03531           CONST_BITS+PASS1_BITS+1);
03532 
03533     /* Odd part */
03534 
03535     tmp11 = MULTIPLY(tmp0 + tmp1, FIX(1.353318001)) +         /* c3 */
03536         MULTIPLY(tmp6 - tmp7, FIX(0.410524528));          /* c13 */
03537     tmp12 = MULTIPLY(tmp0 + tmp2, FIX(1.247225013)) +         /* c5 */
03538         MULTIPLY(tmp5 + tmp7, FIX(0.666655658));          /* c11 */
03539     tmp13 = MULTIPLY(tmp0 + tmp3, FIX(1.093201867)) +         /* c7 */
03540         MULTIPLY(tmp4 - tmp7, FIX(0.897167586));          /* c9 */
03541     tmp14 = MULTIPLY(tmp1 + tmp2, FIX(0.138617169)) +         /* c15 */
03542         MULTIPLY(tmp6 - tmp5, FIX(1.407403738));          /* c1 */
03543     tmp15 = MULTIPLY(tmp1 + tmp3, - FIX(0.666655658)) +       /* -c11 */
03544         MULTIPLY(tmp4 + tmp6, - FIX(1.247225013));        /* -c5 */
03545     tmp16 = MULTIPLY(tmp2 + tmp3, - FIX(1.353318001)) +       /* -c3 */
03546         MULTIPLY(tmp5 - tmp4, FIX(0.410524528));          /* c13 */
03547     tmp10 = tmp11 + tmp12 + tmp13 -
03548         MULTIPLY(tmp0, FIX(2.286341144)) +                /* c7+c5+c3-c1 */
03549         MULTIPLY(tmp7, FIX(0.779653625));                 /* c15+c13-c11+c9 */
03550     tmp11 += tmp14 + tmp15 + MULTIPLY(tmp1, FIX(0.071888074)) /* c9-c3-c15+c11 */
03551          - MULTIPLY(tmp6, FIX(1.663905119));              /* c7+c13+c1-c5 */
03552     tmp12 += tmp14 + tmp16 - MULTIPLY(tmp2, FIX(1.125726048)) /* c7+c5+c15-c3 */
03553          + MULTIPLY(tmp5, FIX(1.227391138));              /* c9-c11+c1-c13 */
03554     tmp13 += tmp15 + tmp16 + MULTIPLY(tmp3, FIX(1.065388962)) /* c15+c3+c11-c7 */
03555          + MULTIPLY(tmp4, FIX(2.167985692));              /* c1+c13+c5-c9 */
03556 
03557     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS+1);
03558     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS+1);
03559     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS+1);
03560     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS+1);
03561 
03562     dataptr++;          /* advance pointer to next column */
03563     wsptr++;            /* advance pointer to next column */
03564   }
03565 }
03566 
03567 
03568 /*
03569  * Perform the forward DCT on a 7x14 sample block.
03570  *
03571  * 7-point FDCT in pass 1 (rows), 14-point in pass 2 (columns).
03572  */
03573 
03574 GLOBAL(void)
03575 jpeg_fdct_7x14 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
03576 {
03577   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
03578   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
03579   INT32 z1, z2, z3;
03580   DCTELEM workspace[8*6];
03581   DCTELEM *dataptr;
03582   DCTELEM *wsptr;
03583   JSAMPROW elemptr;
03584   int ctr;
03585   SHIFT_TEMPS
03586 
03587   /* Pre-zero output coefficient block. */
03588   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
03589 
03590   /* Pass 1: process rows.
03591    * Note results are scaled up by sqrt(8) compared to a true DCT;
03592    * furthermore, we scale the results by 2**PASS1_BITS.
03593    * 7-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/14).
03594    */
03595 
03596   dataptr = data;
03597   ctr = 0;
03598   for (;;) {
03599     elemptr = sample_data[ctr] + start_col;
03600 
03601     /* Even part */
03602 
03603     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[6]);
03604     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[5]);
03605     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[4]);
03606     tmp3 = GETJSAMPLE(elemptr[3]);
03607 
03608     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[6]);
03609     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[5]);
03610     tmp12 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[4]);
03611 
03612     z1 = tmp0 + tmp2;
03613     /* Apply unsigned->signed conversion. */
03614     dataptr[0] = (DCTELEM)
03615       ((z1 + tmp1 + tmp3 - 7 * CENTERJSAMPLE) << PASS1_BITS);
03616     tmp3 += tmp3;
03617     z1 -= tmp3;
03618     z1 -= tmp3;
03619     z1 = MULTIPLY(z1, FIX(0.353553391));                /* (c2+c6-c4)/2 */
03620     z2 = MULTIPLY(tmp0 - tmp2, FIX(0.920609002));       /* (c2+c4-c6)/2 */
03621     z3 = MULTIPLY(tmp1 - tmp2, FIX(0.314692123));       /* c6 */
03622     dataptr[2] = (DCTELEM) DESCALE(z1 + z2 + z3, CONST_BITS-PASS1_BITS);
03623     z1 -= z2;
03624     z2 = MULTIPLY(tmp0 - tmp1, FIX(0.881747734));       /* c4 */
03625     dataptr[4] = (DCTELEM)
03626       DESCALE(z2 + z3 - MULTIPLY(tmp1 - tmp3, FIX(0.707106781)), /* c2+c6-c4 */
03627           CONST_BITS-PASS1_BITS);
03628     dataptr[6] = (DCTELEM) DESCALE(z1 + z2, CONST_BITS-PASS1_BITS);
03629 
03630     /* Odd part */
03631 
03632     tmp1 = MULTIPLY(tmp10 + tmp11, FIX(0.935414347));   /* (c3+c1-c5)/2 */
03633     tmp2 = MULTIPLY(tmp10 - tmp11, FIX(0.170262339));   /* (c3+c5-c1)/2 */
03634     tmp0 = tmp1 - tmp2;
03635     tmp1 += tmp2;
03636     tmp2 = MULTIPLY(tmp11 + tmp12, - FIX(1.378756276)); /* -c1 */
03637     tmp1 += tmp2;
03638     tmp3 = MULTIPLY(tmp10 + tmp12, FIX(0.613604268));   /* c5 */
03639     tmp0 += tmp3;
03640     tmp2 += tmp3 + MULTIPLY(tmp12, FIX(1.870828693));   /* c3+c1-c5 */
03641 
03642     dataptr[1] = (DCTELEM) DESCALE(tmp0, CONST_BITS-PASS1_BITS);
03643     dataptr[3] = (DCTELEM) DESCALE(tmp1, CONST_BITS-PASS1_BITS);
03644     dataptr[5] = (DCTELEM) DESCALE(tmp2, CONST_BITS-PASS1_BITS);
03645 
03646     ctr++;
03647 
03648     if (ctr != DCTSIZE) {
03649       if (ctr == 14)
03650     break;          /* Done. */
03651       dataptr += DCTSIZE;   /* advance pointer to next row */
03652     } else
03653       dataptr = workspace;  /* switch pointer to extended workspace */
03654   }
03655 
03656   /* Pass 2: process columns.
03657    * We remove the PASS1_BITS scaling, but leave the results scaled up
03658    * by an overall factor of 8.
03659    * We must also scale the output by (8/7)*(8/14) = 32/49, which we
03660    * fold into the constant multipliers:
03661    * 14-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/28) * 32/49.
03662    */
03663 
03664   dataptr = data;
03665   wsptr = workspace;
03666   for (ctr = 0; ctr < 7; ctr++) {
03667     /* Even part */
03668 
03669     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*5];
03670     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*4];
03671     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*3];
03672     tmp13 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*2];
03673     tmp4 = dataptr[DCTSIZE*4] + wsptr[DCTSIZE*1];
03674     tmp5 = dataptr[DCTSIZE*5] + wsptr[DCTSIZE*0];
03675     tmp6 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7];
03676 
03677     tmp10 = tmp0 + tmp6;
03678     tmp14 = tmp0 - tmp6;
03679     tmp11 = tmp1 + tmp5;
03680     tmp15 = tmp1 - tmp5;
03681     tmp12 = tmp2 + tmp4;
03682     tmp16 = tmp2 - tmp4;
03683 
03684     tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*5];
03685     tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*4];
03686     tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*3];
03687     tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*2];
03688     tmp4 = dataptr[DCTSIZE*4] - wsptr[DCTSIZE*1];
03689     tmp5 = dataptr[DCTSIZE*5] - wsptr[DCTSIZE*0];
03690     tmp6 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
03691 
03692     dataptr[DCTSIZE*0] = (DCTELEM)
03693       DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12 + tmp13,
03694                FIX(0.653061224)),                 /* 32/49 */
03695           CONST_BITS+PASS1_BITS);
03696     tmp13 += tmp13;
03697     dataptr[DCTSIZE*4] = (DCTELEM)
03698       DESCALE(MULTIPLY(tmp10 - tmp13, FIX(0.832106052)) + /* c4 */
03699           MULTIPLY(tmp11 - tmp13, FIX(0.205513223)) - /* c12 */
03700           MULTIPLY(tmp12 - tmp13, FIX(0.575835255)),  /* c8 */
03701           CONST_BITS+PASS1_BITS);
03702 
03703     tmp10 = MULTIPLY(tmp14 + tmp15, FIX(0.722074570));    /* c6 */
03704 
03705     dataptr[DCTSIZE*2] = (DCTELEM)
03706       DESCALE(tmp10 + MULTIPLY(tmp14, FIX(0.178337691))   /* c2-c6 */
03707           + MULTIPLY(tmp16, FIX(0.400721155)),        /* c10 */
03708           CONST_BITS+PASS1_BITS);
03709     dataptr[DCTSIZE*6] = (DCTELEM)
03710       DESCALE(tmp10 - MULTIPLY(tmp15, FIX(1.122795725))   /* c6+c10 */
03711           - MULTIPLY(tmp16, FIX(0.900412262)),        /* c2 */
03712           CONST_BITS+PASS1_BITS);
03713 
03714     /* Odd part */
03715 
03716     tmp10 = tmp1 + tmp2;
03717     tmp11 = tmp5 - tmp4;
03718     dataptr[DCTSIZE*7] = (DCTELEM)
03719       DESCALE(MULTIPLY(tmp0 - tmp10 + tmp3 - tmp11 - tmp6,
03720                FIX(0.653061224)),                 /* 32/49 */
03721           CONST_BITS+PASS1_BITS);
03722     tmp3  = MULTIPLY(tmp3 , FIX(0.653061224));            /* 32/49 */
03723     tmp10 = MULTIPLY(tmp10, - FIX(0.103406812));          /* -c13 */
03724     tmp11 = MULTIPLY(tmp11, FIX(0.917760839));            /* c1 */
03725     tmp10 += tmp11 - tmp3;
03726     tmp11 = MULTIPLY(tmp0 + tmp2, FIX(0.782007410)) +     /* c5 */
03727         MULTIPLY(tmp4 + tmp6, FIX(0.491367823));      /* c9 */
03728     dataptr[DCTSIZE*5] = (DCTELEM)
03729       DESCALE(tmp10 + tmp11 - MULTIPLY(tmp2, FIX(1.550341076)) /* c3+c5-c13 */
03730           + MULTIPLY(tmp4, FIX(0.731428202)),         /* c1+c11-c9 */
03731           CONST_BITS+PASS1_BITS);
03732     tmp12 = MULTIPLY(tmp0 + tmp1, FIX(0.871740478)) +     /* c3 */
03733         MULTIPLY(tmp5 - tmp6, FIX(0.305035186));      /* c11 */
03734     dataptr[DCTSIZE*3] = (DCTELEM)
03735       DESCALE(tmp10 + tmp12 - MULTIPLY(tmp1, FIX(0.276965844)) /* c3-c9-c13 */
03736           - MULTIPLY(tmp5, FIX(2.004803435)),         /* c1+c5+c11 */
03737           CONST_BITS+PASS1_BITS);
03738     dataptr[DCTSIZE*1] = (DCTELEM)
03739       DESCALE(tmp11 + tmp12 + tmp3
03740           - MULTIPLY(tmp0, FIX(0.735987049))          /* c3+c5-c1 */
03741           - MULTIPLY(tmp6, FIX(0.082925825)),         /* c9-c11-c13 */
03742           CONST_BITS+PASS1_BITS);
03743 
03744     dataptr++;          /* advance pointer to next column */
03745     wsptr++;            /* advance pointer to next column */
03746   }
03747 }
03748 
03749 
03750 /*
03751  * Perform the forward DCT on a 6x12 sample block.
03752  *
03753  * 6-point FDCT in pass 1 (rows), 12-point in pass 2 (columns).
03754  */
03755 
03756 GLOBAL(void)
03757 jpeg_fdct_6x12 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
03758 {
03759   INT32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5;
03760   INT32 tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
03761   DCTELEM workspace[8*4];
03762   DCTELEM *dataptr;
03763   DCTELEM *wsptr;
03764   JSAMPROW elemptr;
03765   int ctr;
03766   SHIFT_TEMPS
03767 
03768   /* Pre-zero output coefficient block. */
03769   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
03770 
03771   /* Pass 1: process rows.
03772    * Note results are scaled up by sqrt(8) compared to a true DCT;
03773    * furthermore, we scale the results by 2**PASS1_BITS.
03774    * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12).
03775    */
03776 
03777   dataptr = data;
03778   ctr = 0;
03779   for (;;) {
03780     elemptr = sample_data[ctr] + start_col;
03781 
03782     /* Even part */
03783 
03784     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[5]);
03785     tmp11 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[4]);
03786     tmp2 = GETJSAMPLE(elemptr[2]) + GETJSAMPLE(elemptr[3]);
03787 
03788     tmp10 = tmp0 + tmp2;
03789     tmp12 = tmp0 - tmp2;
03790 
03791     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[5]);
03792     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[4]);
03793     tmp2 = GETJSAMPLE(elemptr[2]) - GETJSAMPLE(elemptr[3]);
03794 
03795     /* Apply unsigned->signed conversion. */
03796     dataptr[0] = (DCTELEM)
03797       ((tmp10 + tmp11 - 6 * CENTERJSAMPLE) << PASS1_BITS);
03798     dataptr[2] = (DCTELEM)
03799       DESCALE(MULTIPLY(tmp12, FIX(1.224744871)),                 /* c2 */
03800           CONST_BITS-PASS1_BITS);
03801     dataptr[4] = (DCTELEM)
03802       DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(0.707106781)), /* c4 */
03803           CONST_BITS-PASS1_BITS);
03804 
03805     /* Odd part */
03806 
03807     tmp10 = DESCALE(MULTIPLY(tmp0 + tmp2, FIX(0.366025404)),     /* c5 */
03808             CONST_BITS-PASS1_BITS);
03809 
03810     dataptr[1] = (DCTELEM) (tmp10 + ((tmp0 + tmp1) << PASS1_BITS));
03811     dataptr[3] = (DCTELEM) ((tmp0 - tmp1 - tmp2) << PASS1_BITS);
03812     dataptr[5] = (DCTELEM) (tmp10 + ((tmp2 - tmp1) << PASS1_BITS));
03813 
03814     ctr++;
03815 
03816     if (ctr != DCTSIZE) {
03817       if (ctr == 12)
03818     break;          /* Done. */
03819       dataptr += DCTSIZE;   /* advance pointer to next row */
03820     } else
03821       dataptr = workspace;  /* switch pointer to extended workspace */
03822   }
03823 
03824   /* Pass 2: process columns.
03825    * We remove the PASS1_BITS scaling, but leave the results scaled up
03826    * by an overall factor of 8.
03827    * We must also scale the output by (8/6)*(8/12) = 8/9, which we
03828    * fold into the constant multipliers:
03829    * 12-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/24) * 8/9.
03830    */
03831 
03832   dataptr = data;
03833   wsptr = workspace;
03834   for (ctr = 0; ctr < 6; ctr++) {
03835     /* Even part */
03836 
03837     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*3];
03838     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*2];
03839     tmp2 = dataptr[DCTSIZE*2] + wsptr[DCTSIZE*1];
03840     tmp3 = dataptr[DCTSIZE*3] + wsptr[DCTSIZE*0];
03841     tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*7];
03842     tmp5 = dataptr[DCTSIZE*5] + dataptr[DCTSIZE*6];
03843 
03844     tmp10 = tmp0 + tmp5;
03845     tmp13 = tmp0 - tmp5;
03846     tmp11 = tmp1 + tmp4;
03847     tmp14 = tmp1 - tmp4;
03848     tmp12 = tmp2 + tmp3;
03849     tmp15 = tmp2 - tmp3;
03850 
03851     tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*3];
03852     tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*2];
03853     tmp2 = dataptr[DCTSIZE*2] - wsptr[DCTSIZE*1];
03854     tmp3 = dataptr[DCTSIZE*3] - wsptr[DCTSIZE*0];
03855     tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*7];
03856     tmp5 = dataptr[DCTSIZE*5] - dataptr[DCTSIZE*6];
03857 
03858     dataptr[DCTSIZE*0] = (DCTELEM)
03859       DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(0.888888889)), /* 8/9 */
03860           CONST_BITS+PASS1_BITS);
03861     dataptr[DCTSIZE*6] = (DCTELEM)
03862       DESCALE(MULTIPLY(tmp13 - tmp14 - tmp15, FIX(0.888888889)), /* 8/9 */
03863           CONST_BITS+PASS1_BITS);
03864     dataptr[DCTSIZE*4] = (DCTELEM)
03865       DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.088662108)),         /* c4 */
03866           CONST_BITS+PASS1_BITS);
03867     dataptr[DCTSIZE*2] = (DCTELEM)
03868       DESCALE(MULTIPLY(tmp14 - tmp15, FIX(0.888888889)) +        /* 8/9 */
03869           MULTIPLY(tmp13 + tmp15, FIX(1.214244803)),         /* c2 */
03870           CONST_BITS+PASS1_BITS);
03871 
03872     /* Odd part */
03873 
03874     tmp10 = MULTIPLY(tmp1 + tmp4, FIX(0.481063200));   /* c9 */
03875     tmp14 = tmp10 + MULTIPLY(tmp1, FIX(0.680326102));  /* c3-c9 */
03876     tmp15 = tmp10 - MULTIPLY(tmp4, FIX(1.642452502));  /* c3+c9 */
03877     tmp12 = MULTIPLY(tmp0 + tmp2, FIX(0.997307603));   /* c5 */
03878     tmp13 = MULTIPLY(tmp0 + tmp3, FIX(0.765261039));   /* c7 */
03879     tmp10 = tmp12 + tmp13 + tmp14 - MULTIPLY(tmp0, FIX(0.516244403)) /* c5+c7-c1 */
03880         + MULTIPLY(tmp5, FIX(0.164081699));        /* c11 */
03881     tmp11 = MULTIPLY(tmp2 + tmp3, - FIX(0.164081699)); /* -c11 */
03882     tmp12 += tmp11 - tmp15 - MULTIPLY(tmp2, FIX(2.079550144)) /* c1+c5-c11 */
03883         + MULTIPLY(tmp5, FIX(0.765261039));        /* c7 */
03884     tmp13 += tmp11 - tmp14 + MULTIPLY(tmp3, FIX(0.645144899)) /* c1+c11-c7 */
03885         - MULTIPLY(tmp5, FIX(0.997307603));        /* c5 */
03886     tmp11 = tmp15 + MULTIPLY(tmp0 - tmp3, FIX(1.161389302)) /* c3 */
03887         - MULTIPLY(tmp2 + tmp5, FIX(0.481063200)); /* c9 */
03888 
03889     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10, CONST_BITS+PASS1_BITS);
03890     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp11, CONST_BITS+PASS1_BITS);
03891     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12, CONST_BITS+PASS1_BITS);
03892     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp13, CONST_BITS+PASS1_BITS);
03893 
03894     dataptr++;          /* advance pointer to next column */
03895     wsptr++;            /* advance pointer to next column */
03896   }
03897 }
03898 
03899 
03900 /*
03901  * Perform the forward DCT on a 5x10 sample block.
03902  *
03903  * 5-point FDCT in pass 1 (rows), 10-point in pass 2 (columns).
03904  */
03905 
03906 GLOBAL(void)
03907 jpeg_fdct_5x10 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
03908 {
03909   INT32 tmp0, tmp1, tmp2, tmp3, tmp4;
03910   INT32 tmp10, tmp11, tmp12, tmp13, tmp14;
03911   DCTELEM workspace[8*2];
03912   DCTELEM *dataptr;
03913   DCTELEM *wsptr;
03914   JSAMPROW elemptr;
03915   int ctr;
03916   SHIFT_TEMPS
03917 
03918   /* Pre-zero output coefficient block. */
03919   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
03920 
03921   /* Pass 1: process rows.
03922    * Note results are scaled up by sqrt(8) compared to a true DCT;
03923    * furthermore, we scale the results by 2**PASS1_BITS.
03924    * 5-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/10).
03925    */
03926 
03927   dataptr = data;
03928   ctr = 0;
03929   for (;;) {
03930     elemptr = sample_data[ctr] + start_col;
03931 
03932     /* Even part */
03933 
03934     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[4]);
03935     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[3]);
03936     tmp2 = GETJSAMPLE(elemptr[2]);
03937 
03938     tmp10 = tmp0 + tmp1;
03939     tmp11 = tmp0 - tmp1;
03940 
03941     tmp0 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[4]);
03942     tmp1 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[3]);
03943 
03944     /* Apply unsigned->signed conversion. */
03945     dataptr[0] = (DCTELEM)
03946       ((tmp10 + tmp2 - 5 * CENTERJSAMPLE) << PASS1_BITS);
03947     tmp11 = MULTIPLY(tmp11, FIX(0.790569415));          /* (c2+c4)/2 */
03948     tmp10 -= tmp2 << 2;
03949     tmp10 = MULTIPLY(tmp10, FIX(0.353553391));          /* (c2-c4)/2 */
03950     dataptr[2] = (DCTELEM) DESCALE(tmp11 + tmp10, CONST_BITS-PASS1_BITS);
03951     dataptr[4] = (DCTELEM) DESCALE(tmp11 - tmp10, CONST_BITS-PASS1_BITS);
03952 
03953     /* Odd part */
03954 
03955     tmp10 = MULTIPLY(tmp0 + tmp1, FIX(0.831253876));    /* c3 */
03956 
03957     dataptr[1] = (DCTELEM)
03958       DESCALE(tmp10 + MULTIPLY(tmp0, FIX(0.513743148)), /* c1-c3 */
03959           CONST_BITS-PASS1_BITS);
03960     dataptr[3] = (DCTELEM)
03961       DESCALE(tmp10 - MULTIPLY(tmp1, FIX(2.176250899)), /* c1+c3 */
03962           CONST_BITS-PASS1_BITS);
03963 
03964     ctr++;
03965 
03966     if (ctr != DCTSIZE) {
03967       if (ctr == 10)
03968     break;          /* Done. */
03969       dataptr += DCTSIZE;   /* advance pointer to next row */
03970     } else
03971       dataptr = workspace;  /* switch pointer to extended workspace */
03972   }
03973 
03974   /* Pass 2: process columns.
03975    * We remove the PASS1_BITS scaling, but leave the results scaled up
03976    * by an overall factor of 8.
03977    * We must also scale the output by (8/5)*(8/10) = 32/25, which we
03978    * fold into the constant multipliers:
03979    * 10-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/20) * 32/25.
03980    */
03981 
03982   dataptr = data;
03983   wsptr = workspace;
03984   for (ctr = 0; ctr < 5; ctr++) {
03985     /* Even part */
03986 
03987     tmp0 = dataptr[DCTSIZE*0] + wsptr[DCTSIZE*1];
03988     tmp1 = dataptr[DCTSIZE*1] + wsptr[DCTSIZE*0];
03989     tmp12 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*7];
03990     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*6];
03991     tmp4 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5];
03992 
03993     tmp10 = tmp0 + tmp4;
03994     tmp13 = tmp0 - tmp4;
03995     tmp11 = tmp1 + tmp3;
03996     tmp14 = tmp1 - tmp3;
03997 
03998     tmp0 = dataptr[DCTSIZE*0] - wsptr[DCTSIZE*1];
03999     tmp1 = dataptr[DCTSIZE*1] - wsptr[DCTSIZE*0];
04000     tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*7];
04001     tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*6];
04002     tmp4 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
04003 
04004     dataptr[DCTSIZE*0] = (DCTELEM)
04005       DESCALE(MULTIPLY(tmp10 + tmp11 + tmp12, FIX(1.28)), /* 32/25 */
04006           CONST_BITS+PASS1_BITS);
04007     tmp12 += tmp12;
04008     dataptr[DCTSIZE*4] = (DCTELEM)
04009       DESCALE(MULTIPLY(tmp10 - tmp12, FIX(1.464477191)) - /* c4 */
04010           MULTIPLY(tmp11 - tmp12, FIX(0.559380511)),  /* c8 */
04011           CONST_BITS+PASS1_BITS);
04012     tmp10 = MULTIPLY(tmp13 + tmp14, FIX(1.064004961));    /* c6 */
04013     dataptr[DCTSIZE*2] = (DCTELEM)
04014       DESCALE(tmp10 + MULTIPLY(tmp13, FIX(0.657591230)),  /* c2-c6 */
04015           CONST_BITS+PASS1_BITS);
04016     dataptr[DCTSIZE*6] = (DCTELEM)
04017       DESCALE(tmp10 - MULTIPLY(tmp14, FIX(2.785601151)),  /* c2+c6 */
04018           CONST_BITS+PASS1_BITS);
04019 
04020     /* Odd part */
04021 
04022     tmp10 = tmp0 + tmp4;
04023     tmp11 = tmp1 - tmp3;
04024     dataptr[DCTSIZE*5] = (DCTELEM)
04025       DESCALE(MULTIPLY(tmp10 - tmp11 - tmp2, FIX(1.28)),  /* 32/25 */
04026           CONST_BITS+PASS1_BITS);
04027     tmp2 = MULTIPLY(tmp2, FIX(1.28));                     /* 32/25 */
04028     dataptr[DCTSIZE*1] = (DCTELEM)
04029       DESCALE(MULTIPLY(tmp0, FIX(1.787906876)) +          /* c1 */
04030           MULTIPLY(tmp1, FIX(1.612894094)) + tmp2 +   /* c3 */
04031           MULTIPLY(tmp3, FIX(0.821810588)) +          /* c7 */
04032           MULTIPLY(tmp4, FIX(0.283176630)),           /* c9 */
04033           CONST_BITS+PASS1_BITS);
04034     tmp12 = MULTIPLY(tmp0 - tmp4, FIX(1.217352341)) -     /* (c3+c7)/2 */
04035         MULTIPLY(tmp1 + tmp3, FIX(0.752365123));      /* (c1-c9)/2 */
04036     tmp13 = MULTIPLY(tmp10 + tmp11, FIX(0.395541753)) +   /* (c3-c7)/2 */
04037         MULTIPLY(tmp11, FIX(0.64)) - tmp2;            /* 16/25 */
04038     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp12 + tmp13, CONST_BITS+PASS1_BITS);
04039     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp12 - tmp13, CONST_BITS+PASS1_BITS);
04040 
04041     dataptr++;          /* advance pointer to next column */
04042     wsptr++;            /* advance pointer to next column */
04043   }
04044 }
04045 
04046 
04047 /*
04048  * Perform the forward DCT on a 4x8 sample block.
04049  *
04050  * 4-point FDCT in pass 1 (rows), 8-point in pass 2 (columns).
04051  */
04052 
04053 GLOBAL(void)
04054 jpeg_fdct_4x8 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
04055 {
04056   INT32 tmp0, tmp1, tmp2, tmp3;
04057   INT32 tmp10, tmp11, tmp12, tmp13;
04058   INT32 z1;
04059   DCTELEM *dataptr;
04060   JSAMPROW elemptr;
04061   int ctr;
04062   SHIFT_TEMPS
04063 
04064   /* Pre-zero output coefficient block. */
04065   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
04066 
04067   /* Pass 1: process rows.
04068    * Note results are scaled up by sqrt(8) compared to a true DCT;
04069    * furthermore, we scale the results by 2**PASS1_BITS.
04070    * We must also scale the output by 8/4 = 2, which we add here.
04071    * 4-point FDCT kernel,
04072    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
04073    */
04074 
04075   dataptr = data;
04076   for (ctr = 0; ctr < DCTSIZE; ctr++) {
04077     elemptr = sample_data[ctr] + start_col;
04078 
04079     /* Even part */
04080 
04081     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[3]);
04082     tmp1 = GETJSAMPLE(elemptr[1]) + GETJSAMPLE(elemptr[2]);
04083 
04084     tmp10 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[3]);
04085     tmp11 = GETJSAMPLE(elemptr[1]) - GETJSAMPLE(elemptr[2]);
04086 
04087     /* Apply unsigned->signed conversion. */
04088     dataptr[0] = (DCTELEM)
04089       ((tmp0 + tmp1 - 4 * CENTERJSAMPLE) << (PASS1_BITS+1));
04090     dataptr[2] = (DCTELEM) ((tmp0 - tmp1) << (PASS1_BITS+1));
04091 
04092     /* Odd part */
04093 
04094     tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
04095     /* Add fudge factor here for final descale. */
04096     tmp0 += ONE << (CONST_BITS-PASS1_BITS-2);
04097 
04098     dataptr[1] = (DCTELEM)
04099       RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
04100           CONST_BITS-PASS1_BITS-1);
04101     dataptr[3] = (DCTELEM)
04102       RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
04103           CONST_BITS-PASS1_BITS-1);
04104 
04105     dataptr += DCTSIZE;     /* advance pointer to next row */
04106   }
04107 
04108   /* Pass 2: process columns.
04109    * We remove the PASS1_BITS scaling, but leave the results scaled up
04110    * by an overall factor of 8.
04111    * 8-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/16).
04112    */
04113 
04114   dataptr = data;
04115   for (ctr = 0; ctr < 4; ctr++) {
04116     /* Even part per LL&M figure 1 --- note that published figure is faulty;
04117      * rotator "c1" should be "c6".
04118      */
04119 
04120     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*7];
04121     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*6];
04122     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*5];
04123     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
04124 
04125     /* Add fudge factor here for final descale. */
04126     tmp10 = tmp0 + tmp3 + (ONE << (PASS1_BITS-1));
04127     tmp12 = tmp0 - tmp3;
04128     tmp11 = tmp1 + tmp2;
04129     tmp13 = tmp1 - tmp2;
04130 
04131     tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*7];
04132     tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*6];
04133     tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
04134     tmp3 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
04135 
04136     dataptr[DCTSIZE*0] = (DCTELEM) RIGHT_SHIFT(tmp10 + tmp11, PASS1_BITS);
04137     dataptr[DCTSIZE*4] = (DCTELEM) RIGHT_SHIFT(tmp10 - tmp11, PASS1_BITS);
04138 
04139     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);       /* c6 */
04140     /* Add fudge factor here for final descale. */
04141     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
04142 
04143     dataptr[DCTSIZE*2] = (DCTELEM)
04144       RIGHT_SHIFT(z1 + MULTIPLY(tmp12, FIX_0_765366865), /* c2-c6 */
04145           CONST_BITS+PASS1_BITS);
04146     dataptr[DCTSIZE*6] = (DCTELEM)
04147       RIGHT_SHIFT(z1 - MULTIPLY(tmp13, FIX_1_847759065), /* c2+c6 */
04148           CONST_BITS+PASS1_BITS);
04149 
04150     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
04151      * i0..i3 in the paper are tmp0..tmp3 here.
04152      */
04153 
04154     tmp12 = tmp0 + tmp2;
04155     tmp13 = tmp1 + tmp3;
04156 
04157     z1 = MULTIPLY(tmp12 + tmp13, FIX_1_175875602);       /*  c3 */
04158     /* Add fudge factor here for final descale. */
04159     z1 += ONE << (CONST_BITS+PASS1_BITS-1);
04160 
04161     tmp12 = MULTIPLY(tmp12, - FIX_0_390180644);          /* -c3+c5 */
04162     tmp13 = MULTIPLY(tmp13, - FIX_1_961570560);          /* -c3-c5 */
04163     tmp12 += z1;
04164     tmp13 += z1;
04165 
04166     z1 = MULTIPLY(tmp0 + tmp3, - FIX_0_899976223);       /* -c3+c7 */
04167     tmp0 = MULTIPLY(tmp0, FIX_1_501321110);              /*  c1+c3-c5-c7 */
04168     tmp3 = MULTIPLY(tmp3, FIX_0_298631336);              /* -c1+c3+c5-c7 */
04169     tmp0 += z1 + tmp12;
04170     tmp3 += z1 + tmp13;
04171 
04172     z1 = MULTIPLY(tmp1 + tmp2, - FIX_2_562915447);       /* -c1-c3 */
04173     tmp1 = MULTIPLY(tmp1, FIX_3_072711026);              /*  c1+c3+c5-c7 */
04174     tmp2 = MULTIPLY(tmp2, FIX_2_053119869);              /*  c1+c3-c5+c7 */
04175     tmp1 += z1 + tmp13;
04176     tmp2 += z1 + tmp12;
04177 
04178     dataptr[DCTSIZE*1] = (DCTELEM) RIGHT_SHIFT(tmp0, CONST_BITS+PASS1_BITS);
04179     dataptr[DCTSIZE*3] = (DCTELEM) RIGHT_SHIFT(tmp1, CONST_BITS+PASS1_BITS);
04180     dataptr[DCTSIZE*5] = (DCTELEM) RIGHT_SHIFT(tmp2, CONST_BITS+PASS1_BITS);
04181     dataptr[DCTSIZE*7] = (DCTELEM) RIGHT_SHIFT(tmp3, CONST_BITS+PASS1_BITS);
04182 
04183     dataptr++;          /* advance pointer to next column */
04184   }
04185 }
04186 
04187 
04188 /*
04189  * Perform the forward DCT on a 3x6 sample block.
04190  *
04191  * 3-point FDCT in pass 1 (rows), 6-point in pass 2 (columns).
04192  */
04193 
04194 GLOBAL(void)
04195 jpeg_fdct_3x6 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
04196 {
04197   INT32 tmp0, tmp1, tmp2;
04198   INT32 tmp10, tmp11, tmp12;
04199   DCTELEM *dataptr;
04200   JSAMPROW elemptr;
04201   int ctr;
04202   SHIFT_TEMPS
04203 
04204   /* Pre-zero output coefficient block. */
04205   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
04206 
04207   /* Pass 1: process rows.
04208    * Note results are scaled up by sqrt(8) compared to a true DCT;
04209    * furthermore, we scale the results by 2**PASS1_BITS.
04210    * We scale the results further by 2 as part of output adaption
04211    * scaling for different DCT size.
04212    * 3-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/6).
04213    */
04214 
04215   dataptr = data;
04216   for (ctr = 0; ctr < 6; ctr++) {
04217     elemptr = sample_data[ctr] + start_col;
04218 
04219     /* Even part */
04220 
04221     tmp0 = GETJSAMPLE(elemptr[0]) + GETJSAMPLE(elemptr[2]);
04222     tmp1 = GETJSAMPLE(elemptr[1]);
04223 
04224     tmp2 = GETJSAMPLE(elemptr[0]) - GETJSAMPLE(elemptr[2]);
04225 
04226     /* Apply unsigned->signed conversion. */
04227     dataptr[0] = (DCTELEM)
04228       ((tmp0 + tmp1 - 3 * CENTERJSAMPLE) << (PASS1_BITS+1));
04229     dataptr[2] = (DCTELEM)
04230       DESCALE(MULTIPLY(tmp0 - tmp1 - tmp1, FIX(0.707106781)), /* c2 */
04231           CONST_BITS-PASS1_BITS-1);
04232 
04233     /* Odd part */
04234 
04235     dataptr[1] = (DCTELEM)
04236       DESCALE(MULTIPLY(tmp2, FIX(1.224744871)),               /* c1 */
04237           CONST_BITS-PASS1_BITS-1);
04238 
04239     dataptr += DCTSIZE;     /* advance pointer to next row */
04240   }
04241 
04242   /* Pass 2: process columns.
04243    * We remove the PASS1_BITS scaling, but leave the results scaled up
04244    * by an overall factor of 8.
04245    * We must also scale the output by (8/6)*(8/3) = 32/9, which we partially
04246    * fold into the constant multipliers (other part was done in pass 1):
04247    * 6-point FDCT kernel, cK represents sqrt(2) * cos(K*pi/12) * 16/9.
04248    */
04249 
04250   dataptr = data;
04251   for (ctr = 0; ctr < 3; ctr++) {
04252     /* Even part */
04253 
04254     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*5];
04255     tmp11 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*4];
04256     tmp2 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3];
04257 
04258     tmp10 = tmp0 + tmp2;
04259     tmp12 = tmp0 - tmp2;
04260 
04261     tmp0 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*5];
04262     tmp1 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*4];
04263     tmp2 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
04264 
04265     dataptr[DCTSIZE*0] = (DCTELEM)
04266       DESCALE(MULTIPLY(tmp10 + tmp11, FIX(1.777777778)),         /* 16/9 */
04267           CONST_BITS+PASS1_BITS);
04268     dataptr[DCTSIZE*2] = (DCTELEM)
04269       DESCALE(MULTIPLY(tmp12, FIX(2.177324216)),                 /* c2 */
04270           CONST_BITS+PASS1_BITS);
04271     dataptr[DCTSIZE*4] = (DCTELEM)
04272       DESCALE(MULTIPLY(tmp10 - tmp11 - tmp11, FIX(1.257078722)), /* c4 */
04273           CONST_BITS+PASS1_BITS);
04274 
04275     /* Odd part */
04276 
04277     tmp10 = MULTIPLY(tmp0 + tmp2, FIX(0.650711829));             /* c5 */
04278 
04279     dataptr[DCTSIZE*1] = (DCTELEM)
04280       DESCALE(tmp10 + MULTIPLY(tmp0 + tmp1, FIX(1.777777778)),   /* 16/9 */
04281           CONST_BITS+PASS1_BITS);
04282     dataptr[DCTSIZE*3] = (DCTELEM)
04283       DESCALE(MULTIPLY(tmp0 - tmp1 - tmp2, FIX(1.777777778)),    /* 16/9 */
04284           CONST_BITS+PASS1_BITS);
04285     dataptr[DCTSIZE*5] = (DCTELEM)
04286       DESCALE(tmp10 + MULTIPLY(tmp2 - tmp1, FIX(1.777777778)),   /* 16/9 */
04287           CONST_BITS+PASS1_BITS);
04288 
04289     dataptr++;          /* advance pointer to next column */
04290   }
04291 }
04292 
04293 
04294 /*
04295  * Perform the forward DCT on a 2x4 sample block.
04296  *
04297  * 2-point FDCT in pass 1 (rows), 4-point in pass 2 (columns).
04298  */
04299 
04300 GLOBAL(void)
04301 jpeg_fdct_2x4 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
04302 {
04303   INT32 tmp0, tmp1;
04304   INT32 tmp10, tmp11;
04305   DCTELEM *dataptr;
04306   JSAMPROW elemptr;
04307   int ctr;
04308   SHIFT_TEMPS
04309 
04310   /* Pre-zero output coefficient block. */
04311   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
04312 
04313   /* Pass 1: process rows.
04314    * Note results are scaled up by sqrt(8) compared to a true DCT.
04315    * We must also scale the output by (8/2)*(8/4) = 2**3, which we add here.
04316    */
04317 
04318   dataptr = data;
04319   for (ctr = 0; ctr < 4; ctr++) {
04320     elemptr = sample_data[ctr] + start_col;
04321 
04322     /* Even part */
04323 
04324     tmp0 = GETJSAMPLE(elemptr[0]);
04325     tmp1 = GETJSAMPLE(elemptr[1]);
04326 
04327     /* Apply unsigned->signed conversion. */
04328     dataptr[0] = (DCTELEM) ((tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 3);
04329 
04330     /* Odd part */
04331 
04332     dataptr[1] = (DCTELEM) ((tmp0 - tmp1) << 3);
04333 
04334     dataptr += DCTSIZE;     /* advance pointer to next row */
04335   }
04336 
04337   /* Pass 2: process columns.
04338    * We leave the results scaled up by an overall factor of 8.
04339    * 4-point FDCT kernel,
04340    * cK represents sqrt(2) * cos(K*pi/16) [refers to 8-point FDCT].
04341    */
04342 
04343   dataptr = data;
04344   for (ctr = 0; ctr < 2; ctr++) {
04345     /* Even part */
04346 
04347     tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*3];
04348     tmp1 = dataptr[DCTSIZE*1] + dataptr[DCTSIZE*2];
04349 
04350     tmp10 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*3];
04351     tmp11 = dataptr[DCTSIZE*1] - dataptr[DCTSIZE*2];
04352 
04353     dataptr[DCTSIZE*0] = (DCTELEM) (tmp0 + tmp1);
04354     dataptr[DCTSIZE*2] = (DCTELEM) (tmp0 - tmp1);
04355 
04356     /* Odd part */
04357 
04358     tmp0 = MULTIPLY(tmp10 + tmp11, FIX_0_541196100);       /* c6 */
04359     /* Add fudge factor here for final descale. */
04360     tmp0 += ONE << (CONST_BITS-1);
04361 
04362     dataptr[DCTSIZE*1] = (DCTELEM)
04363       RIGHT_SHIFT(tmp0 + MULTIPLY(tmp10, FIX_0_765366865), /* c2-c6 */
04364           CONST_BITS);
04365     dataptr[DCTSIZE*3] = (DCTELEM)
04366       RIGHT_SHIFT(tmp0 - MULTIPLY(tmp11, FIX_1_847759065), /* c2+c6 */
04367           CONST_BITS);
04368 
04369     dataptr++;          /* advance pointer to next column */
04370   }
04371 }
04372 
04373 
04374 /*
04375  * Perform the forward DCT on a 1x2 sample block.
04376  *
04377  * 1-point FDCT in pass 1 (rows), 2-point in pass 2 (columns).
04378  */
04379 
04380 GLOBAL(void)
04381 jpeg_fdct_1x2 (DCTELEM * data, JSAMPARRAY sample_data, JDIMENSION start_col)
04382 {
04383   DCTELEM tmp0, tmp1;
04384 
04385   /* Pre-zero output coefficient block. */
04386   MEMZERO(data, SIZEOF(DCTELEM) * DCTSIZE2);
04387 
04388   /* Pass 1: empty. */
04389 
04390   /* Pass 2: process columns.
04391    * We leave the results scaled up by an overall factor of 8.
04392    * We must also scale the output by (8/1)*(8/2) = 2**5.
04393    */
04394 
04395   /* Even part */
04396 
04397   tmp0 = GETJSAMPLE(sample_data[0][start_col]);
04398   tmp1 = GETJSAMPLE(sample_data[1][start_col]);
04399 
04400   /* Apply unsigned->signed conversion. */
04401   data[DCTSIZE*0] = (tmp0 + tmp1 - 2 * CENTERJSAMPLE) << 5;
04402 
04403   /* Odd part */
04404 
04405   data[DCTSIZE*1] = (tmp0 - tmp1) << 5;
04406 }
04407 
04408 #endif /* DCT_SCALING_SUPPORTED */
04409 #endif /* DCT_ISLOW_SUPPORTED */