/* based on epic_decode but used here for decoding FPP JPEG DCT images
   this should be integrated with fpp_decode but for now has many
   duplications that are hidden via statics, just the epic_decode function
   is used by fpp_decode
   parts used in original epic_decode (which was a standalone) but not needed
   here were deleted
 */
#include  <stdio.h>
#include  <sys/stat.h>
#define huff_EXTEND(x,s)  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
 static const int extend_test[16] =   /* entry n is 2**(n-1) */
  { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
    0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };

 static const int extend_offset[16] = /* entry n is (-1 << n) + 1 */
  { 0, ((-1)<<1) + 1, ((-1)<<2) + 1, ((-1)<<3) + 1, ((-1)<<4) + 1,
    ((-1)<<5) + 1, ((-1)<<6) + 1, ((-1)<<7) + 1, ((-1)<<8) + 1,
    ((-1)<<9) + 1, ((-1)<<10) + 1, ((-1)<<11) + 1, ((-1)<<12) + 1,
    ((-1)<<13) + 1, ((-1)<<14) + 1, ((-1)<<15) + 1 };

 FILE	*fopen(), *fin, *fout;
 /* global tables for Huffman decoding*/
 typedef unsigned char byte;
 static byte	dc_look_nbits[256],dc_look_sym[256],ac_look_nbits[256],ac_look_sym[256];
 static byte	dc_bits[16], dc_huffval[256], ac_bits[16], ac_huffval[256];
 static int	dc_mincode[16], dc_maxcode[16], dc_valptr[16];
 static int	ac_mincode[16], ac_maxcode[16], ac_valptr[16];
 static short	qt[64];

 /* image size and # of blocks */
 static int	nblocks, nx, ny, qfactor, restart_interval=0;
 static int	n_restarts_found = 0, n_unexpected = 0;

 /* zag[i] is the natural-order position of the i'th element of zigzag order. */
 
 static const int zag[64] = {
   0,  1,  8, 16,  9,  2,  3, 10,
  17, 24, 32, 25, 18, 11,  4,  5,
  12, 19, 26, 33, 40, 48, 41, 34,
  27, 20, 13,  6,  7, 14, 21, 28,
  35, 42, 49, 56, 57, 50, 43, 36,
  29, 22, 15, 23, 30, 37, 44, 51,
  58, 59, 52, 45, 38, 31, 39, 46,
  53, 60, 61, 54, 47, 55, 62, 63
 };

 static float aansf[8] = { 1.0, 1.387039845, 1.306562965, 1.175875602,
	   1.0, 0.785694958, 0.541196100, 0.275899379};
 static float ws[64], fqtbl[64], bias = 2048.5;
 static short	*dct;

 /* ----------------------------------------------------*/
static int huff_setups()
 {
 /* assumes that dc_bits, ac_bits, dc_huffval, and ac_huffval are already
 loaded from the header */
 unsigned char huffsize[256], *p, *bits, *huffval;
 unsigned short	huffcode[256], *pc;
 int	*mincode, *maxcode, *valptr;
 byte	*look_nbits, *look_sym;
 int	i, j, code, k, iq, n;
 int	jq, ntable, lookbits;

 /* use bits to generate the Huffman codes and sizes in code-length order */
 /* for both dc and ac tables */
 ntable = 2;
 bits = dc_bits;	valptr = dc_valptr;	mincode = dc_mincode;
 maxcode = dc_maxcode;	look_nbits = dc_look_nbits;
 look_sym = dc_look_sym;	huffval = dc_huffval;
 while (ntable--) {
 p = huffsize;
 pc = huffcode;
 code = 0;
 for (k = 0; k < 16; k++) {
   j = k + 1;	n = (int) bits[k];	/* n is the number of this length */
   while (n--)  {			/* the codes of a common length */
   	*p++ = (unsigned char) (j);	/* just increase by 1 wrt previous */
	*pc++ = code++;	  }
  code <<= 1;				/* for new code size, left shift */
  }
  *p = 0;

 /* Figure F.15: generate decoding tables for bit-sequential decoding */
 iq = 0;
 for (k = 0; k < 16; k++) {
   if (bits[k]) {
     valptr[k] = iq; 	/* huffval[] index of 1st symbol of code length k+1 */
     mincode[k] = huffcode[iq]; /* minimum code of length k+1 */
     iq += bits[k];
     maxcode[k] = huffcode[iq-1]; /* maximum code of length k+1 */
   } else {
     maxcode[k] = -1;	/* -1 if no codes of this length */
   }
 }

 /* generate lookup tables to speed up the process for Huffman codes of
 length 8 or less, one set for dc and ac, follows technique in jpeg-5a but
 not the same code so some variables may look similar but beware ! */
 
 bzero(look_nbits, 256);
 iq = 0;
 for (k = 0; k < 8; k++) {
  for (i = 0; i < (int) bits[k]; i++, iq++) {
  /* k+1 = current code's length, iq = its index in huffcode[] & huffval[]. */
  /* Generate left-justified code followed by all possible bit sequences */
    lookbits = huffcode[iq] << (7 - k);
    jq = k+1;
    for (j= 1 << (7 - k);j > 0;j--) {
      look_nbits[lookbits] = jq;
      look_sym[lookbits] = huffval[iq];
      lookbits++;
    }
  }
 } 
 bits = ac_bits;	valptr = ac_valptr;	mincode = ac_mincode;
 maxcode = ac_maxcode;	look_nbits = ac_look_nbits;
 look_sym = ac_look_sym;	huffval = ac_huffval;
 }
 return 1;
}
 /*------------------------------------------------------------------------- */
static int huff_decode_err(n)
 int	n;
 {
 printf("Huffman decoding error # %d\n", n);
 return -1; 
 }
 /*------------------------------------------------------------------------- */
static int dct_buffer_decode_err(limit)
 int limit;
 {
 printf("Exceeded data size during Huffman decompression , limit =%d\n", limit);
 return -1;
 }
 /*------------------------------------------------------------------------- */
static int huff_decode_dct(dct, nblocks, x, limit)
 /* returns decoded dct, the coded data is in x, huffman tables as in
 huff_encode_dct */
 /* assumes messages and pads already removed by a pre-scan */
 short	dct[];
 byte	x[];
 int	limit, nblocks;
 {
 int	i, j, k, idct, r1, last_dc=0;
 int	jq, look, nb, lbits, ks;
 unsigned int	temp, temp2, nbits, rs;
 unsigned int mask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff,
  0x1ff,0x3ff,0x7ff,0xfff,0x1fff,0x3fff,0x7fff,0xffff};
 byte *px;


 px = x;	/* where we get the Huffman coded version */
 r1 = 0;	/* a bit count */

 /* decode the dc and ac components of a block */
 /* limit is the max number of bytes in the stream */
 /* parts modified from the jpeg-5a code */

 while (nblocks--) {
 bzero(dct, 128);	/* pre-zero this block */
 /* start dc decode, grab 8 bits and use lookup shortcut to see if we
 got a short Huffman code */
 
 i=r1>>3;	j=r1%8;		/* our byte and bit addresses */
 if (i > limit) return dct_buffer_decode_err(limit);
 px = x + i;
 if (j == 0) {	/* aligned on byte, lucky 1/8 */
  look = *px;
 } else {	/* need part of next byte as well */
  jq = 8 - j;	/* available bits in px */
  look = *px++ << j;	/* msb, make room for lsb */
  look |= ((int) *px) >> jq;
  look = look & 0xff;
 }
 if ((nb = dc_look_nbits[look]) != 0) {
     nbits = dc_look_sym[look];
     lbits = 8 -nb;
     r1 += nb;
 } else {
     /* get here if the code is longer than 8 bits or some kind of disaster */
     r1 += 8;	/* point after look */
     i=r1>>3;	j=r1%8;		px = x + i;
     /* get 16 bits in temp */
     temp = ((int) look) << 8;	/* look will be top 8 */
     if (j == 0) { temp = temp | ( (int) *px );
     } else {
     /* need 2 transfers, 8 bits total */
     jq = 8 - j;	/* available bits in px */
     temp |= ( *px++ << j );
     temp |= ( ((int) *px) >> jq);
     }
     k = 7;	nb = 8;		/* here nb is code size - 1 */
     while ( (ks =(temp >> k)) > dc_maxcode[nb] ) { k--; nb++;
     	if (k < 0)  return huff_decode_err(0); }
     /* note error return if we couldn't find a code */
     nbits = dc_huffval[ dc_valptr[nb] + ks - dc_mincode[nb] ];
     nb++;	/* actual size of code */
     r1 += (nb -8);
     lbits = 0;
 }
 /* that defines the dc range interval, now get the rest of the bits */
 /* if nbits is 0, don't need any */
 if (nbits) {
 /* we have some still in look, lbits */
 /* if so, nb is valid, so use it to shift old look */
 if (lbits >= nbits) {  temp = (look>> (lbits-nbits)) & mask[nbits];
 } else {
 /* harder, need "nbits" # of bits */
 i=r1>>3;	j=r1%8;		px = x + i;
 jq = 8 - j;
 temp = ( *px  & mask[jq] );	/* gets us jq ls bits */
 ks = nbits - jq;		/* how much more needed (could be < 0) */
 if (ks>0) { temp = temp << ks;	/* shift over to make room */
  ks -=8;	temp2 = ((int) *++px);
  if (ks>0) { temp |= ( temp2 << ks );	/* need a third ? */
    ks -=8;	temp2 = ((int) *++px);
    }
 temp |= ( temp2 >> (-ks) );		/* last, #2 or #3 */
 } else { temp = temp >> (-ks); }
 }
 r1 += nbits;	/* count after this is done */
 /* now extend the sign, uses the jpeg-5a macro defined above */
 /* we use the lookup table version */
 nbits = huff_EXTEND(temp, nbits);
 }
 dct[0] = last_dc = nbits + last_dc;
 /* wraps the dc term, start the ac */

 for (idct = 1; idct < 64; idct++) {
 i=r1>>3;	j=r1%8;		px = x + i;
 if (i > limit) return dct_buffer_decode_err(limit);
 /* decode the Huffman symbol, use ac tables */
 if (j == 0) {	/* aligned on byte, lucky 1/8 */
  look = *px;
 } else {	/* need part of next byte as well */
  jq = 8 - j;	/* available bits in px */
  look = *px++ << j;	/* msb, make room for lsb */
  look |= ((int) *px) >> jq;
  look = look & 0xff;
 }
 if ((nb = ac_look_nbits[look]) != 0) {
     nbits = ac_look_sym[look];
     lbits = 8 -nb;
     r1 += nb;
 } else {
     /* get here if the code is longer than 8 bits or some kind of disaster */
     r1 += 8;	/* point after look */
     i=r1>>3;	j=r1%8;		px = x + i;
     /* get 16 bits in temp */
     temp = ((int) look) << 8;	/* look will be top 8 */
     if (j == 0) { temp = temp | ( (int) *px );
     } else {
     /* need 2 transfers, 8 bits total */
     jq = 8 - j;	/* available bits in px */
     temp |= ( *px++ << j );
     temp |= ( ((int) *px) >> jq);
     }
     /*printf("16 bit candidate in slow decode = %#x\n", temp);*/
     k = 7;	nb = 8;		/* here nb is code size - 1 */
     while ( (ks =(temp >> k)) > ac_maxcode[nb] ) { k--; nb++;
     	if (k < 0)  {
	printf("error at i,j,r1,idct = %d,%d,%d,%d\n",i,j,r1,idct);
	printf("temp, ks, k, nb =  %d,%d,%d,%d\n",temp, ks, k, nb);
	return huff_decode_err(1);
	}
	}
     /* note error return if we couldn't find a code */
     nbits = ac_huffval[ ac_valptr[nb] + ks - ac_mincode[nb] ];
     nb++;	/* actual size of code */
     r1 += (nb -8);
     lbits = 0;
 }
 /* that defines the ac symbol, contains a zero run length and bits in
 next non-zero coeff. */
 rs = nbits >> 4;	/* run length */
 nbits = nbits & 0xf;	/* bits in next one, unless 0, then a special code */
 if (nbits == 0) {
   if (rs != 15) break;		/* hit the end, rest are all zips */
   idct += 15;
 } else {
 idct += rs;			/* skip over the zeroes */
 /* need the next nbits bits */
 /* we have some still in look, lbits */
 /* if so, nb is valid, so use it to shift old look */
 if (lbits >= nbits) {  temp = (look>> (lbits-nbits)) & mask[nbits];
 } else {
 /* harder, need "nbits" # of bits */
 i=r1>>3;	j=r1%8;		px = x + i;
 jq = 8 - j;
 temp = ( *px  & mask[jq] );	/* gets us jq ls bits */
 ks = nbits - jq;		/* how much more needed (could be < 0) */
 if (ks>0) { temp = temp << ks;	/* shift over to make room */
  ks -=8;	temp2 = ((int) *++px);
  if (ks>0) { temp |= ( temp2 << ks );	/* need a third ? */
    ks -=8;	temp2 = ((int) *++px);
    }
 temp |= ( temp2 >> (-ks) );		/* last, #2 or #3 */
 } else { temp = temp >> (-ks); }
 }
 r1 += nbits;	/* count after this is done */
 /* now extend the sign, uses the jpeg-5a macro defined above */
 /* we use the lookup table version */
 nbits = huff_EXTEND(temp, nbits);
 /* this nbits is now the ac term */
 dct[idct] = (short) nbits;
 }
 }	/* end of idct loop for ac terms */
 dct += 64;
 }	/* end of nblocks loop */
 return 1;
 }
 /*------------------------------------------------------------------------- */
static int dct_err(n)
 int	n;
 {
 printf("DCT error # %d\n", n);
 return -1; 
 }
 /*------------------------------------------------------------------------- */
 /*---------------------------------------------------------------------------*/
static void rdct_cell(start, ystride, dctstart)
 /* does reverse quant. and dct for one cell */
 short	*start;
 float	*dctstart;
 int	ystride;
 {
 int i;
 float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 float tmp10, tmp11, tmp12, tmp13;
 float  z5, z11, z13, z10, z12;

 /* first de-zag and de-quantize */
 { register float *wsptr, *dctptr;
  wsptr = ws;	dctptr = dctstart;
  for (i=0; i < 64; i++) {
  wsptr[zag[i]] = *dctptr++ * fqtbl[i];
  }
 }
  /* Pass 1: process columns. */
  /* we don't check for columns of zeroes since this usually uses full
  precision */
  {
  register float *wsptr = ws;
  register float *fqtptr = fqtbl;
  int	nq = 8;
  while (nq--) {
    tmp0 = wsptr[0];
    tmp1 = wsptr[8*2];
    tmp2 = wsptr[8*4];
    tmp3 = wsptr[8*6];

    tmp10 = tmp0 + tmp2;	/* phase 3 */
    tmp11 = tmp0 - tmp2;

    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
    tmp12 = (tmp1 - tmp3) *  1.414213562 - tmp13; /* 2*c4 */

    tmp0 = tmp10 + tmp13;	/* phase 2 */
    tmp3 = tmp10 - tmp13;
    tmp1 = tmp11 + tmp12;
    tmp2 = tmp11 - tmp12;
    
    /* Odd part */

    tmp4 = wsptr[8];
    tmp5 = wsptr[8*3];
    tmp6 = wsptr[8*5];
    tmp7 = wsptr[8*7];

    z13 = tmp6 + tmp5;		/* phase 6 */
    z10 = tmp6 - tmp5;
    z11 = tmp4 + tmp7;
    z12 = tmp4 - tmp7;

    tmp7 = z11 + z13;		/* phase 5 */
    tmp11 = (z11 - z13) * ( 1.414213562); /* 2*c4 */

    z5 = (z10 + z12) * ( 1.847759065); /* 2*c2 */
    tmp10 = ( 1.082392200) * z12 - z5; /* 2*(c2-c6) */
    tmp12 = ( -2.613125930) * z10 + z5; /* -2*(c2+c6) */

    tmp6 = tmp12 - tmp7;	/* phase 2 */
    tmp5 = tmp11 - tmp6;
    tmp4 = tmp10 + tmp5;

    wsptr[0]   = tmp0 + tmp7;
    wsptr[8*7] = tmp0 - tmp7;
    wsptr[8]   = tmp1 + tmp6;
    wsptr[8*6] = tmp1 - tmp6;
    wsptr[8*2] = tmp2 + tmp5;
    wsptr[8*5] = tmp2 - tmp5;
    wsptr[8*4] = tmp3 + tmp4;
    wsptr[8*3] = tmp3 - tmp4;

    fqtptr++;
    wsptr++;		/* advance pointers to next column */
  } }

  /* Pass 2: process rows. */
  {
  register float *wsptr;
  register short *elemptr;
  int	nq = 8;
  wsptr = ws;	elemptr = start;
  while (nq--) {
      /* Even part */

    tmp10 = wsptr[0] + wsptr[4] + bias;
    tmp11 = wsptr[0] - wsptr[4] + bias;

    tmp13 = wsptr[2] + wsptr[6];
    tmp12 = (wsptr[2] - wsptr[6]) * ( 1.414213562) - tmp13;

    tmp0 = tmp10 + tmp13;
    tmp3 = tmp10 - tmp13;
    tmp1 = tmp11 + tmp12;
    tmp2 = tmp11 - tmp12;

    /* Odd part */

    z13 = wsptr[5] + wsptr[3];
    z10 = wsptr[5] - wsptr[3];
    z11 = wsptr[1] + wsptr[7];
    z12 = wsptr[1] - wsptr[7];

    tmp7 = z11 + z13;
    tmp11 = (z11 - z13) * ( 1.414213562);

    z5 = (z10 + z12) * ( 1.847759065); /* 2*c2 */
    tmp10 = ( 1.082392200) * z12 - z5; /* 2*(c2-c6) */
    tmp12 = ( -2.613125930) * z10 + z5; /* -2*(c2+c6) */

    tmp6 = tmp12 - tmp7;
    tmp5 = tmp11 - tmp6;
    tmp4 = tmp10 + tmp5;

    /* Final output stage, note bias was added in above */
    /* we don't range limit since results should be near exact */
    elemptr[0] = (short) (tmp0 + tmp7);
    elemptr[7] = (short) (tmp0 - tmp7);
    elemptr[1] = (short) (tmp1 + tmp6);
    elemptr[6] = (short) (tmp1 - tmp6);
    elemptr[2] = (short) (tmp2 + tmp5);
    elemptr[5] = (short) (tmp2 - tmp5);
    elemptr[4] = (short) (tmp3 + tmp4);
    elemptr[3] = (short) (tmp3 - tmp4);

    wsptr += 8;
    elemptr += ystride;		/* to next row */
  } }
 }
 /*---------------------------------------------------------------------------*/
static int rdct(image, nx, ny, nblocks, qtable, dct_array)
 /* reverse dct, quant, and re-order included, result is I*2 */
 /* nx and ny must be multiples of 8 (check in calling routine) */
 short	*dct_array;
 short	*qtable, *image;
 int	nx, ny, nblocks;
 {
 void rdct_cell();
 int	n, i, ncx, iq, ystride, icx, icy, row, col, j;
 float	dctfp[64], *pf;
 short	*dctstart, *pff;
 /* condition the q table for the dct's we created, must be same input qtable */
 i = 0;
 for (i = 0; i < 64; i++) {
   row = zag[i] >> 3;
   col = zag[i] & 7;
   fqtbl[i] = (float) qtable[i] * aansf[row] * aansf[col] * 0.125;
   }
 n = nx*ny/64;	/* number of cells */
 /* check consistency of nx, ny, and nblocks */
 if (nblocks != n) return dct_err(0);
 ncx = nx/8;	ystride = nx;
 dctstart = dct_array;
 
 for (i=0; i < n; i++) {
 /* note that we access image by 8x8 blocks but dct is stored seq. */
 icx = i%ncx;		icy = i/ncx;	iq = icx*8 + icy*8*ystride;
 /* make a fp copy of this cell */
 pf = dctfp;	pff = dctstart;
 for (j=0;j < 64; j++)  *pf++ = (float) *pff++;
 rdct_cell(&image[iq], ystride, dctfp);
 dctstart += 64;
 }
 return 1;	/* success return */
 }
static int scan_err(n)
 int	n;
 {
 printf("scan error # %d\n", n);
 return -1; 
 }
 /*------------------------------------------------------------------------- */
static int epic_scan(xbuffer, limit)
 /* scans the contents of x for a EPIC image */
 byte	xbuffer[];
 int	limit;
 {
 byte	*px, *pt, *pb;
 int	n, code, iblock = 0, nb, stat, icount;

 /* get memory for the DCT result, dct array is top level */
 dct = (short *) malloc(nblocks*64*sizeof(short));
 if (dct == NULL) { printf("malloc failed for DCT\n"); return 0;}
 bzero(dct, nblocks*64*sizeof(short));

 /* scan for padded markers and messages, decode each restart section after
 we finish the scan of it */
 px = xbuffer;
 pt = pb = px;
 iblock = 0;
 icount = 0;
 n = 0;		/* n will be the bytes found between restarts */
 /* nblocks is the total for the whole image, nb is the # for each restart
 interval, if that is 0 then nb = nblocks (i.e., no restarts) */
 if (restart_interval) nb = restart_interval; else nb = nblocks;
 //printf("nblocks, nb, restart_interval = %d %d %d\n",
 //	nblocks, nb, restart_interval);
 for (;;) {	/* scan until we hit limit or decode nblocks or error */
 if ( (*pt++ = *px++) == 0xff) {	/* a message or pad? */
  code = *px++ & 0x00ff;
  if (code == 0)
    /* just a padded ff, ff in stream already, just bump n */
     n++; else {
   if (code == 0xff) {
     /* this is 2 ff's */
     *pt++ = 0xff;	n +=2;	} else {
     if ( (code == 0xd9) || ((code & 0xf8) == 0xd0) )
     {
     /* printf("message code = %#x\n", code); */
     --pt;
     n_restarts_found += 1;
     /* either the end of image or a restart, process what we have */
     /* we assume that we only process a number of blocks equal to the
     restart_interval or whatever is left in image, note that normal
     TRACE images will always have an integer number of restart
     intervals */
     if ( (nblocks - iblock) < nb ) nb = nblocks - iblock;
     if (nb > 0) {
     /* printf("decoding, n, nb = %d, %d at %d, iblock = %d, ic = %d\n",
     	n, nb, pb, iblock,icount); */
     icount++;
     stat = huff_decode_dct(&dct[iblock*64], nb, pb, n);
     if (stat != 1) printf("error at iblock = %d\n", iblock);
     } else {
       /* got a restart or EOI and no data left, OK if EOI, in any
       case we have to stop */
       if (code == 0xd9) { printf("got the EOI\n"); } else {
       	  printf("abnormal end of image, beware!\n"); }
       break;
     }
 
     /* if the code was the end of image, we should be done */
     n = 0;	pb = pt = px;	iblock +=  nb; 
     if (code == 0xd9) { /* printf("got the EOI\n"); */ break; }
 
     } else {
     printf("unexpected message %#x at iblock = %d\n", code, iblock);
     n_unexpected += 1;
     /* check ahead to see if there is a restart message next */
     printf("%#x  %#x  %#x  %#x\n", *px, *(px+1), *(px+2), *(px+3) );
     /* treat as data and see if we choke */
     *pt++ = code;  n += 2;
     /*return scan_err(1);*/
     }
     } }
     /* all of the cases with a code use up 2 bytes, so */
     limit = limit -2;
  } else { limit--;  n ++; } /* if not a ff, only one byte consumed */

 if ( limit <= 0) { printf("jpeg12_scan - limit error\n");
 	printf("px-xbuffer = %d, n = %d\n", px-xbuffer, n);
 	printf("pt-xbuffer = %d, n = %d\n", pt-xbuffer, n);
 	break;}
  }
 return 1;
 }
 /*------------------------------------------------------------------------- */
static int scan_header(x, limit)
 /* scans the contents of x for JPEG12 header info */
 /* this can only handle simple headers with 12 bit precision and one
 component, NOT a general purpose JPEG decoder */
 byte	x[];
 int	limit;
 {
 byte	*px;
 int	q_flag=1, hdc_flag=1, hac_flag=1, soi_flag=1, dc_done=0, len;
 int	dri_flag=1, sos_flag=0, sof_flag=1;
 int	offset, code;
 
 px = x;
 for (;;) {
  /* look for markers and decode Q and Huffman tables as we find them */
  if (*px++ == 0xff) {
  /* got a code */
  code = (int) *px++;
  limit--;
  if (code != 0) {
   switch (code)  {
    case 0xd8:		/* SOI */
     soi_flag = 0;
     //printf("got the SOI\n");
     break;
    case 0xe0:		/* APP0 */
     /* next 2 bytes are the length */
     len = ((int) px[0]) * 256  + (int) px[1];
     //printf("APP0, len = %d\n", len);
     /* these should just say JFIF to ID the file type */
     /* we just skip over it here */
     px += len;
     break;
    case 0xfe:		/* COM */
     len = ((int) px[0]) * 256  + (int) px[1];
     //printf("COM (comment), len = %d\n", len);
     px += len;
     break;
    case 0xc4:		/* DHT */
     len = ((int) px[0]) * 256  + (int) px[1];
     //printf("DHT, len = %d\n", len);
     { int  iq, class, id, i, nv=0;
       byte	*bits, *vals, *pq;
       pq = px + 2;
       iq = *pq++; class = iq/16;	id = iq%16;
       //printf("class, id = %d %d\n", class, id);
       if (class || dc_done) {
        bits = ac_bits; vals = ac_huffval; hac_flag = 0;
        } else {
        bits = dc_bits; vals = dc_huffval; dc_done = 1; hdc_flag = 0;
        }
       for (i=0; i < 16;i++) { bits[i] = *pq++; nv += bits[i]; }
       //printf("nv = %d\n", nv);
       for (i=0; i < nv;i++) { vals[i] = *pq++; }
       /* another table ? */
       //printf("nv + 19 = %d\n", nv+19);
       if (len > (nv+20) ) {
	 nv = 0;
	 iq = *pq++; class = iq/16;	id = iq%16;
	 //printf("class, id = %d %d\n", class, id);
	 if (class || dc_done) {
          bits = ac_bits; vals = ac_huffval; hac_flag = 0;
          } else {
          bits = dc_bits; vals = dc_huffval; dc_done = 1; hdc_flag = 0;
          }
	 for (i=0; i < 16;i++) { bits[i] = *pq++; nv += bits[i]; }
	 //printf("nv = %d\n", nv);
	 for (i=0; i < nv;i++) { vals[i] = *pq++; }
       }
     }
     px += len;
     break;
    case 0xdb:		/* DQT */
     len = ((int) px[0]) * 256  + (int) px[1];
     //printf("DQT, len = %d\n", len);
     q_flag = 0;
     { int  iq, precision, id, i;
       iq = px[2]; precision = iq/16;	id = iq%16;
       //printf("precision, id = %d %d\n", precision, id);
       if (precision == 0)
          for (i=0; i < 64;i++) { qt[i] = (short) px[i+3]; }
       	  else
       	  for (i=0; i < 64;i++)
       	    qt[i] = (short) ( 256* (int) px[3+2*i] + (int) px[4+2*i]);
       /* dump for verification */
//        printf("Q table");
//        for (i=0; i < 64;i++) {
//        	if (i%8 == 0) printf("\n");  printf("%6d", qt[i]);
//        	}
//        	printf("\n");
     }
     px += len;
     break;
    case 0xc0:		/* SOF0 */
    case 0xc1:		/* SOF1 */
    case 0xc3:		/* SOF3 */
     len = ((int) px[0]) * 256  + (int) px[1];
     //printf("SOF, len = %d\n", len);
     sof_flag = 0;
     { int bpp, nc;
       bpp = px[2];
       //printf("bits/pixel = %d\n", bpp);
       ny = ((int) px[3]) * 256  + (int) px[4];
       nx = ((int) px[5]) * 256  + (int) px[6];
       nc = px[7];
       //printf("nx, ny = %d %d, components = %d\n", nx, ny, nc);
       if (bpp != 12 || nc != 1) {
         printf("not single component 12 bit JPEG!\n");  return -1; }
     }
     px += len;
     break;
    case 0xda:		/* SOS */
     len = ((int) px[0]) * 256  + (int) px[1];
     //printf("SOS, len = %d\n", len);
     sos_flag = 1;
     { int  nc;
       nc = px[2];
       //printf("# of components = %d\n", nc);
     }
     px += len;
     break;
    case 0xdd:		/* DRI */
     len = ((int) px[0]) * 256  + (int) px[1];
     //printf("DRI, len = %d\n", len);
     restart_interval = ((int) px[2]) * 256  + (int) px[3];
     dri_flag = 0;
     px += len;
     break;
    default:
     printf("unidentified marker %#x\n", code);
     break;
   }
  }
  }
  if (sos_flag) break;
  limit--;
  if (limit <= 0) {
  	printf("exceeded input size while scanning header!\n");
  	return -1; }
 }
 /* check that we got what we need */
 if (q_flag + hdc_flag + hac_flag + soi_flag + sof_flag)
   { printf("missing some needed markers in header\n");  return -1; }
 /* return the offset, we compute it from px (then we add it back but
 like to return a simple integer) */
 offset = px - x;
 return offset;
 }
 /*------------------------------------------------------------------------- */
int epic_decode(char *buf, int ns, short *image, int numOutputBytes)
 {
 int	stat, h_offset;
 int	bigendian;

 /* determine our endian */
 { int one = 1; bigendian = (*(char *)&one == 0); }
 
 if (ns <= 0) { printf("EPIC decode error, input size = %d\n", ns); return 1; }

 /* first scan the header for markers, get the Q and Huffman tables */
 h_offset = scan_header(buf, ns);
 //printf("h_offset = %d\n", h_offset);
 if (h_offset <= 0) { printf("failure in scan_header\n"); return 1; }
 buf = buf + h_offset;
 nblocks = nx*ny/64;

 /* re-organize the Huffman tables */
 huff_setups();

 /* and scan and decode */
 stat = epic_scan(buf, ns);
 if (stat != 1) printf("problem with epic_scan\n");

 bzero(image, nblocks*64*sizeof(short));
 stat = rdct(image, nx, ny, nblocks, qt, dct);
 if (stat != 1) printf("problem with rdct\n");
 
 return 0;
 }