/* internal ana subroutines and functions, this is file fun6.c  */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <sys/types.h>
#include <sys/time.h>
#include "ana_structures.h"
#include "defs.h"
#define ABS(x) ((x)>=0?(x):-(x))
#define	MIN(a,b) (((a)<(b))?(a):(b))
#define	MAX(a,b) (((a)>(b))?(a):(b))
 extern	int scrat[NSCRAT];
 extern	struct sym_desc sym[];
 extern	struct sym_list		*subr_sym_list[];
 extern	int	temp_base;
 extern	int	ana_float();
 extern	char *find_sym_name(int iq);
 extern	int	ana_type_size[];
 extern	int	vfix_top, num_ana_subr, next_user_subr_num;
 extern	int	lastmin_sym, lastmax_sym;
 extern	float	float_arg();
 extern	double	double_arg();
 extern double systime();
 int 	extractline_mode = 1;  /* aka !EXTRACTLINE_MODE */
 int	result_sym;
 int	cell_smooth_type = 1;
 int    dim[8];
 struct ahead   *h;
 union	types_ptr { byte *b; short *w; int *l; float *f; double *d;};		
 struct {
  int	nperim;
  int	*perimaddrs;
  int	count;
  int	*addrs;
  } blobList;
 /*------------------------------------------------------------------------- */
#define CKQ if (q < qbase || q > (qbase+n*m-1)) printf("q out of range %#x\n", q)
#define CKQA if (qabove < qbase || qabove > (qbase+n*m-1)) printf("qabove out of range %#x\n", qabove)
#define CKQB if (qbelow < qbase || qbelow > (qbase+n*m-1)) printf("qbelow out of range %#x\n", qbelow)
#define CKQR if (q < qbase || q > (qbase+n*m-1)) printf("q in run %#x\n", q)
#define CKQAR if (qabove < qbase || qabove > (qbase+n*m-1)) printf("qabove in run %#x\n", qabove)
#define CKQBR if (qbelow < qbase || qbelow > (qbase+n*m-1)) printf("qbelow in run %#x\n", qbelow)
/*-------------------------------------------------------------------------*/
int get_2d_array(int iq, int type, int **p, int *nx, int *ny)
  /* returns the number of elements for a 2-D symbol and a pointer to the
  first one, returns 0 if something is wrong, this checks that the array is a 2-D
  of a particular type if the type is >= 0, also requires nx and ny >= 3 */
  {
  int	nd, ntotal;
  struct	ahead	*h;
  if ( sym[iq].class != 4 ) { fprintf(stderr, "argument not an array\n"); return 0; }
  if (type >= 0) if ( sym[iq].type != type ) { fprintf(stderr, "argument not correct type\n"); return 0; }
  h = (struct ahead *) sym[iq].spec.array.ptr;
  nd = h->ndim;
  if (nd != 2) { fprintf(stderr, "argument not a 2-D array\n"); return 0; }
  *nx = h->dims[0];	*ny = h->dims[1];
  if (*nx < 4 || *ny < 4) {
    printf("array too small for this operation, nx, ny = %d %d\n", *nx, *ny); return 0; }
  ntotal = *nx * *ny;
  *p = (int *) ((char *)h + sizeof(struct ahead));
  return ntotal;
  }
/*-------------------------------------------------------------------------*/
int get_1d_array(int iq, int type, int **p)
  /* returns the number of elements for a 1-D symbol and a pointer to the
  first one, returns 0 if something is wrong, this checks that the array is a 1-D
  of a particular type if the type is >= 0 */
  {
  int	nd, ntotal;
  struct	ahead	*h;
  if ( sym[iq].class != 4 ) { fprintf(stderr, "argument not an array\n"); return 0; }
  if (type >= 0) if ( sym[iq].type != type ) { fprintf(stderr, "argument not correct type\n"); return 0; }
  h = (struct ahead *) sym[iq].spec.array.ptr;
  nd = h->ndim;
  if (nd != 1) { fprintf(stderr, "argument not a 1-D array\n"); return 0; }
  ntotal = h->dims[0];
  *p = (int *) ((char *)h + sizeof(struct ahead));
  return ntotal;
  }
/*-------------------------------------------------------------------------*/
int get_1d_sym_array(int iq, struct sym_desc **pbase)
  /* verifies that symbol is a sym array and returns count, or 0 if any problem,
   pointer to the the base of the sym array is returned in pbase */
  {
  int ntotal, nd;
  struct	ahead	*h;
  if ( sym[iq].class != SYM_ARR ) { fprintf(stderr, "argument is not a symbol array\n"); return 0; }
  h = (struct ahead *) sym[iq].spec.array.ptr;
  nd = h->ndim;
  if (nd != 1) { fprintf(stderr, "argument not a 1-D array\n"); return 0; }
  ntotal = h->dims[0];
  *pbase = (struct sym_desc *) ((char *)h + sizeof(struct ahead));
  return ntotal;
  }
 /*------------------------------------------------------------------------- */
int ana_sobel(int narg, int ps[])			/* SOBEL function with optional direction */
  {
  int ntotal, nx, ny, type, iq, n;
  union	types_ptr q1, q2;
  int	nd, i, j;
  float	*pt1, *pt2, *pang, *pt, *pt3, *ptop, *pbot;
  float *pbaserow, *pgradrow, *pangrow;
  float  gx, gy, ang, xq;
  iq = ps[0];
  ntotal = get_2d_array(iq, -1, (int **) &q1, &nx, &ny);
  if (ntotal <= 0) { return -1; }
  type = sym[ps[0]].type;
  /* use only float for initial tests */
  if (type != 3) { printf("SOBEL - input must be float\n"); return -1; }
  /* output is always float */
  result_sym = array_clone(iq,3);
  h = (struct ahead *) sym[result_sym].spec.array.ptr;
  q2.f = (float *) ((char *)h + sizeof(struct ahead));
  if (narg > 1) {
    iq = ps[1];
    dim[0] = nx;  dim[1] = ny;
    if (redef_array(iq, 3, 2, dim) != 1) return -1;
    h = (struct ahead *) sym[iq].spec.array.ptr;
    pang = (float *) ((char *)h + sizeof(struct ahead));
  }
  
  /* now check if we have a mask */
  //printf("narg = %d\n", narg);
  if (narg < 3) {
    /* no mask case */
    /* zero the edges */
    pt2 = q2.f;
    pt = q2.f + nx*(ny-1);
    n = nx;
    while (n--) { *pt2++ = 0.0;  *pt++ = 0.0; }
    pt2 = q2.f;
    pt = q2.f + nx - 1;
    n = ny;
    while (n--) { *pt2 = 0.0; pt2 += nx;  *pt = 0.0; pt += nx; }
    /* also the angle (aka orientation) if present */
    if (narg > 1) {
      pt2 = pang;
      pt = pang + nx*(ny-1);
      n = nx;
      while (n--) { *pt2++ = 0.0;  *pt++ = 0.0; }
      pt2 = pang;
      pt = pang + nx - 1;
      n = ny;
      while (n--) { *pt2 = 0.0; pt2 += nx;  *pt = 0.0; pt += nx; }
    }
    /* done with edges */
    pbaserow = q1.f;
    pgradrow = q2.f + nx + 1;
    pangrow = pang  + nx + 1;
    for (j=1;j<(ny-1);j++) {
      pbot = pbaserow;
      ptop = pbaserow + nx + nx + 2;
      pt2 = pgradrow;
      pt3 = pangrow;
      for (i=1;i<(nx-1);i++) {
	gx = gy =  *ptop-- - *pbot++;
	gy = gy + 2 * (*ptop-- - *pbot++);
	xq = *pbot - *ptop;
	gy = gy - xq;
	gx = gx + xq + 2 * (*(pbot + nx) - *(ptop - nx));
	*pt2++ = 0.125 * hypotf(gx, gy);
	if (narg >1 ) *pt3++ = atan2f(gy, gx);
	ptop += 3;  pbot--;
      }
      pbaserow += nx;
      pgradrow += nx;
      pangrow += nx;
    }
  } else {
     /* have a mask or an index array, must be 1-D I*4  for index or 2-D I*1 for mask */
    iq = ps[2];  if ( sym[iq].class != 4 ) return execute_error(66);
    h = (struct ahead *) sym[iq].spec.array.ptr;
    nd = h->ndim;
    if (nd == 1) { 
      /* can't be mask, check if legal index array */
      int ntotal, *indptr;
      ntotal = get_1d_array(iq, LONG, &indptr);
      printf("SOBEL - index array version not supported yet\n");
      return -1;
    } else {
      /* mask possibility, check */
      int ntotal, n, m, nc1, nc2, nc3;
      float *pbotrow, *ptoprow;
      byte *maskbase, *maskrow, *maskptr2;
      printf("mask case in SOBEL\n");
      ntotal = get_2d_array(iq, BYTE, (int **) &maskbase, &n, &m);
      if (ntotal <= 0 || n != nx || m != ny)
	{ printf("SOBEL - mask array must be 2-D byte array and dimensions must match image\n"); return -1; }
      /* OK, a mask it is then, we pre-zero the arrays so no need to clear edges */
      bzero(q2.f, ntotal * sizeof(float));
      bzero(pang, ntotal * sizeof(float));

      pbaserow = q1.f;
      pgradrow = q2.f + nx + 1;
      pangrow = pang  + nx + 1;
      maskrow = maskbase  + nx + 1;
      for (j=1;j<(ny-1);j++) {
	pbotrow = pbaserow;
	ptoprow = pbaserow + nx + nx + 2;
	pt2 = pgradrow;
	pt3 = pangrow;
	maskptr2 = maskrow;
	for (i=1;i<(nx-1);i++) {
	  ptop = ptoprow++;
	  pbot = pbotrow++;
	  if (*maskptr2++ ) {
	    gx = gy =  *ptop-- - *pbot++;
	    gy = gy + 2 * (*ptop-- - *pbot++);
	    xq = *pbot - *ptop;
	    gy = gy - xq;
	    gx = gx + xq + 2 * (*(pbot + nx) - *(ptop - nx));
	    *pt2 = 0.125 * hypotf(gx, gy);
	    if (narg >1 ) *pt3 = atan2f(gy, gx);
	  }
	  pt3++;  pt2++;
	}
	pbaserow += nx;
	pgradrow += nx;
	pangrow += nx;
	maskrow += nx;
      }

    }
  }
  return result_sym;
  }
 /*------------------------------------------------------------------------- */
int ana_dilate(narg,ps)			/* dilation function */
 /* the call is x = dilate(array), an 8-way dilation
    where array must be a 2-D array, returns the dilated bitmap */
 int	narg, ps[];
 {
 byte	*pbase, *qbase, *p, *q, *p_endline, *qabove, *qbelow;
 unsigned long	q_minus_p;
 int	iq, n, m, mq, type, ntotal;
	/* first arg must be a 2-D byte array */
  double t1, t2, t3, t4;
  t1 = systime();
 ntotal = get_2d_array(ps[0], BYTE, (int **) &pbase, &n, &m);
 dim[0] = n;		dim[1] = m;
 result_sym = array_scratch(0, 2, dim);		/*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 qbase = (byte *) ((char *)h + sizeof(struct ahead));
 bzero(qbase, ntotal);
 q_minus_p = qbase - pbase - 1;
 
 p = pbase;
 q = qbase;

 /* the edges and corners are done as special cases */
 /* first corner */
 if (*p) { *q++ = 1;  *q = 1; q = qbase + n;  *q++ = 1; *q = 1;}
 /* starting row */
 p_endline = pbase + n - 2;
 p++;
  t2 = systime();
 while (1) {
  if (*p) {
  /* got a hit, this means we need to set 6 pixels in the output image */
  q = q_minus_p + p;   /* this is the q just before the corresponding p */
  qabove = q + n;
  *q++ = 1;		*q++ = 1;	*q++ = 1;
  *qabove++ = 1;	*qabove++ = 1;	*qabove++ = 1;
  if (p >= p_endline) break;
  p++;
  /* if we continue to get consecutive hits, just need to set next 2 */
  while (*p) {
   *q++ = 1;	*qabove++ = 1;
   if (p >= p_endline) break;
   p++; }
  }
  if (p >= p_endline) break;
  p++;
  }
  p++;
  /* last point in starting row, set independent of previous so we may
  set some pixels twice */
  if (*p) {  
  q = q_minus_p + p;   /* this is the q just before the corresponding p */
  qabove = q + n;
  *q++ = 1;		*q = 1;
  *qabove++ = 1;	*qabove = 1;
  }
  p++;

 /* central area */ 
 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
   /* start row, not top or bottom */
   /* left hand edge */
   if (*p) {	/* set 6 points */
     q = q_minus_p + 1 + p;	/* q at p */
     qabove = q + n;	qbelow = q - n;	
     *q++ = 1;		*q = 1;
     *qabove++ = 1;	*qabove = 1;
     *qbelow++ = 1;	*qbelow = 1;
   }
   p_endline = p + n - 2;
   p++;

   /* done with left edge, now the middle */

   while (1) {
    if (*p) {
      /* got a hit, this means we need to set 9 pixels in the output image */
      q = q_minus_p + p;
      qabove = q + n;	qbelow = q - n;	
      *q++ = 1;		*q++ = 1;	*q++ = 1;
      *qabove++ = 1;	*qabove++ = 1;	*qabove++ = 1;
      *qbelow++ = 1;	*qbelow++ = 1;	*qbelow++ = 1;
      if (p >= p_endline) break;
      p++;
      /* if we continue to get consecutive hits, just need to set next 3 */
      while (*p) {
	*q++ = 1;	*qabove++ = 1;	*qbelow++ = 1;
	if (p >= p_endline) break;
	p++;
      }
    }
    if (p >= p_endline) break;
    p++;
    }
    p++;

   /* the last point in row */
   if (*p) {  
     q = q_minus_p + p;   /* this is the q just before the corresponding p */
     qabove = q + n;	qbelow = q - n;	
     *q++ = 1;		*q = 1;
     *qabove++ = 1;	*qabove = 1;
     *qbelow++ = 1;	*qbelow = 1;
   }
   p++;

 }

 /* at last, the last row */
 /* left hand edge */
 if (*p) {	/* set 4 points */
   q = q_minus_p + 1 + p;	/* q at p */
   qbelow = q - n;	
   *q++ = 1;		 *q = 1;
   *qbelow++ = 1;	 *qbelow = 1;
 }
 p_endline = p + n - 2;
 p++;

 /* now the middle of the last row */
 
 while (1) {
   if (*p) {
     /* got a hit, this means we need to set 9 pixels in the output image */
     q = q_minus_p + p;
     qbelow = q - n;	
     *q++ = 1;		*q++ = 1;	*q++ = 1;
     *qbelow++ = 1;	*qbelow++ = 1;	*qbelow++ = 1;
     if (p >= p_endline) break;
     p++;
     /* if we continue to get consecutive hits, just need to set next 3 */
     while (*p) {
     *q++ = 1;	*qbelow++ = 1;
     if (p >= p_endline) break;
     p++; }
   }
   if (p >= p_endline) break;
   p++;
 }
 p++;
 /* printf("pbase, p_endline, p = %#x, %#x, %#x\n", pbase, p_endline, p); */
 /* the last point in last row */
 /* printf("pbase, p = %#x, %#x\n", pbase, p); */
 if (*p) {  
   q = q_minus_p + p;   /* this is the q just before the corresponding p */
   qbelow = q - n;	
   *q++ = 1;		*q = 1;
   *qbelow++ = 1;	*qbelow = 1;
 }
  t3 = systime();
  printf("dilate total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
    1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_dilatek(narg,ps)	/* dilation function with a specified kernel */
 /* the call is x = dilate(array, kbitmap)
    where array must be a 2-D bitmap array and kbitmap a 2-D kernel, returns the dilated bitmap
    the bitmaps are byte arrays */
 int	narg, ps[];
 {
 byte	*pbase, *qbase, *p, *q, *kbase;
 int	iq, n, m, mq, type, ntotal, nktotal, nxk, mxk, nk, nq, i, j, nx2, ny2, ii, jj;
 int	*ixoff, *iyoff, *pkx, *pky;
 double t1, t2, t3, t4;
 t1 = systime();
 /* first arg must be a 2-D byte array */
 ntotal = get_2d_array(ps[0], BYTE, (int **) &pbase, &n, &m);
 dim[0] = n;		dim[1] = m;
 result_sym = array_scratch(0, 2, dim);		/*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 qbase = (byte *) ((char *)h + sizeof(struct ahead));
 bzero(qbase, ntotal);
 /* second arg must also be a 2-D byte array for the kernel */
 nktotal = get_2d_array(ps[1], BYTE, (int **) &kbase, &nxk, &mxk);
 /* compute offsets from the kernel bitmap, first find count of non-zero entries */
 nq = nktotal;  nk = 0;  p = kbase;
 while (nq--) {  if (*p++) nk++; }
 printf("kernel has %d bits set\n", nk);
 ixoff = malloc(nk*sizeof(int));  iyoff = malloc(nk*sizeof(int));
 nq = nktotal;  p = kbase;  iq = 0;
 nx2 = nxk/2;	ny2 = mxk/2;
 pkx = ixoff;	pky = iyoff;
 while (nq--) {
   if (*p++)  {
     i = iq % nxk;  j = iq / nxk;
     *pkx++ = i - nx2;   *pky++ = j - ny2;
   }
   iq++;
 }
//  nq = nk;  pkx = ixoff;	pky = iyoff;
//  while (nq--) {
//    printf("offsets: %4d  %4d\n", *pkx++, *pky++);
//  }
 t2 = systime();
 /* now loop through the main image and check if pixel set, dilation is applied
 only to set pixels */
 p = pbase;
 for (j=0;j<m;j++) {
   for (i=0;i<n;i++) {
     if (*p++) {
       /* so set the kernel around this in output array */ 
       nq = nk;
       pkx = ixoff;	pky = iyoff;
       while (nq--) {
         ii = i + *pkx++;  jj = j + *pky++;
	 if (ii >= 0 && jj >= 0 && ii < n && jj < m) {
	   iq = ii + jj * n;
	   *(qbase + iq) = 1;
	 }
       }
     }
   }
 }
 

 t3 = systime();
 printf("dilatek total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
    1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_erodek(narg,ps)	/* erode function with a specified kernel */
 /* the call is x = erodek(array, kbitmap)
    where array must be a 2-D bitmap array and kbitmap a 2-D kernel, returns the eroded bitmap
    the bitmaps are byte arrays */
 int	narg, ps[];
 {
 byte	*pbase, *qbase, *p, *q, *kbase;
 int	iq, n, m, mq, type, ntotal, nktotal, nxk, mxk, nk, nq, i, j, nx2, ny2, ii, jj;
 int	*ixoff, *iyoff, *pkx, *pky;
 double t1, t2, t3, t4;
 t1 = systime();
	/* first arg must be a 2-D byte array */
 ntotal = get_2d_array(ps[0], BYTE, (int **) &pbase, &n, &m);
 dim[0] = n;		dim[1] = m;
 result_sym = array_scratch(0, 2, dim);		/*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 qbase = (byte *) ((char *)h + sizeof(struct ahead));
 bcopy(pbase, qbase, ntotal);
 /* second arg must also be a 2-D byte array for the kernel */
 nktotal = get_2d_array(ps[1], BYTE, (int **) &kbase, &nxk, &mxk);
 /* compute offsets from the kernel bitmap, first find count of non-zero entries */
 nq = nktotal;  nk = 0;  p = kbase;
 while (nq--) {  if (*p++) nk++; }
 printf("kernel has %d bits set\n", nk);
 ixoff = malloc(nk*sizeof(int));  iyoff = malloc(nk*sizeof(int));
 nq = nktotal;  p = kbase;  iq = 0;
 nx2 = nxk/2;	ny2 = mxk/2;
 pkx = ixoff;	pky = iyoff;
 while (nq--) {
   if (*p++)  {
     i = iq % nxk;  j = iq / nxk;
     *pkx++ = i - nx2;   *pky++ = j - ny2;
   }
   iq++;
 }
//  nq = nk;  pkx = ixoff;	pky = iyoff;
//  while (nq--) {
//    printf("offsets: %4d  %4d\n", *pkx++, *pky++);
//  }
 t2 = systime();
 /* now loop through the main image and check if pixel set, erosion is applied
 only to set pixels, we erode by checking if any pixels in the kernel zone are 0
 in the original image */
 p = pbase;
 for (j=0;j<m;j++) {
   for (i=0;i<n;i++) {
     if (*p++) {
       /* so check the kernel around this in input array and if a 0 found, 0 the output */ 
       nq = nk;
       pkx = ixoff;	pky = iyoff;
       while (nq--) {
         ii = i + *pkx++;  jj = j + *pky++;
	 if (ii >= 0 && jj >= 0 && ii < n && jj < m) {
	   iq = ii + jj * n;
	   if (*(pbase + iq) == 0) {
	     /* zero in output and skip the rest */
	     *(qbase + (p - pbase - 1)) = 0;
	     break;
	   }
	 }
       }
     }
   }
 }
 t3 = systime();
 printf("erodek total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
    1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_dilate4way(narg,ps)			/* dilation function */
 /* the call is x = dilate(array)
    where array must be a 2-D array, returns the dilated bitmap */
 int	narg, ps[];
 {
 byte	*pbase, *qbase, *p, *q, *p_endline, *qabove, *qbelow;
 unsigned long	q_minus_p;
 int	iq, n, m, mq, type, ntotal;
	/* first arg must be a 2-D byte array */
  double t1, t2, t3, t4;
  t1 = systime();
 ntotal = get_2d_array(ps[0], BYTE, (int **) &pbase, &n, &m);
 dim[0] = n;		dim[1] = m;
 result_sym = array_scratch(0, 2, dim);		/*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 qbase = (byte *) ((char *)h + sizeof(struct ahead));
 bzero(qbase, ntotal);
 q_minus_p = qbase - pbase - 1;
 
 p = pbase;
 q = qbase;

 /* the edges and corners are done as special cases */
 /* first corner */
 if (*p) { *q++ = 1;  *q = 1; q = qbase + n;  *q++ = 1;}
 /* starting row */
 p_endline = pbase + n - 2;
 p++;
  t2 = systime();
 while (1) {
   if (*p) {
   /* got a hit, this means we need to set 6 pixels in the output image */
   q = q_minus_p + p;   /* this is the q just before the corresponding p */
   qabove = q + n + 1;
   *q++ = 1;		*q++ = 1;	*q++ = 1;
   *qabove++ = 1;
   if (p >= p_endline) break;
   p++;
   /* if we continue to get consecutive hits, just need to set next 2 */
   while (*p) {
     *q++ = 1;	*qabove++ = 1;
     if (p >= p_endline) break;
     p++; }
   }
   if (p >= p_endline) break;
   p++;
 }

 p++;
 /* last point in starting row, set independent of previous so we may
 set some pixels twice */
 if (*p) {
   q = q_minus_p + p;   /* this is the q just before the corresponding p */
   qabove = q + n;
   *q++ = 1;		*q = 1;
   *qabove = 1;
 }
 p++;


 /* central area */ 
 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
   /* start row, not top or bottom */
   /* left hand edge */
   if (*p) {	/* set 6 points */
     q = q_minus_p + 1 + p;	/* q at p */
     qabove = q + n;	qbelow = q - n;	
     *q++ = 1;		*q = 1;
     *qabove = 1;
     *qbelow = 1;
   }
   p_endline = p + n - 2;
   p++;

   /* done with left edge, now the middle */

   while (1) {
    if (*p) {
      /* got a hit, this means we need to set 9 pixels in the output image */
      q = q_minus_p + p;
      qabove = q + n + 1;	qbelow = q - n + 1;	
      *q++ = 1;		*q++ = 1;	*q++ = 1;
      *qabove++ = 1;
      *qbelow++ = 1;
      if (p >= p_endline) break;
      p++;
      /* if we continue to get consecutive hits, just need to set next 3 */
      while (*p) {
	*q++ = 1;	*qabove++ = 1;	*qbelow++ = 1;
	if (p >= p_endline) break;
	p++;
      }
    }
    if (p >= p_endline) break;
    p++;
  }
  p++;

  /* the last point in row */
  if (*p) {  
    q = q_minus_p + p;   /* this is the q just before the corresponding p */
    qabove = q + n+1;	qbelow = q - n+1;	
    *q++ = 1;		*q = 1;
    *qabove = 1;
    *qbelow = 1;
  }
  p++;

 }

 /* at last, the last row */
 /* left hand edge */
 if (*p) {	/* set 4 points */
   q = q_minus_p + 1 + p;	/* q at p */
   qbelow = q - n;	
   *q++ = 1;		 *q = 1;
   *qbelow++ = 1;
 }
 p_endline = p + n - 2;
 p++;

 /* now the middle of the last row */
 
 while (1) {
   if (*p) {
     /* got a hit, this means we need to set 9 pixels in the output image */
     q = q_minus_p + p;
     qbelow = q - n  + 1;	
     *q++ = 1;		*q++ = 1;	*q++ = 1;
     *qbelow++ = 1;
     if (p >= p_endline) break;
     p++;
     /* if we continue to get consecutive hits, just need to set next 2 */
     while (*p) {
     *q++ = 1;	*qbelow++ = 1;
     if (p >= p_endline) break;
     p++; }
   }
   if (p >= p_endline) break;
   p++;
 }
 p++;
 /* printf("pbase, p_endline, p = %#x, %#x, %#x\n", pbase, p_endline, p); */
 /* the last point in last row */
 /* printf("pbase, p = %#x, %#x\n", pbase, p); */
 if (*p) {  
   q = q_minus_p + p;   /* this is the q just before the corresponding p */
   qbelow = q - n + 1;	
   *q++ = 1;		*q = 1;
   *qbelow = 1;
 }
  t3 = systime();
  printf("dilate4way total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
    1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_dilate3(narg,ps)			/* dilation function */
 /* the call is x = dilate3(array), a 4-way dilation
    where array must be a 2-D array, returns the dilated bitmap */
 int	narg, ps[];
 {
 byte	*pbase, *qbase, *p, *q, *pq, **qarray, *qline, *qbelow, *qabove;
 unsigned long	q_minus_p;
 int	iq, n, m, mq, type, ntotal, i, j, nq, nmo, mmo;
	/* first arg must be a 2-D byte array */
 double t1, t2, t3, t4;
 t1 = systime();
 ntotal = get_2d_array(ps[0], BYTE, (int **) &pbase, &n, &m);
 dim[0] = n;		dim[1] = m;
 result_sym = array_scratch(0, 2, dim);		/*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 qbase = (byte *) ((char *)h + sizeof(struct ahead));
 bzero(qbase, ntotal);
 
 /* output is q, cycle through all interior q pixels and check if any of the p's around it are
 set */
 nmo = n - 1;
 mmo = m - 1;
 p = pbase;
 q = qbase;
 qarray = (byte **) malloc(m * sizeof(byte *) );
 for (j=0;j<m;j++) { qarray[j] = qbase + j*n; }
//  printf("qbase = %#x\n", qbase);
//  printf("qarray[0] = %#x\n", qarray[0]);
//  printf("qarray[mmo] = %#x\n", qarray[mmo]);
 p = p + n + 1;
 qline = q + n;
 qbelow = qline - n;
 qabove = qline + n;
 t2 = systime();
 for (j=1;j<mmo;j++) {
   qline = qbase + n * j;
   qbelow = qline - n;
   qabove = qline + n;
   
   for (i=1;i<nmo;i++) {
      if (*p++){
// 	qarray[j][i] = 1;
// 	if (i>0) qarray[j][i-1] = 1;
// 	if (j>0) qarray[j-1][i] = 1;
// 	if (i<nmo) qarray[j][i+1] = 1;
// 	if (j<mmo) qarray[j+1][i] = 1;
// 	qarray[j][i-1] = 1;
// 	qarray[j-1][i] = 1;
// 	qarray[j][i+1] = 1;
// 	qarray[j+1][i] = 1;
        q = qline + i;
	*q = 1;   *(q-1) = 1;  *(q+1) = 1;
        *(qbelow +i) = 1;
        *(qabove +i) = 1;
      }
   }
   p = p + 2;
 }
 t3 = systime();
 printf("dilate3 total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
   1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_erode(narg,ps)                  /* erosion function */
 /* the call is x = erode(array)
    where array must be a 2-D array, returns the eroded bitmap */
 int    narg, ps[];
 {
 byte   *pbase, *qbase, *p, *q, *p_endline, *qabove, *qbelow;
 //unsigned long  q_minus_p;

 int    iq, n, m, mq, type;
 double t1, t2, t3, t4;
 t1 = systime();
 iq = ps[0];
         /* first arg must be a 2-D byte array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 type = sym[iq].type;
 if (type != 0) return execute_error(118);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 n = h->dims[0];        m = h->dims[1];
 dim[0] = n;            dim[1] = m;
 if ( n < 3 || m < 3 ) {
        printf("ERODE: array too small, nx, ny = %d %d\n", n, m);
        return -1; }
 pbase = (byte *) ((char *)h + sizeof(struct ahead));
 result_sym = array_scratch(0, 2, dim);         /*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 qbase = (byte *) ((char *)h + sizeof(struct ahead));

 bcopy(pbase, qbase, n*m);
 //q_minus_p = (qbase - pbase - 1);
 
 p = pbase;
 q = qbase;

 /* the edges and corners are done as special cases */
 /* first corner */
 if (*p == 0) {  /* zap the 3 neighbors */
 *q++;  *q = 0; q = qbase + n;  *q++ = 0; *q = 0;}
 /* starting row */
 p_endline = pbase + n - 2;
 p++;
  t2 = systime();
 while (1) {
  if (*p == 0) {
  /* got a hit, this means we need to clear 6 pixels in the output image */
  q = (qbase - pbase - 1) + p;   /* this is the q just before the corresponding p */
  qabove = q + n;
  *q++ = 0;             q++;       *q++ = 0;
  *qabove++ = 0;        *qabove++ = 0;  *qabove++ = 0;
  if (p >= p_endline) break;
  p++;
  /* if we continue to get consecutive hits, just need to set next 2 */
  while (*p == 0) {
   if (p >= p_endline) break;
   *q++ = 0;    *qabove++ = 0;
   p++; }
  }
  if (p >= p_endline) break;
  p++;
  }
  p++;
  /* last point in starting row, set independent of previous so we may
  set some pixels twice */
  if (*p == 0) {  
  q = (qbase - pbase - 1) + p;   /* this is the q just before the corresponding p */
  qabove = q + n;
  *q = 0;
  *qabove++ = 0;        *qabove = 0;
  }
  p++;


 /* central area */ 
 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
 /* start row, not top or bottom */
 /* left hand edge */
 if (*p == 0) {      /* set 6 points */
  q = (qbase - pbase - 1) + 1 + p;        /* q at p */
  qabove = q + n;       qbelow = q - n; 
  *q++;             *q = 0;
  *qabove++ = 0;        *qabove = 0;
  *qbelow++ = 0;        *qbelow = 0;
 }
 p_endline = p + n - 2;
 p++;

 /* done with left edge, now the middle */
 
 while (1) {
  if (*p == 0) {
  /* got a hit, this means we need to clear 8 pixels in the output image */
  q = (qbase - pbase - 1) + p;
  qabove = q + n;       qbelow = q - n; 
  *q++ = 0;             q++;       *q++ = 0;
  *qabove++ = 0;        *qabove++ = 0;  *qabove++ = 0;
  *qbelow++ = 0;        *qbelow++ = 0;  *qbelow++ = 0;
  if (p >= p_endline) break;
  p++;
  /* if we continue to get consecutive hits, just need to clear next 3 */
  while (*p == 0) {
   if (p >= p_endline) break;
   *q++ = 0;    *qabove++ = 0;  *qbelow++ = 0;
   p++; }
  }
  if (p >= p_endline) break;
  p++;
  }
  p++;


 /* the last point in row */
  if (*p == 0) {  
  q = (qbase - pbase - 1) + p;   /* this is the q just before the corresponding p */
  qabove = q + n;       qbelow = q - n; 
  *q = 0;
  *qabove++ = 0;        *qabove++ = 0;
  *qbelow++ = 0;        *qbelow++ = 0;
  }
  p++;

 }

 /* at last, the last row */
 /* left hand edge */
 if (*p == 0) {      /* set 4 points */
  q = (qbase - pbase - 1) + 1 + p;        /* q at p */
  qbelow = q - n;       
  q++;              *q = 0;
  *qbelow++ = 0;         *qbelow = 0;
 }
 p_endline = p + n - 2;
 p++;

 /* now the middle of the last row */
 
 while (1) {
  if (*p == 0) {
  /* got a hit, this means we need to clear 9 pixels in the output image */
  q = (qbase - pbase - 1) + p;
  qbelow = q - n;       
  *q++ = 0;             q++;       *q++ = 0;
  *qbelow++ = 0;        *qbelow++ = 0;  *qbelow++ = 0;
  if (p >= p_endline) break;
  p++;
  /* if we continue to get consecutive hits, just need to set next 3 */
  while (*p == 0) {
  if (p >= p_endline) break;
  *q++ = 0;     *qbelow++ = 0;  p++; }
  }
  if (p >= p_endline) break;
  p++;
  }
  p++;
  /* printf("pbase, p_endline, p = %#x, %#x, %#x\n", pbase, p_endline, p); */
 /* the last point in last row */
  /* printf("pbase, p = %#x, %#x\n", pbase, p); */
  if (*p == 0) {  
  q = (qbase - pbase - 1) + p;   /* this is the q just before the corresponding p */
  qbelow = q - n;       
  *q = 0;
  *qbelow++ = 0;        *qbelow = 0;
  }
  t3 = systime();
  printf("erode total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
    1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_dilate2(narg,ps)		/* dilation function */
 /* the call is x = dilate2(array)
    where array must be a 2-D array, returns the dilated bitmap */
 int	narg, ps[];
 {
 byte	*pbase, *qbase, *p, *q, *p_endline, *qabove, *qbelow;
 unsigned long	q_minus_p;

 int	iq, n, m, mq, type, i, j, k, km;
 int	offsx[9], offsy[9], offs[9], offs_tmp[9], offs_left[9];
 int	offs_right[9], *pf;
 int	noff = 9, nq;
  double t1, t2, t3, t4;
  t1 = systime();
 iq = ps[0];
	 /* first arg must be a 2-D byte array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 type = sym[iq].type;
 if (type != 0) return execute_error(118);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 n = h->dims[0];	m = h->dims[1];
 dim[0] = n;		dim[1] = m;
 if ( n < 3 || m < 3 ) {
 	printf("DILATE: array too small, nx, ny = %d %d\n", n, m);
	return -1; }
 pbase = (byte *) ((char *)h + sizeof(struct ahead));
 result_sym = array_scratch(0, 2, dim);		/*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 qbase = (byte *) ((char *)h + sizeof(struct ahead));
 bzero(qbase, n*m);
 q_minus_p = qbase - pbase - 1;
 
 p = pbase;
 q = qbase;

 /* set up the offset array */
 for (i=0;i<3;i++) { for (j=0;j<3;j++) { k = i + j*3;
 	offsx[k] = i-1;	offsy[k] = j-1; } }
 for (i=0;i<9;i++) { offs[i] = offsx[i] + offsy[i] * n; }
 
 /* generate the left and right versions */
 for (i=0;i<9;i++) {
   offs_left[i]  = MAX(offsx[i], 0) + offsy[i] * n;
   offs_right[i] = MIN(offsx[i], 0) + offsy[i] * n;
 }
  t2 = systime();

 /* the edges and corners are done as special cases */
 /* first corner */
 for (i=0;i<9;i++) { offs_tmp[i] = MAX(offsx[i], 0) + MAX(offsy[i],0) * n;}
 nq = noff;
 pf = offs_tmp;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }
 p++;	q++;

 /* top row, sans corners */
 k = n - 2;
 for (i=0;i<9;i++) { offs_tmp[i] = offsx[i] + MAX(offsy[i],0) * n;}
 while (k--) {
 nq = noff;
 pf = offs_tmp;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }
 p++;	q++;
 }
 
 /* right corner in top row */
 for (i=0;i<9;i++) { offs_tmp[i] = MIN(offsx[i], 0) + MAX(offsy[i],0) * n;}
 nq = noff;
 pf = offs_tmp;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }
 p++;	q++;

 /* interior rows */
 km = m - 2;
 while (km--) {
 /* left edge */
 nq = noff;
 pf = offs_left;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }
 p++;	q++;
 /* interior of row */
 k = n - 2;
 while (k--) {
 nq = noff;
 pf = offs;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }
 p++;	q++;
 }
 /* right edge */
 nq = noff;
 pf = offs_right;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }
 p++;	q++;
 }

 /* last row */
 /* left corner */ 
 for (i=0;i<9;i++) { offs_tmp[i] = MAX(offsx[i], 0) + MIN(offsy[i],0) * n;}
 nq = noff;
 pf = offs_tmp;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }
 p++;	q++;

 /* last row, sans corners */
 k = n - 2;
 for (i=0;i<9;i++) { offs_tmp[i] = offsx[i] + MIN(offsy[i],0) * n;}
 while (k--) {
 nq = noff;
 pf = offs_tmp;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }
 p++;	q++;
 }
 
 /* right corner in last row */
 for (i=0;i<9;i++) { offs_tmp[i] = MIN(offsx[i], 0) + MIN(offsy[i],0) * n;}
 nq = noff;
 pf = offs_tmp;
 while (nq--) { if ( *(p + *pf++)) { *q = 1; break; } }

  t3 = systime();
  printf("dilate2 total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
    1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_reorder(narg,ps)                /* reorder function */
 /* the call is x = reorder(array, order)
    where array must be a 2-D array, returns the re-ordered result */
 /* reordering is reversals and transposes of a 2-D array, there are
 8 ways to "flip" an array, the "order" ranges from 0-7 accordingly */
 int    narg, ps[];
 {
 int  iorder, iq, result_sym, type, nx, ny, n, m, inc;
 byte   *p, *q, *qsave, *ptr;
 if (int_arg_stat(ps[1], &iorder) != 1) return -1;
 /* get pointer to array, must be 2-D here */
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(66);
 type = sym[iq].type;
 // if (type != 0) return execute_error(118);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 nx = h->dims[0];        ny = h->dims[1];
 ptr = (byte *) ( (char *) h + sizeof( struct ahead ) );
 if (iorder >= 4) { m = nx;  nx = ny; ny = m; }
 dim[0] = nx;  dim[1] = ny;
 result_sym = array_scratch(type, 2, dim);         /* for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 q = (byte *) ((char *)h + sizeof(struct ahead));
 p = (byte *) ptr;
 if (iorder == 0)        /* no change, make a copy */
	bcopy(p, q, nx*ny*ana_type_size[type]);  
 else {
 /* outer switch for type, inners for orientation */
 
 switch (type) {
 case BYTE:
   if (iorder <4) {
   register  byte *pp, *qq;
   register  int  nn, mm, nxx, inc;
   nxx = nx;
   mm = ny;
   qq = q;

   switch (iorder) {
   case 1:        /* reverse in x */
    pp = p + nx;  inc = 2*nx;
    while (mm--) { nn = nxx; while (nn) { *qq++ = *--pp; nn--;}
	pp += inc;}
    break;
   case 2:        /* just reverse in y */
    pp = p + nx * ny - nx;  inc = -2*nx;
    while (mm--) { nn = nxx;  while (nn) {*qq++ = *pp++; nn--;} pp += inc; }
    break;
   case 3:        /* reverse in x and y */
    pp = p + nx*ny;
    while (mm--) { nn = nxx;  while (nn) { *qq++ = *--pp; nn--; } }
    break;
   } }
   else {
   register  byte *pp, *qq;
   register  int  nn, mm, nyy, inc;
   mm = ny;
   qq = q;
   switch (iorder) {
   case 4:        /* transpose in x and y */
    pp = p;  inc = -nx*ny + 1;  nyy = ny;  break;
   case 5:        /* transpose plus reverse in y */
    pp = p +nx*ny - ny;  inc = nx*ny + 1;  nyy = -ny; break;
    case 6:       /* transpose plus reverse in x */
    pp = p + ny - 1;  inc = -nx*ny - 1; nyy = ny; break;
   case 7:        /* transpose plus reverse in x,y */
    pp = p + ny*nx - 1;  inc = nx*ny - 1;  nyy = -ny; break;
   } 
   while (mm--) { nn= nx;  while (nn) {*qq++ = *pp;  pp += nyy; nn--;}
  	  pp += inc; }
   }
  break;
 case WORD:
   if (iorder <4) {
   register  short *pp, *qq;
   register  int  nn, mm, nxx, inc;
   nxx = nx;
   mm = ny;
   qq = (short *) q;

   switch (iorder) {
   case 1:        /* reverse in x */
    pp = (short *) p + nx;  inc = 2*nx;
    while (mm--) { nn = nxx; while (nn) { *qq++ = *--pp; nn--;}
	pp += inc;}
    break;
   case 2:        /* just reverse in y */
    pp = (short *) p + nx * ny - nx;  inc = -2*nx;
    while (mm--) { nn = nxx;  while (nn) {*qq++ = *pp++; nn--;} pp += inc; }
    break;
   case 3:        /* reverse in x and y */
    pp = (short *) p + nx*ny;
    while (mm--) { nn = nxx;  while (nn) { *qq++ = *--pp; nn--; } }
    break;
   } }
   else {
   register  short *pp, *qq;
   register  int  nyy, nn, mm;
   switch (iorder) {
   case 4:        /* transpose in x and y */
    pp = (short *) p;  inc = -nx*ny + 1;  nyy = ny;  break;
   case 5:        /* transpose plus reverse in y */
    pp = (short *) p +nx*ny - ny;  inc = nx*ny + 1;  nyy = -ny; break;
   case 6:       /* transpose plus reverse in x */
    pp = (short *) p + ny - 1;  inc = -nx*ny - 1; nyy = ny; break;
   case 7:        /* transpose plus reverse in x,y */
    pp = (short *) p + ny*nx - 1;  inc = nx*ny - 1;  nyy = -ny; break;
   }
   //{
   //register  short *qq;
   //register  int  nn, mm;
   qq = (short *) q;
   mm = ny;
   while (mm--) { nn= nx;  while (nn) {*qq++ = *pp;  pp += nyy; nn--;}
  	  pp += inc; }
   //}
   }
   break;
 case LONG:
   if (iorder <4) {
   register  int *pp, *qq;
   register  int  nn, mm, nxx;
   nxx = nx;
   mm = ny;
   qq = (int *) q;

   switch (iorder) {
   case 1:        /* reverse in x */
    {
    register  int *pp = (int *) p + nx;
    register  inc = 2*nx;
    while (mm--) { nn = nxx; while (nn) { *qq++ = *--pp; nn--;}
	pp += inc;}
    }
    break;
   case 2:        /* just reverse in y */
    pp = (int *) p + nx * ny - nx;  inc = -2*nx;
    while (mm--) { nn = nxx;  while (nn) {*qq++ = *pp++; nn--;} pp += inc; }
    break;
   case 3:        /* reverse in x and y */
    pp = (int *) p + nx*ny;
    while (mm--) { nn = nxx;  while (nn) { *qq++ = *--pp; nn--; } }
    break;
   } }
   else {
   register  int *pp, *qq;
   register  int  nn, mm, nyy;
   mm = ny;
   qq = (int *) q;
   switch (iorder) {
   case 4:        /* transpose in x and y */
    pp = (int *) p;  inc = -nx*ny + 1;  nyy = ny;  break;
   case 5:        /* transpose plus reverse in y */
    pp = (int *) p +nx*ny - ny;  inc = nx*ny + 1;  nyy = -ny; break;
    case 6:       /* transpose plus reverse in x */
    pp = (int *) p + ny - 1;  inc = -nx*ny - 1; nyy = ny; break;
   case 7:        /* transpose plus reverse in x,y */
    pp = (int *) p + ny*nx - 1;  inc = nx*ny - 1;  nyy = -ny; break;
   } 
   while (mm--) { nn= nx;  while (nn) {*qq++ = *pp;  pp += nyy; nn--;}
  	  pp += inc; }
   }
   break;
 case FLOAT:
   if (iorder <4) {
   register  float *pp, *qq;
   register  int  nn, mm, nxx;
   nxx = nx;
   mm = ny;
   qq = (float *) q;

   switch (iorder) {
   case 1:        /* reverse in x */
    pp = (float *) p + nx;  inc = 2*nx;
    while (mm--) { nn = nxx; while (nn) { *qq++ = *--pp; nn--;}
	pp += inc;}
    break;
   case 2:        /* just reverse in y */
    pp = (float *) p + nx * ny - nx;  inc = -2*nx;
    while (mm--) { nn = nxx;  while (nn) {*qq++ = *pp++; nn--;} pp += inc; }
    break;
   case 3:        /* reverse in x and y */
    pp = (float *) p + nx*ny;
    while (mm--) { nn = nxx;  while (nn) { *qq++ = *--pp; nn--; } }
    break;
   } }
   else {
   register  float *pp, *qq;
   register  int  nn, mm, nyy;
   mm = ny;
   qq = (float *) q;
   switch (iorder) {
   case 4:        /* transpose in x and y */
    pp = (float *) p;  inc = -nx*ny + 1;  nyy = ny;  break;
   case 5:        /* transpose plus reverse in y */
    pp = (float *) p +nx*ny - ny;  inc = nx*ny + 1;  nyy = -ny; break;
    case 6:       /* transpose plus reverse in x */
    pp = (float *) p + ny - 1;  inc = -nx*ny - 1; nyy = ny; break;
   case 7:        /* transpose plus reverse in x,y */
    pp = (float *) p + ny*nx - 1;  inc = nx*ny - 1;  nyy = -ny; break;
   } 
   while (mm--) { nn= nx;  while (nn) {*qq++ = *pp;  pp += nyy; nn--;}
  	  pp += inc; }
   }
   break;
 case DOUBLE:
   if (iorder <4) {
   register  double *pp, *qq;
   register  int  nn, mm, nxx;
   nxx = nx;
   mm = ny;
   qq = (double *) q;

   switch (iorder) {
   case 1:        /* reverse in x */
    pp = (double *) p + nx;  inc = 2*nx;
    while (mm--) { nn = nxx; while (nn) { *qq++ = *--pp; nn--;}
	pp += inc;}
    break;
   case 2:        /* just reverse in y */
    pp = (double *) p + nx * ny - nx;  inc = -2*nx;
    while (mm--) { nn = nxx;  while (nn) {*qq++ = *pp++; nn--;} pp += inc; }
    break;
   case 3:        /* reverse in x and y */
    pp = (double *) p + nx*ny;
    while (mm--) { nn = nxx;  while (nn) { *qq++ = *--pp; nn--; } }
    break;
   } }
   else {
   register  double *pp, *qq;
   register  int  nn, mm, nyy;
   mm = ny;
   qq = (double *) q;
   switch (iorder) {
   case 4:        /* transpose in x and y */
    pp = (double *) p;  inc = -nx*ny + 1;  nyy = ny;  break;
   case 5:        /* transpose plus reverse in y */
    pp = (double *) p +nx*ny - ny;  inc = nx*ny + 1;  nyy = -ny; break;
    case 6:       /* transpose plus reverse in x */
    pp = (double *) p + ny - 1;  inc = -nx*ny - 1; nyy = ny; break;
   case 7:        /* transpose plus reverse in x,y */
    pp = (double *) p + ny*nx - 1;  inc = nx*ny - 1;  nyy = -ny; break;
   } 
   while (mm--) { nn= nx;  while (nn) {*qq++ = *pp;  pp += nyy; nn--;}
  	  pp += inc; }
   }
   break;
 } }
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
static void rdct_spike(start, ystride, ws)
 /* does reverse dct for one cell, specialize for a spike smooth */
 short	*start;
 float	*ws;
 int	ystride;
 {
 int i, j, n;
  float tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
 float tmp10, tmp11, tmp12, tmp13;
 float z1, z2, z3, z4, z5, z11, z13, z10, z12;

 /* no de-zag and de-quantize here */
  /* Pass 1: process columns. */
  /* we don't check for columns of zeroes since this usually uses full
  precision */
  {
  register float *wsptr = ws;
  int	nq = 8;
  while (nq--) {
    tmp0 = wsptr[0];
    tmp1 = wsptr[8*2];
    tmp2 = wsptr[8*4];
    tmp3 = wsptr[8*6];

    tmp10 = tmp0 + tmp2;	/* phase 3 */
    tmp11 = tmp0 - tmp2;

    tmp13 = tmp1 + tmp3;	/* phases 5-3 */
    tmp12 = (tmp1 - tmp3) *  1.414213562 - tmp13; /* 2*c4 */

    tmp0 = tmp10 + tmp13;	/* phase 2 */
    tmp3 = tmp10 - tmp13;
    tmp1 = tmp11 + tmp12;
    tmp2 = tmp11 - tmp12;
    
    /* Odd part */

    tmp4 = wsptr[8];
    tmp5 = wsptr[8*3];
    tmp6 = wsptr[8*5];
    tmp7 = wsptr[8*7];

    z13 = tmp6 + tmp5;		/* phase 6 */
    z10 = tmp6 - tmp5;
    z11 = tmp4 + tmp7;
    z12 = tmp4 - tmp7;

    tmp7 = z11 + z13;		/* phase 5 */
    tmp11 = (z11 - z13) * ( 1.414213562); /* 2*c4 */

    z5 = (z10 + z12) * ( 1.847759065); /* 2*c2 */
    tmp10 = ( 1.082392200) * z12 - z5; /* 2*(c2-c6) */
    tmp12 = ( -2.613125930) * z10 + z5; /* -2*(c2+c6) */

    tmp6 = tmp12 - tmp7;	/* phase 2 */
    tmp5 = tmp11 - tmp6;
    tmp4 = tmp10 + tmp5;

    wsptr[0]   = tmp0 + tmp7;
    wsptr[8*7] = tmp0 - tmp7;
    wsptr[8]   = tmp1 + tmp6;
    wsptr[8*6] = tmp1 - tmp6;
    wsptr[8*2] = tmp2 + tmp5;
    wsptr[8*5] = tmp2 - tmp5;
    wsptr[8*4] = tmp3 + tmp4;
    wsptr[8*3] = tmp3 - tmp4;

    //fqtptr++;
    wsptr++;		/* advance pointers to next column */
  } }

  /* Pass 2: process rows. */
  {
  register float *wsptr;
  register short *elemptr;
  int	nq = 8;
  wsptr = ws;	elemptr = start;
  while (nq--) {
      /* Even part */

    //tmp10 = wsptr[0] + wsptr[4] + bias;
    //tmp11 = wsptr[0] - wsptr[4] + bias;
    tmp10 = wsptr[0] + wsptr[4];
    tmp11 = wsptr[0] - wsptr[4];

    tmp13 = wsptr[2] + wsptr[6];
    tmp12 = (wsptr[2] - wsptr[6]) * ( 1.414213562) - tmp13;

    tmp0 = tmp10 + tmp13;
    tmp3 = tmp10 - tmp13;
    tmp1 = tmp11 + tmp12;
    tmp2 = tmp11 - tmp12;

    /* Odd part */

    z13 = wsptr[5] + wsptr[3];
    z10 = wsptr[5] - wsptr[3];
    z11 = wsptr[1] + wsptr[7];
    z12 = wsptr[1] - wsptr[7];

    tmp7 = z11 + z13;
    tmp11 = (z11 - z13) * ( 1.414213562);

    z5 = (z10 + z12) * ( 1.847759065); /* 2*c2 */
    tmp10 = ( 1.082392200) * z12 - z5; /* 2*(c2-c6) */
    tmp12 = ( -2.613125930) * z10 + z5; /* -2*(c2+c6) */

    tmp6 = tmp12 - tmp7;
    tmp5 = tmp11 - tmp6;
    tmp4 = tmp10 + tmp5;

    /* Final output stage, note bias was added in above */
    /* we don't range limit since results should be near exact */
    elemptr[0] = (short) (tmp0 + tmp7);
    elemptr[7] = (short) (tmp0 - tmp7);
    elemptr[1] = (short) (tmp1 + tmp6);
    elemptr[6] = (short) (tmp1 - tmp6);
    elemptr[2] = (short) (tmp2 + tmp5);
    elemptr[5] = (short) (tmp2 - tmp5);
    elemptr[4] = (short) (tmp3 + tmp4);
    elemptr[3] = (short) (tmp3 - tmp4);

    wsptr += 8;
    elemptr += ystride;		/* to next row */
  } }
 }
 /*---------------------------------------------------------------------------*/
int despike(
     int *array, unsigned char *mask, int nx, int ny, float frac, int niter, /* void **outarray, */
     int *badblobs, int sizeofbads,
     int *nspikes, int **oldvalues, int **spikelocs, int **newvalues,
     int datatype, char *wavestring, int blobflipflag);
 /*---------------------------------------------------------------------------*/
int ana_despikeaia(narg,ps)                /* despike function, simplified for AIA tests */
 /* the call is
   x = despike(array, [mask, frac, level, niter, badblobs, nspikes, spikevals, spikelocs, newvalues,
   wavestring, blobflipflag]) */
 /* 3/20/2010 - changed from a func to a subr since the AIA code now does in place and
 does not return a new array, the original is modified */
 int    narg, ps[];
 {
 int	iq, result_sym, type, nx, ny, n, m, level=7, sum, nc;
 int	lognb2, jj, jc, stat, dim[8];
 int	nxc, nyc, niter=1, nxmsk, nymsk;
 int	save_niter, ntotal, npix, blobflipflag = 0;
 float	frac = 0.25, fsum, cfrac, tq, fdif;
 int  *ptr, *oldvalues, *p, *newvalues;
 unsigned char	*mptr, msv;
 //void   *outarray;
 int	nspikes, *badblobs, nbads, *spikelocs;
 union	types_ptr jpbase;
 char	*wavestring;
 /* default for bad pixels */
 badblobs = 0;  nbads = 0;
 if (narg > 2) if (float_arg_stat(ps[2], &frac) != 1) return -1;
 if (narg > 3) if (int_arg_stat(ps[3], &level) != 1) return -1;
 if (narg > 4) if (int_arg_stat(ps[4], &niter) != 1) return -1;
 if (narg > 5) {
   iq = ps[5];
   /* if not an array, we take the no bad default */
   if ( sym[iq].class == 4 ) {
     type = sym[iq].type;
     if (type != 2) return execute_error(111);
     h = (struct ahead *) sym[iq].spec.array.ptr;
     badblobs = (unsigned char *) ( (char *) h + sizeof( struct ahead ) );
     if ( h->ndim != 1 ) { printf("bad array must be 1-D\n");  return -1; }
     nbads = h->dims[0];
   }
 }
 if (narg > 10) {
   /* this is a string, might be null */
   if ( sym[ ps[10] ].class != 2 ) return execute_error(70);
   wavestring = (char *) sym[ ps[10] ].spec.array.ptr;
 } else wavestring = 0;
 if (narg > 11) if (int_arg_stat(ps[11], &blobflipflag) != 1) return -1;
 /* get pointer to array, must be 2-D here */
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(66);
 type = sym[iq].type;
 if (type != 2) return execute_error(133); /* must be I*4 as of 3/17/2010 */
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 nx = h->dims[0];        ny = h->dims[1];
 if (nx < 5 || ny < 5 ) {
 	printf("dimensions must be 5 or greater, nx, ny = %d %d\n", nx,ny);
 	return -1; }
 ptr = (int *) ( (char *) h + sizeof( struct ahead ) );
 /* the mask is optional for ana but needed for the despike call but if no real
 mask we can pass a null pointer, if the ana argument is not an array or not
 present, that's what we do */
  if (narg > 1) {
    iq = ps[1];
    if ( sym[iq].class != 4 ) { mptr = 0; } else {
      /* force to be a byte array */
      iq = ana_byte(1,&iq);
      h = (struct ahead *) sym[iq].spec.array.ptr;
      if ( h->ndim != 2 ) return execute_error(101);
      nxmsk = h->dims[0];        nymsk = h->dims[1];
      if (nxmsk != nx || nymsk != ny) {
        printf("array and mask dimensions must match, you have nx, ny = %d %d for array\n", nx,ny);
	printf("and %d %d for mask\n", nxmsk, nymsk);
      }
      mptr = (unsigned char *) ( (char *) h + sizeof( struct ahead ) );
    }
  } else { mptr = 0; }
//  stat = despike(ptr, nx, ny, frac, level, niter, &outarray, badblobs, nbads,
//         &nspikes, &oldvalues, &spikelocs, &newvalues);
 stat = despike(ptr, mptr, nx, ny, frac, niter, /* &outarray, */ badblobs, nbads,
        &nspikes, &oldvalues, &spikelocs, &newvalues, type, wavestring, blobflipflag);
//  dim[0] = nx;  dim[1] = ny;
//  result_sym = array_scratch(type, 2, dim);         	/* for the result */
//  h = (struct ahead *) sym[result_sym].spec.array.ptr;
//  jpbase.l = (int *) ((char *)h + sizeof(struct ahead));
//  bcopy(outarray, (void *) jpbase.l, nx*ny*ana_type_size[type]);
//  free(outarray);
 printf("# of spikes from despike = %d\n", nspikes);
 if (narg > 6) {
   iq = ps[6];
   redef_scalar(iq,2,&nspikes);
 }
 if (nspikes < 0) nspikes = 0;
 if (narg > 7) {
   iq = ps[7];
   if (nspikes <= 0) { 
     redef_scalar(iq, 2, &nspikes);
   } else {
     dim[0] = nspikes;
     if (redef_array(iq, 2, 1, dim) != 1) return -1;
     h = (struct ahead *) sym[iq].spec.array.ptr;
     p = (int *) ((char *)h + sizeof(struct ahead));
     bcopy(oldvalues, p, nspikes*sizeof(int));
   }
   if (narg > 8) {
     iq = ps[8];
     if (nspikes <= 0) { 
       redef_scalar(iq, 2, &nspikes);
     } else {
       if (redef_array(iq, 2, 1, dim) != 1) return -1;
       h = (struct ahead *) sym[iq].spec.array.ptr;
       p = (int *) ((char *)h + sizeof(struct ahead));
       bcopy(spikelocs, p, nspikes*sizeof(int));
     }
     if (narg > 9) {
       iq = ps[9];
       if (nspikes <= 0) { 
	 redef_scalar(iq, 2, &nspikes);
       } else {
	 if (redef_array(iq, 2, 1, dim) != 1) return -1;
	 h = (struct ahead *) sym[iq].spec.array.ptr;
	 p = (int *) ((char *)h + sizeof(struct ahead));
	 bcopy(newvalues, p, nspikes*sizeof(int));
       }
     }
   }
 }
 if (oldvalues) free(oldvalues);
 if (newvalues) free(newvalues);
 if (spikelocs) free(spikelocs);
 return 1;
 }
 /*---------------------------------------------------------------------------*/
#define ALN2I 1.442695022
#define TINY 1.0e-5
int ana_despike(narg,ps)                /* despike function */
 /* the call is x = despike(array, [frac, level, niter, cell_flag, rms]) */
 int    narg, ps[];
 {
 int	iq, result_sym, type, nx, ny, n, m, level=7, sum, nc, cell_flag=0;
 int	lognb2, cell_malloc = 0, badcells = 0, cell_count, jj, jc;
 int	nxc, nyc, cell_flag_sign, niter=1, rms_flag_sign, rms_flag;
 int	sign_flag, bad_flag, bb_flag, save_niter, ntotal;
 byte	*cell_status, *pcell;
 float	frac = 0.25, fsum, cfrac, tq, rms=0.0, fdif;
 short   *p, *q, *ptr, *p1, *p2, *p3, *out, *out2;
 short	arr[16], *pps, *ss;
 double	t1, t2, t3, t4;
 float	absmin = 1.0;
 t1 = systime();
 lognb2=(log((double) 16)*ALN2I+TINY);

 if (narg > 1) if (float_arg_stat(ps[1], &frac) != 1) return -1;
 if (narg > 2) if (int_arg_stat(ps[2], &level) != 1) return -1;
 if (narg > 3) if (int_arg_stat(ps[3], &niter) != 1) return -1;
 if (narg > 4) if (int_arg_stat(ps[4], &cell_flag) != 1) return -1;
 if (narg > 5) if (float_arg_stat(ps[5], &rms) != 1) return -1;

 if (cell_flag >0) cell_flag_sign = 1;  else cell_flag_sign = 0;
 cell_flag = ABS(cell_flag);
 if (rms != 0.0) { rms_flag = 1;
 if (rms > 0) {rms_flag_sign = 1; } else { rms_flag_sign = 0;  rms = -rms;}
 } else rms_flag = 0;
 /* get pointer to array, must be 2-D here */
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(66);
 type = sym[iq].type;
 if (type != 1) return execute_error(111); /* must be I*2 */
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 nx = h->dims[0];        ny = h->dims[1];
 if (nx < 5 || ny < 5 ) {
 	printf("dimensions must be 5 or greater, nx, ny = %d %d\n", nx,ny);
 	return -1; }
 /* the cell_flag is used to indicate that we destroy entire cells which
 are corrupted by cell_flag or more spikes, but this is intended only
 for TRACE style JPEG data so we insist that the dimensions are multiples
 of 8. Extend to include two checks indicated by cell_flag and rms_flag,
 both need bad cells marked */
 bb_flag = cell_flag || rms_flag;	/* use bb_flag */
 if (bb_flag) {
  if (nx%8 || ny%8) bb_flag = 0;
 }
 /* if we survived that, get the cell count, we use scrat to store cell
 results */
 if (bb_flag) {
  cell_count = nx*ny/64;
  if (cell_count <= (NSCRAT * sizeof(int) )) cell_status = (byte *) scrat;
    else { cell_status = (byte *) malloc( cell_count);
    if (!cell_status) return execute_error(13);
    cell_malloc = 1;
    }
  bzero(cell_status, cell_count);
 }
 ptr = (short *) ( (char *) h + sizeof( struct ahead ) );
 dim[0] = nx;  dim[1] = ny;
 result_sym = array_scratch(type, 2, dim);         	/* for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 out = (short *) ((char *)h + sizeof(struct ahead));	/* output */
 p = (short *) ptr;					/* input  */
 cfrac = 1.0/(1.0 + frac);	/* 3/2/2010 changed from 1.0 - frac which had less range */
 nc = ntotal = 0;
 nxc = nx/8;	nyc = ny/8;
 niter = ABS(niter);
 if (niter > 20) { printf("DESPIKE - error, excessive # of iterations = %d\n",
 	niter);  return -1; }

 /* add internal iteration 10/8/98 */
 /* if there are 2 or more iterations, we need an extra array, we can't do the
 despike step in place because it "erodes" from the low y direction. */
 if (niter > 1) {
  out2 = (short *) malloc(nx*ny*sizeof(short));
  if (!out2) return execute_error(13);
 }
 save_niter = niter;

 t2 = systime();
 while (niter--) {
   /* depending on niter%2, we choose where q points, together with the
   ptr substitution at the end of the loop, we ping pong between the two
   areas, always ending on the out array */
   if (niter%2) { q = out2; } else { q = out; }
   /* load top 2 rows */
   m = 2;
   while (m--) {
     p = ptr;  n = nx;  while(n--) *q++ = *p++;
     ptr = ptr + nx;
   }
   /* skip the outer edges while testing algorithms */
   m = ny-4; jj = 2;  /* jj is used for cell addressing */
   while (m--) {
     /* skip the outer edges while testing algorithms */
     p = ptr;
     n = 2;
     while (n--) *q++ = *p++;
     n = nx-4;
     p2 = p - 1;
     p1 = p2 - nx;
     p3 = p2 + nx;
     while (n--) {
      /* add the 8 around this point */
      tq = (cfrac * (float) *p);
      sum = *p1 + *(p1+1) + *(p1+2) + *p2 + *(p2+2) + *p3 + *(p3+1) + *(p3+2);
      //p1++;  p2++;  p3++;
      fsum = (float) sum * 0.125;
      /* now the test, 2/27/2010 change <= to < to avoid hits from all zeroes */
      if ( fsum < tq  && ((tq-fsum) > absmin) ) {  /* we have a bady, zap it */
	/* some more checks, also have to exceed all values by some amount */
	int  smax, smin;
	nc++;
	/* check if we are black balling cells */
	if (bb_flag) {
	  /* get the cell index */
	  jc = jj/8;
	  iq = (p-ptr)/8 + jc*(nxc);
	  //printf("i,j in array = %d, %d  ", (p-ptr),jj);
	  //printf("cell index = %d, %d\n",(p-ptr)/8,jc);
	  cell_status[iq] += 1;
	}
	if (level < 0) { *q++ = 0; p++; } else {
	 /* load up sort array and find the desired one */
	 ss = arr;	pps = p - 2*nx -2;
	 *ss++ = *pps++; *ss++ = *pps++; *ss++ = *pps++; *ss++ = *pps++; *ss++ = *pps++;
	 *ss++ = *(p - nx -2);  *ss++ = *(p - nx +2);
	 *ss++ = *(p -2);  *ss++ = *(p +2);
	 *ss++ = *(p + nx -2);  *ss++ = *(p + nx +2);
	 pps = p +2 *nx - 2;
	 *ss++ = *pps++; *ss++ = *pps++; *ss++ = *pps++; *ss++ = *pps++; *ss++ = *pps++;
	  /* a built in sorter here */
	  { int nn,m,j,i,n=16;
	     short t;
	     m=n;
	     for (nn=1;nn<=lognb2;nn++) {
		m >>= 1;
	       for (j=m;j<n;j++) {
		  i=j-m;
		  t=arr[j];
		  while (i >= 0 && arr[i] > t) {
		    arr[i+m]=arr[i];
		    i -= m;
		  }
		  arr[i+m]=t;
	      }
	    }
	 }
	 /* now get the indicated one using level */
	 *q++ = arr[level];  p++;
         }

       } else {
         *q++ = *p++;    /* looks OK (this time) */
       }
       p1++;  p2++;  p3++;
       
     }
     n = 2;
     while (n--) *q++ = *p++;
     ptr = ptr + nx; jj++;
   }
   /* load last 2 rows */
   m = 2;
   while (m--) {
   p = ptr;  n = nx;  while(n--) *q++ = *p++;
   ptr = ptr + nx;
   } 
   /* end of this iteration, reconfigure for next one, note that for
   a single iteration we don't need out2 */
   printf("despike got %d spikes in iteration %d\n", nc, niter);
   ntotal += nc;  nc = 0;
   if (niter%2) { ptr = out2; } else { ptr = out; }
 }
 if (save_niter > 1) free(out2);
 printf("despike got %d spikes in %d iterations\n", ntotal, save_niter);
 t3 = systime();


 //printf("cell flag 3 = %d\n",cell_flag); 
 if (bb_flag) {
  int badcells = 0;
  float	dc[9], ws[64], *pf;
  int 	i, ix, jx, ic, stride = nx - 8, ii, jj, istart, sq,k;
  int	ioff[3], joff[3];
  for (i=0;i<cell_count;i++) {
   /* we can have a bad cell by the number of spikes (the strike out
   option) and/or we can check the rms, if doing both we check the
   strikes first of course */
    if (!cell_status[i]) continue;  /* if no spikes at all, skip */
    bad_flag = 0;
    if (cell_flag && (cell_status[i] >= cell_flag)) { bad_flag = 1;
    	sign_flag = cell_flag_sign; }
    if (rms_flag && !bad_flag) {
     short	*ps1, *ps2, *tmp;
     sign_flag = rms_flag_sign;
     /* check the rms for any cell with a spike */
     jc = i/nxc;		ic = i%nxc;
      istart = ic*8 + jc*8*nx;
      q = out + istart;
      ps1 = q;	ps2 = q+1;
      fsum = 0.0;
      fdif = 0.0;
      nc = 8;
      /* get the checkerboard metric */
      while (nc--) { m = 4; while(m--) {
      	fsum += (float) *q++; fsum += (float) *q++;
	fdif = fdif + *ps1 - *ps2;
	ps1 += 2;	ps2 += 2;
      	}
      tmp = ps1; ps1 = ps2 + stride;  ps2 = tmp + stride;
      q += stride; }
      /* printf("rms/fsum = %12.2f,  i = %d\n", ABS(fdif)/fsum, i); */
      if (fsum != 0.0) {
       if ( (ABS(fdif)/fsum) > rms) { bad_flag = 1;
      	/* printf("hit at i = %d\n", i); */ 
      	} }
    }
    if (bad_flag) {
    badcells++;
    jc = i/nxc;		ic = i%nxc;
    /* testing various cell smooth options */
    if (sign_flag == 0) cell_smooth_type = 0; else cell_smooth_type = 1;
    switch (cell_smooth_type) {
    case 0:
    dc[4] = 0.0;
    /* we need the means of the neighboring 8 cells */
    if (ic>0) ioff[0] = ic - 1; else ioff[0] = ic+1;
    ioff[1] = ic;
    if (ic < (nxc-1)) ioff[2] = ic+1; else ioff[2] = ic-1;
    if (jc>0) joff[0] = jc - 1; else joff[0] = jc+1;
    joff[1] = jc;
    if (jc < (nyc-1)) joff[2] = jc+1; else joff[2] = jc-1;
    
    for (ii=0;ii<3;ii++) {
     ix = ioff[ii]*8;
     for (jj=0;jj<3;jj++) {
      if (ii != 1 || jj != 1) {
      jx = joff[jj]*8;
      istart = ix + jx*nx;
      q = out + istart;
      k = ii + jj*3;
      fsum = 0.0;
      nc = 8;
      while (nc--) { m = 8; while(m--) fsum += (float) *q++; q += stride; }
      dc[k] = fsum*0.015625; /* that's 1/64 */
      dc[4] += dc[k];
      }
   } }
    dc[4] = dc[4]*0.125;
    //printf("the surround\n%6.1f   %6.1f   %6.1f\n%6.1f   %6.1f   %6.1f\n%6.1f   %6.1f   %6.1f\n",
    //dc[0],dc[1],dc[2],dc[3],dc[4],dc[5],dc[6],dc[7],dc[8]);
    /* set target cell just to mean for next test */
    //printf("dc4 = %6.1f\n", dc[4]);
    //sq = (int) (dc[4] + .5);
    q = out + ic*8 + jc*8*nx;
    nc = 8;
    if (sign_flag)
      //while (nc--) { m = 8; while(m--) *q++ = sq; q += stride; }
      { /* try the AC predict trick for a smooth gradient over the cell */
      nc = 64;  pf = ws; while (nc--) *pf++ = 0.0;
      ws[0] = dc[4];
      //ws[1] = 0.125*1.13885*(dc[3] - dc[5]);
      //ws[8] = 0.125*1.13885*(dc[1] - dc[7]);
      //ws[16] = 0.125*0.27881*(dc[1]+dc[7] - 2.*dc[4]);
      //ws[9] = 0.125*0.16213*((dc[0]-dc[2])-(dc[6]-dc[8]));
      //ws[2] =0.125* 0.27881*(dc[3]+dc[5] - 2.*dc[4]);
      ws[1] = 1.13885*(dc[3] - dc[5]);
      ws[8] = 1.13885*(dc[1] - dc[7]);
      ws[16] =  0.27881*(dc[1]+dc[7] - 2.*dc[4]);
      ws[9] = 0.16213*((dc[0]-dc[2])-(dc[6]-dc[8]));
      ws[2] = 0.27881*(dc[3]+dc[5] - 2.*dc[4]);
      //printf("the ws array\n%6.1f %6.1f %6.1f\n%6.1f %6.1f\n%6.1f\n",
      //	ws[0],ws[1],ws[2],ws[8],ws[9],ws[16]);
      rdct_spike(q, nx, ws);
      }
    else
      while (nc--) { m = 8; while(m--) *q++ = 0; q += stride; }
    
    //printf("start address %d, (i,j) = %d, %d  ",ix,ic*8,jc*8);
    //printf("cell index = %d, %d\n", ic, jc);
    break;
    case 1:
    {
    int	a1, a2, acc, t1, t2, nxmo=nx-8 , nymo=ny-8;
    /* get array index for start of cell */
    ix = ic*8;  jx = jc*8;
    istart = ix + jx*nx;
    p = out + istart;  /* first point in cell */
    q = p;
    /* do 2 passes, first in x then y, allows in place smoothing but
    loses a bit in the accumulation */
    nc = 8;	/* the 8 rows */
    while (nc--) {
    /* get a point to left of cell if available */
    if (ix) { p--;	
    	a2 = (int)*p++;
    	a1 = (int)*p++;
    	a2 = a2 + a1;
     } else {
    	a1 = (int)*p++;
    	a2 = a1 + a1;
     }
     /* proceed over 7 points */
     m = 7;
     while (m--) {
     acc = *p++;  t1 = acc;  acc += a1;  a1 = t1;
     t2 = acc;  acc += a2;  a2 = t2;
     *q++ = (short) (acc>>2); }
     /* for the last value, use a point to right of cell if available */
     if (ix < nxmo ) {
     *q++ = (short) ((*p + a1 + a2)>>2);
     } else {
     *q++ = (short) ((a1 + a1 + a2)>>2);
     }
     p += stride;	q = p;
    }
    /* now in the other direction */
    p = out + istart;  /* first point in cell */
    p2 = p;
    q = p;
    nc = 8;	/* the 8 columns */
    while (nc--) {
    /* get a point to bottom of cell if available */
    if (jx) { p = p - nx;
   	a2 = (int)*p;	p += nx;
    	a1 = (int)*p;	p += nx;
    	a2 = a2 + a1;
     } else {
    	a1 = (int)*p;	p += nx;
    	a2 = a1 + a1;
     }
     /* proceed over 7 points */
     m = 7;
     while (m--) {
     acc = *p;	p += nx;  t1 = acc;  acc += a1;  a1 = t1;
     t2 = acc;  acc += a2;  a2 = t2;
     *q = (short) (acc>>2); q += nx; }
     /* for the last value, use a point to right of cell if available */
     if (jx < nymo ) {
     *q = (short) ((*p + a1 + a2)>>2);
     } else {
     *q = (short) ((a1 + a1 + a2)>>2);
     }
     p = p2 + 1;	q = p2 = p;
     }


    }
    break;
    }
 } }
 if (cell_malloc) free(cell_status);
 printf("bad cell count = %d\n", badcells);
 }
 t4 = systime();
 printf("despike times, setup %10.6fs, iters %10.6fs, cells %10.6fs\n", t2-t1,t3-t2,t4-t3);
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_cubemedian(int narg, int ps[])
 /* input is a data cube with t as the last dimension, result is an image
 representing the temporal median
 median = cubemedian(cube) */
 {
 int	iq, class, xtype, ytype, ndx, ndy, nsym, nd, i, j, nt, dim[2];
 char	*ptr, *pdata, *pcube;
 int	*pt, ncount, nq, ny, nx, nxmo, nymo, it_flag, nit, ix, iy, np, nty;
 int	sclass, *px, *py, *pxx, *pyy, stride, type, pstride;
 int	lognb2, level;
 struct	ahead	*h;
 struct sym_desc *pdesc, *pbase;
 iq = ps[0];
 sclass = class = sym[iq].class;
 printf("sclass = %d\n", sclass);
 switch (sclass) {
   case SYM_ARR:
     if ( (nsym = get_p_c(&iq, &pdesc)) <= 0) return -1;
     pbase = pdesc;
     ncount = nsym;
     type = 0;
     break;
   case ARRAY:
     /* must be 3-D and (for now) a I*2 array */
     h = (struct ahead *) sym[iq].spec.array.ptr;
     type = sym[iq].type;
     if (type != 1) return execute_error(111);
     ndx = h->ndim;
     /* 2/19/2008 - accept 3 or 4-D */
     if (ndx != 3 && ndx != 4) return execute_error(136);
     ncount = nsym = h->dims[2];
     if (ndx == 4) ncount = ncount * h->dims[3];
     /* get the first dimension, also need the second for range checks */
     nx = h->dims[0];  ny = h->dims[1];  nxmo = nx - 1;  nymo = ny - 1;
     pcube = (char *) ((char *) h + sizeof( struct ahead ));
     stride = nx * ny;
     break;
   default:
     return execute_error(136);
 }
 if (ncount < 3) { printf("cubemedian needs 3 or more x/y planes, not just %d\n", ncount);  return -1; }
 /* now create the array for the median image, it will be np by ncount */
 /* 2/18/2008 - extended from byte only to using type */
 dim[0] = nx;	dim[1] = ny;
 result_sym = array_scratch(type, 2, dim);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 ptr = (char *) ( (char *) h + sizeof( struct ahead ) );
 lognb2 = (log((double) ncount)*ALN2I+TINY);  /* needed for median sort */
 level = ncount/2;  /* our median choice */
 //printf("lognb2 = %d\n", lognb2);
 switch (sclass) {
   case SYM_ARR:
     printf("not supported yet\n");
     return -1;
     break;
   case ARRAY:
     switch (type) {
       case WORD:
         /* here we have to use I*2 pointers */
	 {
	   short *pcubes = (short *) pcube;
	   short *p, *py, *pt, *pa, *arr;
	   short *ptrs = (short *) ptr;
	   int	i, j;
	   arr = (short *) malloc(ncount*sizeof(short));
	   if (!arr) { printf("malloc error in local mask copy\n");  return 1; }
	   /* ptrs is for result */
	   py = pcube;
	   //printf("nx, ny, ncount,stride = %d,%d,%d,%d\n", nx, ny, ncount,stride);
	   //printf("level = %d\n", level);
	   for (j=0; j < ny; j++) {
	     p = py;
	     for (i=0; i < nx; i++) {
	       nq = ncount;
	       pt = p;
	       pa = arr;
	       /* load the sort buffer */
	       while (nq--) { *pa++ = *pt;  pt += stride; }
	       /* now sort, a built in sorter here*/
	       { int nn,m,j,i,n=ncount;
		  int t;
		  m=n;
		  for (nn=1;nn<=lognb2;nn++) {
		     m >>= 1;
		    for (j=m;j<n;j++) {
		       i=j-m;
		       t=arr[j];
		       while (i >= 0 && arr[i] > t) {
			 arr[i+m]=arr[i];
			 i -= m;
		       }
		       arr[i+m]=t;
		   }
		 }
	       }
	       *ptrs++ = arr[level];
	       p++;
	     }
	     py += nx;
	   }
	   free(arr);
	 }
	 break;
       default: return execute_error(32); break;
     }
     break;
   default: return execute_error(32); break;
   }
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_extractline(int narg, int ps[])
 /* experimental, written with symbol arrays in mind, motivation is to
 avoid copying the symbol arrays in an expression, assuming that the
 desired product is a space/time slice in a byte array, could be extended
 to other types if the need arises by complicating the code, presently
 we assume a mistake if one of the symbol arrays is not byte
 the call is:  y = extractline(sym_arr, xs, ys, it)
 where xs and ys can be 2-D for time dependence, "it" can be a scalar or an
 array of selected indices, this is to allow selecting a subset of images
 from the symbol array, if "it" is not present, all symbols are processed 
 and xs and xs must have >= the # of symbols or be singular in the time
 dimension (nt = 1) */
 /* 2/18/2008 - extend the cubes to I*2 arrays */
 /* if lower bit of extractline_mode is 1 then y is loaded reversed in the
 t (2nd) dimension since we want to display with time increasing upward without
 having to reverse the array later, but this is now optional for other situations */
 {
 int	iq, class, xtype, ytype, ndx, ndy, nsym, nd, i, j, nt, dim[2];
 char	*ptr, *pdata, *pcube;
 int	*pt, ncount, nq, ny, nx, nxmo, nymo, it_flag, nit, ix, iy, np, nty;
 int	sclass, *px, *py, *pxx, *pyy, stride, type, pstride;
 struct	ahead	*h;
 struct sym_desc *pdesc, *pbase;
 iq = ps[0];
 sclass = class = sym[iq].class;
 //printf("sclass = %d\n", sclass);
 switch (sclass) {
   case SYM_ARR:
     if ( (nsym = get_p_c(&iq, &pdesc)) <= 0) return -1;
     pbase = pdesc;
     ncount = nsym;
     type = 0;
     break;
   case ARRAY:
     /* must be 3-D */
     /* 2/18/2008 - extend the cubes to I*2 arrays */
     h = (struct ahead *) sym[iq].spec.array.ptr;
     type = sym[iq].type;
     if (type > 1 || type < 0) return execute_error(136);
     ndx = h->ndim;
     /* 2/19/2008 - accept 3 or 4-D */
     if (ndx != 3 && ndx != 4) return execute_error(136);
     ncount = nsym = h->dims[2];
     if (ndx == 4) ncount = ncount * h->dims[3];
     /* get the first dimension, also need the second for range checks */
     nx = h->dims[0];  ny = h->dims[1];  nxmo = nx - 1;  nymo = ny - 1;
     pcube = (char *) ((char *) h + sizeof( struct ahead ));
     stride = nx * ny;
     break;
   default:
     return execute_error(136);
 }

 // get pointers to the coordinate arrays
 iq = ps[1];	// xs
 //printf("iq = %d\n", iq);
 /* cast it to a I*4 array, rounding */
 iq = ana_rfix(1,&iq);
 class = sym[iq].class;
 if (class != ARRAY) return execute_error(66);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 ndx = h->ndim;
 if (ndx > 2) return execute_error(92);
 if (ndx == 2) nt = h->dims[1]; else nt = 1;
 np = h->dims[0];
 xtype = sym[iq].type;
 px = (int *) ((char *) h + sizeof( struct ahead ));

 iq = ps[2];	// ys
 /* cast it to a I*4 array, rounding */
 iq = ana_rfix(1,&iq);
 class = sym[iq].class;
 if (class != ARRAY) return execute_error(66);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 ndy = h->ndim;
 if (ndy > 2) return execute_error(92);
 if (ndy == 2) nty = h->dims[1]; else nty = 1;
 /* xs and ys must match */
 if (ndx != ndy || np != h->dims[0] || nt != nty) {
 	printf("spatial arrays don't match/n");
 	return -1; }
 ytype = sym[iq].type;
 py = (int *) ((char *) h + sizeof( struct ahead ));

 /* it argument may be a scalar or a 1-D array or not present */
 if (narg > 3) {
  it_flag = 1;
  iq = ps[3];	// it
  /* cast it to a I*4 array, rounding */
  iq = ana_rfix(1,&iq);
  if ( (nit = get_p_c(&iq, &pt)) <= 0) return -1;
  class = sym[iq].class;
  if (class != SCALAR && class != ARRAY && class != SCAL_PTR) {
     printf("illegal variable for time index array\n");
     return -1; }
  /* ncount can be larger then nsym, but members need to be limit checked later */
  ncount = nit;			// so ncount will be the temporal dimension
  				// of the slice
 } else {
  /* no "it" array, so we use all symbols in sym_arr */
  it_flag = 0;
 }
 /* nt must either be 1 or >= nsym */
 if (nt > 1) {
   if (nt < nsym) {
     printf("pixel coordinate arrays short in time dimension, %d vs %d\n",
      	       nt, nsym);
     return -1; }
 }

 /* now create the array for the space/time slice, it will be np by ncount */
 /* 2/18/2008 - extended from byte only to using type */
 dim[0] = np;	dim[1] = ncount;
 //printf("np, ncount, type = %d %d %d, nt = %d\n", np, ncount, type, nt);
 result_sym = array_scratch(type, 2, dim);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 ptr = (char *) ( (char *) h + sizeof( struct ahead ) );
 if (extractline_mode & 1) pstride = -2*np; else pstride = 0;
 /* may want to extend to normal 3-D cubes but presently just symbol arrays */
 switch (sclass) {
   case SYM_ARR:
     /* we want to load from bottom up for easier display, so get start of last
     line */
     if (extractline_mode & 1) ptr = ptr + np*(ncount - 1);
     for (i=0; i < ncount; i++) {
     /* get the symbol number from it array or just increment */
      if (it_flag) {
      	j = *pt++;
      	/* clip on boundaries */
      	j = MAX(j, 0);		j = MIN(j, nsym-1);
      	pdesc = pbase + j;
      } else {
        j = i;
	pdesc = pbase + i;
      }
      /* we now have the selected symbol, verify it is a byte array, assume
      an error if it is not a 2-D byte array, this could be extended in the
      future */
      h = (struct ahead *) pdesc->spec.array.ptr;
      if (pdesc->class != ARRAY || (nd = h->ndim) != 2 || pdesc->type != 0) {
        printf("inappropiate data class or type in symbol array\n");
        printf("nd = %d, class = %d, type = %d\n", nd, pdesc->class,pdesc->type);
        return -1; }
      pdata = (char *) ( (char *) h + sizeof( struct ahead ) );
      /* get the first dimension, also need the second for range checks */
      nx = h->dims[0];  ny = h->dims[1];  nxmo = nx - 1;  nymo = ny - 1;
      //printf("nx, ny = %d, %d\n", nx, ny);
      /* the xs and ys may be fixed or changing with i */
      /* in either case, the set we want now is in px and py */
      nq = np;
      /* need to use j for px and py offsets if these vary with t */
      if (nt > 1) { 
       pxx = px + j*np;
       pyy = py + j*np;
      } else {
       pxx = px;   pyy = py;
      }
      while (nq--) {
       ix = *pxx++;
       /* range clip */
       if (ix < 0) ix = 0; else ix = MIN(ix, nxmo);
       iy = *pyy++;
       if (iy < 0) iy = 0; else iy = MIN(iy, nymo);
       //printf("ix, iy = %d, %d\n", ix, iy);
       // ptr is the result, note that it is loaded backwards in t
       *ptr++ = *(pdata + ix + iy*nx);
       }
      ptr = ptr + pstride;
     }
     break;
   case ARRAY:
     /* the array has already been checked */
     switch (type) {
       case BYTE:
	 /* we want to load from bottom up for easier display, so get start of last
	 line */
         if (extractline_mode & 1) ptr = ptr + np*(ncount - 1);
	 for (i=0; i < ncount; i++) {
	 /* get the plane number from it array or just increment */
	  if (it_flag) {
      	    j = *pt++;
      	    /* clip on boundaries */
      	    j = MAX(j, 0);		j = MIN(j, nsym-1);
      	    pdata = pcube + j * stride;
	  } else {
            j = i;
      	    pdata = pcube + i * stride;
	  }
	  /* the rest is similar to the symbol array case, using px and py
	  in the same way together with pdata */
	  nq = np;
	  /* need to use j for px and py offsets if these vary with t */
	  if (nt > 1) { 
	   pxx = px + j*np;
	   pyy = py + j*np;
	  } else {
	   pxx = px;   pyy = py;
	  }
	  while (nq--) {
	   ix = *pxx++;
	   /* range clip */
	   if (ix < 0) ix = 0; else ix = MIN(ix, nxmo);
	   iy = *pyy++;
	   if (iy < 0) iy = 0; else iy = MIN(iy, nymo);
	   //printf("ix, iy = %d, %d\n", ix, iy);
	   // ptr is the result, note that it is loaded backwards in t
	   *ptr++ = *(pdata + ix + iy*nx);
	   }
         ptr = ptr + pstride;

	 }
	 break;
       case WORD:
         /* here we have to use I*2 pointers */
	 {
	   short *pcubes = (short *) pcube;
	   short *pdatas = (short *) pdata;
	   short *ptrs = (short *) ptr;
	   /* we want to load from bottom up for easier display, so get start of last
	   line */
	   if (extractline_mode & 1) ptrs = ptrs + np*(ncount - 1);
	   for (i=0; i < ncount; i++) {
	   /* get the plane number from it array or just increment */
	    if (it_flag) {
      	      j = *pt++;
      	      /* clip on boundaries */
      	      j = MAX(j, 0);		j = MIN(j, nsym-1);
      	      pdatas = pcubes + j * stride;
	    } else {
              j = i;
      	      pdatas = pcubes + i * stride;
	    }
	    /* the rest is similar to the symbol array case, using px and py
	    in the same way together with pdatas */
	    nq = np;
	    /* need to use j for px and py offsets if these vary with t */
	    if (nt > 1) { 
	     pxx = px + j*np;
	     pyy = py + j*np;
	    } else {
	     pxx = px;   pyy = py;
	    }
	    while (nq--) {
	     ix = *pxx++;
	     /* range clip */
	     if (ix < 0) ix = 0; else ix = MIN(ix, nxmo);
	     iy = *pyy++;
	     if (iy < 0) iy = 0; else iy = MIN(iy, nymo);
	     //printf("ix, iy = %d, %d\n", ix, iy);
	     // ptrs is the result, note that it is loaded backwards in t
	     *ptrs++ = *(pdatas + ix + iy*nx);
	     }
            ptrs = ptrs + pstride;
	   }
	 }
	 break;
       default: return execute_error(32); break;
     }
     break;
   default: return execute_error(32); break;
   }
 
 return result_sym;
 }
 /*-------------------------------------------------------------------------*/
 int ana_retarderscj(narg,ps)
  /* an optical retarder employing Jones' matrix */
  /* this is a function and returns the result Jones' vector
  the call is  out = retarderscj(in, sd, cd, theta) where in is the input Jones' vector,
  a Jones' vector is a 2 element complex vector that is implemented here as a (2,2)
  array to allow for the real and imaginary parts
  it may also be 3-D (or higher) to allow a set of input vectors, in this case the (2,2)
  must be the last dimensions */
  int narg,ps[];
  {
  float	c2, s2, theta, *injones, *sd, *cd, *y;
  float	*y0r, *y0i, *y1r, *y1i;
  float	*in0r, *in0i, *in1r, *in1i;
  float	zin0r, zin0i, zin1r, zin1i, x00r, x00i, x01i;
  int	incount, sdcount, cdcount, nc, result_sym, delin, deld, dim[8], nd;
  int	n, i, j, iq;
  theta = float_arg(ps[3]);
  theta = 2.0 * theta * .017453293;
  s2 = sin(theta);  c2 = cos(theta);

  iq = ps[0];
  if ( sym[iq].class != 4 ) return execute_error(66);
  iq = ana_float(1,&iq);
  h = (struct ahead *) sym[iq].spec.array.ptr;
  injones = (float *) ((char *)h + sizeof(struct ahead));
  nd = h->ndim;
  if (nd < 2) { printf("Jones' vector must be at least 2D\n"); return -1; }
  if ( h->dims[nd-1] != 2 || h->dims[nd-2] != 2) {
    printf("last 2 dimensions of Jones vector must be 2x2\n"); return -1; }
  /* get the inner count for injones */
  incount = 1;
  if (nd > 2) { for(j=0;j<nd-2;j++) incount *= h->dims[j]; }
  //printf("incount = %d\n", incount);
  /* the sd and cd arrays must be the same size and either this size agrees
  with incount or one of the choices is just 1, output has the larger count */
  iq = ps[1];
  iq = ana_float(1,&iq);
  sdcount = get_p_c(&iq, &sd);
  iq = ps[2];
  iq = ana_float(1,&iq);
  cdcount = get_p_c(&iq, &cd);
  if (sdcount != cdcount) {
    printf("dimensions of sd and cd don't agree in RETARDERSCJ, %d vs %d\n",
    	sdcount, cdcount); return -1; }
  delin = deld = 1;
  if (sdcount != incount) {
    /* one of them then has to be 1, this also is used to decide the structure
    of the result */
    if (sdcount == 1) { 
      deld = 0; 
      result_sym = array_clone(ps[0], 3);
    } else if (incount == 1) {
      delin = 0;
      /* here we want the first dimensions to agree with sd (and cd) but then
      tack on a (2,2), a little messy */
      h = (struct ahead *) sym[ps[2]].spec.array.ptr;
      nd = h->ndim;
      for (i=0;i<nd;i++) dim[i] = h->dims[i];
      if (nd > 6) { printf("dimension count for output too high (>8), %d\n",nd+2);
        return -1; }
      dim[nd] = 2;  dim[nd+1] = 2;  nd += 2;
      result_sym = array_scratch(3, nd, dim);
    } else {
    printf("dimension mismatch, %d vs %d\n", incount, sdcount); return -1; }
  } else {
    /* this means they both agree, so just clone "in" as the most straightforward option */
    result_sym = array_clone(ps[0], 3);
  }
  nc = MAX(sdcount, incount);
  /* the output complex vector is 4 values or 4 arrays if we are doing multiple
  wavelengths or angles */
  h = (struct ahead *) sym[result_sym].spec.array.ptr;
  y = (float *) ( (char *) h + sizeof( struct ahead ) );
  y0r = y;  y0i = y + nc;  y1r = y + 2*nc;   y1i = y + 3*nc;
  in0r = injones;  in0i = injones + incount;  in1r = injones + 2*incount;
  in1i = injones + 3*incount;
  n = nc;
  //printf("deld, delin = %d, %d\n", deld, delin);
  /* when there is only a single retardance value, use a less
  general and more efficient looping */
  if (deld == 0) {
    //printf("deld = 0 case\n");
    x00i = c2 * *sd;
    x00r = *cd;
    //x01i = x10i = -s2 * *sd;
    x01i = -s2 * *sd;
    //x11r = x00r;
    //x11i = - x00i;
    while (n--) {
      /* need the 4 current values of injones, since deld = 0,
      n must be count of injones and either delin is 1 or n is 1 */
      zin0r = *in0r++;  zin0i = *in0i++;  zin1i = *in1i++;
      *y0r++ = x00r * zin0r - x00i * zin0i - x01i * zin1i;
      zin1r = *in1r++;
      *y0i++ = x00i * zin0r + x00r * zin0i + x01i * zin1r;
      *y1r++ = - x01i * zin0i + x00r * zin1r + x00i * zin1i;
      *y1i++ = x01i * zin0r - x00i * zin1r + x00r * zin1i;
    }
  } else {
    /* the case with a variable sd and cd */
    while (n--) {
      x00i = c2 * *sd;
      x00r = *cd++;
      x01i = -s2 * *sd++;
      //x11r = x00r;
      //x11i = - x00i;
      /* need the 4 current values of injones */
//       zin0r = *in0r;  zin0i = *in0i;  zin1i = *in1i;
//       *y0r++ = x00r * zin0r - x00i * zin0i - x01i * zin1i;
//       zin1r = *in1r;
//       *y0i++ = x00i * zin0r + x00r * zin0i + x01i * zin1r;
//       in0r += delin;  in1r += delin;  in0i += delin;  in1i += delin;
//       *y1r++ = - x10i * zin0i + x11r * zin1r - x11i * zin1i;
//       *y1i++ = x10i * zin0r + x11i * zin1r + x11r * zin1i;
      zin0r = *in0r;  zin0i = *in0i;  zin1i = *in1i;
      *y0r++ = x00r * zin0r - x00i * zin0i - x01i * zin1i;
      zin1r = *in1r;
      *y0i++ = x00i * zin0r + x00r * zin0i + x01i * zin1r;
      in0r += delin;  in1r += delin;  in0i += delin;  in1i += delin;
      *y1r++ = - x01i * zin0i + x00r * zin1r + x00i * zin1i;
      *y1i++ = x01i * zin0r - x00i * zin1r + x00r * zin1i;
    }
  }
  return result_sym;
  }
/*-------------------------------------------------------------------------*/
 int ana_rhalfwavej(narg,ps)
  /* a rotated halfwave employing Jones' matrix */
  /* this is a function and returns the result Jones' vector
  the call is  out = rhalfwavej(in, theta) where in is the input Jones' vector,
  a Jones' vector is a 2 element complex vector that is implemented here as a (2,2)
  array to allow for the real and imaginary parts
  in may also be 3-D (or higher) to allow a set of input vectors, in this case the (2,2)
  must be the last dimensions */
  int narg,ps[];
  {
  float	c, s, c2, s2, theta, *injones, *y;
  float	*y0r, *y0i, *y1r, *y1i;
  float	*in0r, *in0i, *in1r, *in1i;
  float	zin0r, zin0i, zin1r, zin1i;
  int	incount, nc, result_sym, nd;
  int	n, i, j, iq;
  theta = float_arg(ps[1]);
  theta = 2.0 * theta * .017453293;
  s2 = sin(theta);  c2 = cos(theta);

  iq = ps[0];
  if ( sym[iq].class != 4 ) return execute_error(66);
  iq = ana_float(1,&iq);
  h = (struct ahead *) sym[iq].spec.array.ptr;
  injones = (float *) ((char *)h + sizeof(struct ahead));
  nd = h->ndim;
  if (nd < 2) { printf("Jones' vector must be at least 2D\n"); return -1; }
  if ( h->dims[nd-1] != 2 || h->dims[nd-2] != 2) {
    printf("last 2 dimensions of Jones vector must be 2x2\n"); return -1; }
  /* get the inner count for injones */
  incount = 1;
  if (nd > 2) { for(j=0;j<nd-2;j++) incount *= h->dims[j]; }
  result_sym = array_clone(ps[0], 3);
  nc = incount;
  /* the output complex vector is 4 values or 4 arrays if we are doing multiple
  wavelengths or angles */
  h = (struct ahead *) sym[result_sym].spec.array.ptr;
  y = (float *) ( (char *) h + sizeof( struct ahead ) );
  y0r = y;  y0i = y + nc;  y1r = y + 2*nc;   y1i = y + 3*nc;
  in0r = injones;  in0i = injones + incount;  in1r = injones + 2*incount;
  in1i = injones + 3*incount;
  n = nc;
  while (n--) {
    /* need the 4 current values of injones, since deld = 0,
    n must be count of injones and either delin is 1 or n is 1 */
    zin0r = *in0r++;  zin0i = *in0i++;  zin1i = *in1i++;  zin1r = *in1r++;
    *y0r++ = -c2 * zin0i + s2 * zin1i;
    *y0i++ =  c2 * zin0r - s2 * zin1r;
    *y1r++ =  c2 * zin1i + s2 * zin0i;
    *y1i++ = -c2 * zin1r - s2 * zin0r;
  }
  return result_sym;
  }
 /*-------------------------------------------------------------------------*/
 int ana_polj(narg,ps)
  /* a polarizer employing Jones' matrix */
  /* this is a function and returns the result Jones' vector
  the call is  out = polj(in, theta) where in is the input Jones' vector,
  a Jones' vector is a 2 element complex vector that is implemented here as a (2,2)
  array to allow for the real and imaginary parts
  it may also be 3-D (or higher) to allow a set of input vectors, in this case the (2,2)
  must be the last dimensions */
  int narg,ps[];
  {
  float	c, s, ss, cc, cs, theta, *injones, *y;
  float	*y0r, *y0i, *y1r, *y1i;
  float	*in0r, *in0i, *in1r, *in1i;
  float	zin0r, zin0i, zin1r, zin1i;
  int	incount, nc, result_sym, nd;
  int	n, i, j, iq;
  theta = float_arg(ps[1]);
  theta = theta * .017453293;
  s = sin(theta);  c = cos(theta);
  cc = c*c;  ss = s*s;  cs = c*s;

  iq = ps[0];
  if ( sym[iq].class != 4 ) return execute_error(66);
  iq = ana_float(1,&iq);
  h = (struct ahead *) sym[iq].spec.array.ptr;
  injones = (float *) ((char *)h + sizeof(struct ahead));
  nd = h->ndim;
  if (nd < 2) { printf("Jones' vector must be at least 2D\n"); return -1; }
  if ( h->dims[nd-1] != 2 || h->dims[nd-2] != 2) {
    printf("last 2 dimensions of Jones vector must be 2x2\n"); return -1; }
  /* get the inner count for injones */
  incount = 1;
  if (nd > 2) { for(j=0;j<nd-2;j++) incount *= h->dims[j]; }
  result_sym = array_clone(ps[0], 3);
  nc = incount;
  /* the output complex vector is 4 values or 4 arrays if we are doing multiple
  wavelengths or angles */
  h = (struct ahead *) sym[result_sym].spec.array.ptr;
  y = (float *) ( (char *) h + sizeof( struct ahead ) );
  y0r = y;  y0i = y + nc;  y1r = y + 2*nc;   y1i = y + 3*nc;
  in0r = injones;  in0i = injones + incount;  in1r = injones + 2*incount;
  in1i = injones + 3*incount;
  n = nc;
  while (n--) {
    /* need the 4 current values of injones, since deld = 0,
    n must be count of injones and either delin is 1 or n is 1 */
    zin0r = *in0r++;  zin0i = *in0i++;  zin1i = *in1i++;  zin1r = *in1r++;
    *y0r++ = cc * zin0r + cs * zin1r;
    *y0i++ = cc * zin0i + cs * zin1i;
    *y1r++ = cs * zin0r + ss * zin1r;
    *y1i++ = cs * zin0i + ss * zin1i;
  }
  return result_sym;
  }
 /*-------------------------------------------------------------------------*/
 int ana_poloutj(narg,ps)
  /* a polarizer employing Jones' matrix, differs from polj in that it computes
  the output intensity */
  /* this is a function and returns the intensity
  the call is  out = poloutj(in, theta) where in is the input Jones' vector,
  a Jones' vector is a 2 element complex vector that is implemented here as a (2,2)
  array to allow for the real and imaginary parts
  it may also be 3-D (or higher) to allow a set of input vectors, in this case the (2,2)
  must be the last dimensions */
  int narg,ps[];
  {
  float	c, s, theta, *injones, *y;
  float	*in0r, *in0i, *in1r, *in1i;
  float	zin0r, zin0i, zin1r, zin1i, yr, yi;
  int	incount, nc, result_sym, dim[8], nd;
  int	n, i, j, iq;
  theta = float_arg(ps[1]);
  theta = theta * .017453293;
  s = sin(theta);  c = cos(theta);

  iq = ps[0];
  if ( sym[iq].class != 4 ) return execute_error(66);
  iq = ana_float(1,&iq);
  h = (struct ahead *) sym[iq].spec.array.ptr;
  injones = (float *) ((char *)h + sizeof(struct ahead));
  nd = h->ndim;
  if (nd < 2) { printf("Jones' vector must be at least 2D\n"); return -1; }
  if ( h->dims[nd-1] != 2 || h->dims[nd-2] != 2) {
    printf("last 2 dimensions of Jones vector must be 2x2\n"); return -1; }
  /* get the inner count for injones */
  incount = 1;
  if (nd > 2) { for(j=0;j<nd-2;j++) incount *= h->dims[j]; }
  nd = nd - 2;
  if (nd > 0) {
    for(j=0;j<nd-2;j++) dim[j] = h->dims[j+2];
    result_sym = array_scratch(3, nd, dim);
    h = (struct ahead *) sym[result_sym].spec.array.ptr;
    y = (float *) ( (char *) h + sizeof( struct ahead ) );
  } else {
    result_sym = scalar_scratch(3);
    y = &sym[result_sym].spec.scalar.f;
  }
  nc = incount;
  /* the output is a value or array if we are doing multiple
  wavelengths or angles */
  in0r = injones;  in0i = injones + incount;  in1r = injones + 2*incount;
  in1i = injones + 3*incount;
  n = nc;
  while (n--) {
    /* need the 4 current values of injones, since deld = 0,
    n must be count of injones and either delin is 1 or n is 1 */
    yr = c * *in0r++ + s * *in1r++;
    yi = c * *in0i++ + s * *in1i++;
    *y++ = yr *yr + yi *yi;
  }
  return result_sym;
  }
 /*-------------------------------------------------------------------------*/
 int ana_ppolj(narg,ps)
  /* a partial polarizer employing Jones' matrix */
  /* this is a function and returns the result Jones' vector
  the call is  out = ppolj(in, theta, f) where in is the input Jones' vector,
  a Jones' vector is a 2 element complex vector that is implemented here as a (2,2)
  array to allow for the real and imaginary parts
  in may also be 3-D (or higher) to allow a set of input vectors, in this case the (2,2)
  must be the last dimensions */
  int narg,ps[];
  {
  float	c, s, ss, cc, cs, cd, cq, sq, f, theta, *injones, *y;
  float	*y0r, *y0i, *y1r, *y1i;
  float	*in0r, *in0i, *in1r, *in1i;
  float	zin0r, zin0i, zin1r, zin1i;
  int	incount, nc, result_sym, nd;
  int	n, i, j, iq;
  theta = float_arg(ps[1]);
  f = float_arg(ps[2]);
  theta = theta * .017453293;
  s = sin(theta);  c = cos(theta);
  cc = c*c;  ss = s*s;  cs = c*s;  cd = cs * (1.0 - f);
  cq = cc - ss * f;  sq = ss + cc * f;

  iq = ps[0];
  if ( sym[iq].class != 4 ) return execute_error(66);
  iq = ana_float(1,&iq);
  h = (struct ahead *) sym[iq].spec.array.ptr;
  injones = (float *) ((char *)h + sizeof(struct ahead));
  nd = h->ndim;
  if (nd < 2) { printf("Jones' vector must be at least 2D\n"); return -1; }
  if ( h->dims[nd-1] != 2 || h->dims[nd-2] != 2) {
    printf("last 2 dimensions of Jones vector must be 2x2\n"); return -1; }
  /* get the inner count for injones */
  incount = 1;
  if (nd > 2) { for(j=0;j<nd-2;j++) incount *= h->dims[j]; }
  result_sym = array_clone(ps[0], 3);
  nc = incount;
  /* the output complex vector is 4 values or 4 arrays if we are doing multiple
  wavelengths or angles */
  h = (struct ahead *) sym[result_sym].spec.array.ptr;
  y = (float *) ( (char *) h + sizeof( struct ahead ) );
  y0r = y;  y0i = y + nc;  y1r = y + 2*nc;   y1i = y + 3*nc;
  in0r = injones;  in0i = injones + incount;  in1r = injones + 2*incount;
  in1i = injones + 3*incount;
  n = nc;
  while (n--) {
    /* need the 4 current values of injones, since deld = 0,
    n must be count of injones and either delin is 1 or n is 1 */
    zin0r = *in0r++;  zin0i = *in0i++;  zin1i = *in1i++;  zin1r = *in1r++;
    *y0r++ = cq * zin0r + cd * zin1r;
    *y0i++ = cq * zin0i + cd * zin1i;
    *y1r++ = cd * zin0r + sq * zin1r;
    *y1i++ = cd * zin0i + sq * zin1i;
  }
  return result_sym;
  }
/*-------------------------------------------------------------------------*/
 /* some globals for the blob coloring, for C++ these would be in the class */
 static int nx, ny, size;
 //char  match, **ptr, **plimbot, **plimtop, *ptop, *pbot;
 static char  match, *ptop, *pbot;
 static int   **plimbot, **plimtop, *ptr;
 int maxobjoverlap = 2000;   /* keep open for symbols.c */
/*-------------------------------------------------------------------------*/
void colorblob(char *p, int *f, int color)
 {
 /* this is recursive and depends on a 0 guard border or it will run amok ! */
 if (*p == match) {
   *p = 0;
   *f = color;
   size++;
   /* 4 way connection */
   p++; f++;         colorblob(p, f, color);
   p -= 2; f -= 2;   colorblob(p, f, color);
   p = p + 1 - nx;
   f = f + 1 - nx;   colorblob(p, f, color);
   p += nx * 2;
   f += nx * 2;       colorblob(p, f, color);
 }
 }
/*-------------------------------------------------------------------------*/
void eraseblob(int *f, int color)
 {
 /* this leaves the bitmap zeroed and wipes out a colored object, used when the
 object is too small (only use so far) */
 if (*f == color) {
   *f = 0;
   /* 4 way connection */
   f++;               eraseblob(f, color);
   f -= 2;   	      eraseblob(f, color);
   f = f + 1 - nx;    eraseblob(f, color);
   f += nx * 2;       eraseblob(f, color);
 }
 }
/*-------------------------------------------------------------------------*/
int ana_getcurvature(int narg, int ps[])
  {
  int	result_sym, iq, jq, type, nd, n, nyq, nxq, limit;
  double t1, t2, t3, t4;
  char	*pb, *p, pq;
  float *s0, *s1, *s, *sb;
  float	xq, yq, zq, aq, bq, cq, dq;
  t1 = systime();
  iq = ps[0];
  if ( sym[iq].class != 4 ) return execute_error(66);
  /* these must be 2-D arrays */
  h = (struct ahead *) sym[iq].spec.array.ptr;
  nd = h->ndim;
  type = sym[iq].type;
  nx = h->dims[0];	ny = h->dims[1];
  if (nd != 2) { printf("input bitmap for getobjects must be 2-D\n"); return -1; }
  /* make it a float, these are usually processed, i.e., smoothed, etc */
  iq = ana_float(1,&iq);
  /* result will hold the curvature map, use I*1 since this is a tri-map */
  t2 = systime();
  result_sym = array_clone(iq, 0);
  h = (struct ahead *) sym[result_sym].spec.array.ptr;
  pb = (char *) ((char *)h + sizeof(struct ahead));
  /* and zero the border, the inside is handled in the calculation */
  p = pb;              n = nx;  while (n--) *p++ = 0;
  p = pb + nx*(ny-1);  n = nx;  while (n--) *p++ = 0;
  p = pb;              n = ny;  while (n--)  { *p = 0;  p += nx -1; *p = 0; p++; }
  /* we work inside the border by one pixel , and do a nested loop */
  nyq = ny - 2;
  h = (struct ahead *) sym[iq].spec.array.ptr;
  sb = (float *) ((char *)h + sizeof(struct ahead));
  s = sb + nx + 1;
  p = pb + nx;
  t3 = systime();
  while (nyq--) {
    /* s0 and s1 are the trailing and leading pixels */ 
    nxq = nx - 2;
    *p++ = 0;
    while (nxq--) {
      /* has to be curved in the same sign in all 4 directions to be set */
      s0 = s - 1;  s1 = s + 1;
      xq = 2.0 * (*s++);
      pq = 0;
      aq = xq - (*s0) - (*s1);
      /* the curvature and the signal have to be the same sign so divide
      according to the signal sign */
      if (xq > 0) {
	if (aq >= 0) {
	  s0 -= nx;  s1 +=nx;
	  bq = xq - (*s0) - (*s1);
	  if (bq >= 0) {
	    s0 +=1;  s1 -=1;
	    cq = xq - (*s0) - (*s1);
	    if (cq >= 0) {
	      s0 +=1;  s1 -=1;
	      dq = xq - (*s0) - (*s1);
	      if (dq >= 0) pq = 1;
	    }
	  }
	}
      } else if (xq < 0) {
	if (aq <= 0) {
	  s0 -= nx;  s1 +=nx;
	  bq = xq - (*s0) - (*s1);
	  if (bq <= 0) {
	    s0 +=1;  s1 -=1;
	    cq = xq - (*s0) - (*s1);
	    if (cq <= 0) {
	      s0 +=1;  s1 -=1;
	      dq = xq - (*s0) - (*s1);
	      if (dq <= 0) pq = -1;
	    }
	  }
	}
      }
      *p++ = pq;
    }
    s = s + 2;
    *p++ = 0;
  }
  t4 = systime();
  printf("getcurvature total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms, part 3 = %10.2f ms\n",
    1.E3*(t4-t1), 1.E3*(t2-t1), 1.E3*(t3-t2), 1.E3*(t4-t3));
  return result_sym;
  }
/*-------------------------------------------------------------------------*/
int ana_getobjects(int narg, int ps[])
  /* inputs a bitmap and an optional min size and finds connected objects */
  {
  int	result_sym, iq, jq, type, nd, n, ntotal;
  int	*fm, *f, color1, color2, minsize;
  char	*pb, *p, bq;
  double t1, t2, t3, t4;
  /* objects here are connected features in the bitmap, there can be more than
  one type of object, feature free pixels must be zero */
  t1 = systime();
  /* get optional minimum size */
  if (narg > 1)  {
    if (int_arg_stat(ps[1], &minsize) != 1) return -1;
    /* minsize must be at least 1 */
    if (minsize < 1) minsize = 1;
  } else minsize = 1;

  iq = ps[0];
  if ( sym[iq].class != 4 ) return execute_error(66);
  /* these must be 2-D arrays */
  h = (struct ahead *) sym[iq].spec.array.ptr;
  nd = h->ndim;
  type = sym[iq].type;
  nx = h->dims[0];	ny = h->dims[1];
  ntotal = nx * ny;
  if (nd != 2) { printf("input bitmap for getobjects must be 2-D\n"); return -1; }
  /* make a local copy, this slows us down but allows a 0 border and other possible
  mods to be made w/o affecting the original */
  if (type != 0) jq = ana_byte(1,&iq); else {
    jq = array_clone(ps[0], 0);
    h = (struct ahead *) sym[iq].spec.array.ptr;
    p = (char *) ((char *)h + sizeof(struct ahead));
    h = (struct ahead *) sym[jq].spec.array.ptr;
    pb = (char *) ((char *)h + sizeof(struct ahead));
    bcopy(p, pb, ntotal);
  }
  h = (struct ahead *) sym[jq].spec.array.ptr;
  pb = (char *) ((char *)h + sizeof(struct ahead));

  /* and zero the border */
  p = pb;              n = nx;  while (n--) *p++ = 0;
  p = pb + nx*(ny-1);  n = nx;  while (n--) *p++ = 0;
  p = pb;              n = ny;  while (n--)  { *p = 0;  p += nx -1; *p = 0; p++; }
  
  /* result will hold the colored map, use I*4  in case we get a lot even though ~30000
  features may seem reasonable now */
  t2 = systime();
  result_sym = array_clone(ps[0], 2);
  h = (struct ahead *) sym[result_sym].spec.array.ptr;
  fm = (int *) ((char *)h + sizeof(struct ahead));
  bzero( (void *) fm, ntotal * sizeof(int));
  
  /* have to loop through the whole array and look at both, we include the edges
  since we may change the zero border thing before we finish the final code */
  p = pb;  f = fm;
  color1 = 2;
  color2 = -2;
  t3 = systime();
  while (ntotal--) {
    /* we want to check for at least 2 types for Mandy's + and - fields */
    bq = *p;
    size = 0;
    if (bq) {
      if (bq == 1) {
	match = 1;
	colorblob(p, f, color1);
	/* size does matter */
	if (size < minsize) eraseblob(f, color1); else color1++;
      } else if (bq == -1) {
	match = -1;
	colorblob(p, f, color2);
	if (size < minsize) eraseblob(f, color2); else color2--;
      }
    }
    p++;  f++;
  }  
  
  /* delete the byte copy we made */
  ana_deletesymbol(1,&jq);
  t4 = systime();
  printf("getobjects total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms, part 3 = %10.2f ms\n",
    1.E3*(t4-t1), 1.E3*(t2-t1), 1.E3*(t3-t2), 1.E3*(t4-t3));
  printf("mark end, result_sym = %d\n", result_sym);
  return result_sym;
  }
/*-------------------------------------------------------------------------*/
// void colorblobptr(char *p)
//  {
//  /* this is recursive and depends on a 0 guard border or it will run amok ! */
//  if (*p == match) {
//    if (p >= ptop || p < pbot) {
//      printf("p problem %#x, limits are %#x, %#x\n", p, ptop, pbot);
//    }
//   
//    *p = 0;
//    if (ptr >= plimtop || ptr < plimbot) {
//      printf("ptr problem %#x, limits are %#x, %#x\n", ptr, plimbot, plimtop);
//    }
//    *ptr++ = p;   /* load the pointer */
//    size++;
//    /* 4 way connection */
//    p++;             colorblobptr(p);
//    p -= 2;          colorblobptr(p);
//    p = p + 1 - nx;  colorblobptr(p);
//    p += nx * 2;     colorblobptr(p);
//  }
//  }
/*-------------------------------------------------------------------------*/
void colorblobptrnr(char *pb, int i, int *ptr)
 {
 /* this is non-recursive, still depends on a 0 guard border or it will run amok ! */
 int any = 1, done = 0, *pbase;
 size = 1;   /* have 1 because we got here */
 pbase = ptr;
 *ptr++ = i;   /* load the first pointer */
 while (size > done) {
   i = *pbase++;	/* center for checking connections */
   *(pb + i) = 0;

   i++;
   if (*(pb + i) == match) { *ptr++ = i; size++; *(pb + i) = 0; }

   i -= 2;
   if (*(pb + i) == match) { *ptr++ = i; size++; *(pb + i) = 0; }

   i = i + 1 - nx;
   if (*(pb + i) == match) { *ptr++ = i; size++; *(pb + i) = 0; }

   i += 2 * nx;
   if (*(pb + i) == match) { *ptr++ = i; size++; *(pb + i) = 0; }
   done++;
 }
 /* note that ptr should now contain size offsets (not pointers) */
 }
/*-------------------------------------------------------------------------*/
int ana_getobjectptrs(int narg, int ps[])
  /* inputs a bitmap and an optional min size and finds connected objects an returns pointers
  4/24/2009 - modify to make cmap contents the object id+1 rather than the color, this makes it
  easier to just modify the colors for subsequent operations, because 0 is the background, the cmap
  object entries start with 1 */
  /* cmap = getobjectptrs(curvmap, minSize, indexsymarr1, indexsymarr2, mgmap, minmgsignal) */
  /* cmap = getobjectptrs(curvmap, inheritcolors, minSize, indexsymarr1, colors, mgmap, minmgsignal) */
  {
  int	result_sym, iq, jq, kq, type, nd, n, ntotal, j, mq, ntq, nbad, mapset, i;
  int	*fm, *f, color1, color2, minsize, **pblock, **instart, *poffsets, **pq, *po, *cq, *prevcmap;
  int	indexsymarr, colorarrsym, nobjs, nobjs1, nobjs2, ptrFlag, thrFlag, colorFlag, inheritFlag;
  float  minmgsignal, *mgmap;
  char	*pb, *p, bq, *pcurv;
  double t1, t2, t3, t4;
  struct sym_desc *pdesc;
  /* objects here are connected features in the bitmap, there can be more than
  one type of object, feature free pixels must be zero */
  t1 = systime();
  /* get optional minimum size */
  if (narg > 2)  {
    if (int_arg_stat(ps[2], &minsize) != 1) return -1;
    /* minsize must be at least 1 */
    if (minsize < 1) minsize = 1;
  } else minsize = 1;
  if (narg > 3)  { indexsymarr = ps[3];  ptrFlag = 1; } else ptrFlag = 0;
//   if (narg == 3) {
//     printf("GETOBJECTPTRS - only one symbol array specified, need both or none\n");
//     return -1;
//   }
  /* the 4th arg is now an output array for the colors and is optional */
  if (narg > 4)  { colorarrsym = ps[4];  colorFlag = 1; } else colorFlag = 0;
  iq = ps[0];
  /* the first arg must be a 2-D byte array, often a bitmap from getcurvature function */
  ntotal = get_2d_array(ps[0], BYTE, (int **) &pcurv, &nx, &ny);
  if (ntotal <= 0) return -1;
  /* check if we are inheriting colors from a previous map */
  /* this is presently abandoned but leave in some of the machinery in case I want to reconsider */
  if (narg > 1)  {
    kq = ps[1];
    inheritFlag = 0;
    if ( sym[kq].class == 4 ) {
      h = (struct ahead *) sym[kq].spec.array.ptr;
      nd = h->ndim;
      /* has to match the bitmap */
      if (nd == 2 && h->dims[0] == nx && h->dims[1] == ny) {
	inheritFlag = 1;
	/* and it has to be I*4 */
	kq = ana_long(1,&kq);
	h = (struct ahead *) sym[kq].spec.array.ptr;
	prevcmap = (int *) ((char *)h + sizeof(struct ahead));
      }
    }
  }

  /* a B field (or whatever) array can be used to apply a minimum threshold level for each object,
  this requires a minimum magnitude somewhere in the object, not always useful but can help for
  Mandy's objects */
  if (narg > 5)  { 
    if (narg > 6) { if (float_arg_stat(ps[6], &minmgsignal) != 1) return -1; } else {
      printf("GETOBJECTPTRS - also need a threshold if mgmap parameter present\n");
      return -1;
    }
    kq = ps[5];
    h = (struct ahead *) sym[kq].spec.array.ptr;
    nd = h->ndim;
    /* has to match the bitmap */
    if (nd != 2 || h->dims[0] != nx || h->dims[1] != ny) {
      printf("GETOBJECTPTRS - the mgmap array must be of same size as curvmap array\n");
      return -1;
    }
    /* make it a float, these are usually processed, i.e., smoothed, etc */
    kq = ana_float(1,&kq);
    h = (struct ahead *) sym[kq].spec.array.ptr;
    mgmap = (float *) ((char *)h + sizeof(struct ahead));
    thrFlag = 1;
  } else thrFlag = 0;
  /* make a local copy of the input map, this slows us down but allows a 0 border and other possible
  mods to be made w/o affecting the original */
  pb = (char *) malloc(ntotal + 100);
  bcopy(pcurv, pb, ntotal);
  /* and zero the border */
  pbot = pb;
  ptop = pb + ntotal;
  p = pb;              n = nx;  while (n--) {  *p = 0; p++; }
  p = pb + nx*(ny-1);  n = nx;  while (n--) {  *p = 0; p++; }
  p = pb;              n = ny;  while (n--) {  *p = 0;  p += nx; }
  p = pb + nx - 1;     n = ny;  while (n--) {  *p = 0;  p += nx; }
  /* result will hold the colored map, use I*4  in case we get a lot even though ~30000
  features may seem reasonable now */
  t2 = systime();
  result_sym = array_clone(ps[0], 2);
  h = (struct ahead *) sym[result_sym].spec.array.ptr;
  fm = (int *) ((char *)h + sizeof(struct ahead));
  bzero( (void *) fm, ntotal * sizeof(int));
  
  /* set up a block of pointers, used for 2 functions */
  mq = ntotal * sizeof (int *);
  pblock = (int **) malloc(mq);
  instart = pblock;
  plimbot = pblock;
  plimtop = pblock + ntotal;
  /* have to loop through the whole array and look at both, we include the edges
  since we may change the zero border thing before we finish the final code */
  p = pb;
  color1 =  2;
  color2 = -2;
  t3 = systime();
  //nobjs1 = nobjs2 = 0;
  nobjs = 1;
  ntq = ntotal;
  nbad = 0;
  for (i=0;i<ntotal;i++) {
    /* we want to check for at least 2 types for Mandy's + and - fields */
    if (p >= ptop || p < pbot) { printf("p problem %#x, limits are %#x, %#x\n", p, ptop, pbot); }
    bq = *p;
    size = 0;
    if (bq) {
      ptr = (int *) instart;
      if (bq == 1) {
	match = 1;
	colorblobptrnr(pb, i, (int *) ptr);
	mapset = color1;
      } else if (bq == -1) {
	match = -1;
	colorblobptrnr(pb, i, (int *) ptr);
	mapset = color2;
      } else goto bypass;
      
      /* size does matter */
      if (size >= minsize) {
	 int *pt, iq, nsq = size;
	 pt = (int *) instart;
	 /* also check if doing thresholding, this could kill this object */
	 if (thrFlag) {
	   double xq, yq = (double) minmgsignal;
	   int	okFlag = 0;
	   while (nsq--) {
	     iq = *pt++;
	     /* check the mgmap map */
	     xq  = fabs( (double) *(mgmap + iq));
	     if (xq >= yq) { okFlag = 1;  break; }
	   }
	   if (!okFlag) goto bypass;
	   /* restore pt to start value if we go on */
	   pt = (int *) instart;
	 }
	 if (ptrFlag) {
           /* save this one, note that we want I*4 offsets, not the pointers, also want an array header
	   in here */
	   poffsets = (int *) malloc( size * sizeof (int) + sizeof(struct ahead));
	   h = (struct ahead *) poffsets;
	   h->ndim = 1;  h->c1 = (byte) bq; h->c2 = 0; h->dims[0] = size;  h->facts = NULL;
	   /* use c1 to store the sign of the feature for sorting out later (below) */
	   /* and use h->dims[1] to stash the "color", this is non-standard use of the header */
	   h->dims[1] = mapset;
	   po = (int *) ((char *) poffsets + sizeof(struct ahead));
	   /* also check if doing thresholding */
	   while (size--) {
	     iq = *pt++;
	     *po++ = iq;
	     /* also set the output map, 4/24/2009 - modified to be the object number (aka id) + 1 */
	     //*(fm + iq) = mapset;
	     *(fm + iq) = nobjs;
	   }
	   /* and save the address of these in the pblock array */
	   *instart++ = poffsets;
	   /* the new instart can be used for next collection */
	 } else {
	 /* if no symbol array output requested, just set the color map output */
	   while (size--) {
	     iq = *pt++;
	     /* just set the output map */
	     *(fm + iq) = mapset;
	   }
	   /* note that instart is not changed for this option */
	 }
         if (bq != 1 && bq != -1) printf("BOGUS bq = %d, size = %d\n", bq, size);
// 	 if (bq == 1)  { nobjs1++;  color1++; }
// 	 if (bq == -1) { nobjs2++;  color2--; }
	 if (bq == 1)  { color1++; }
	 if (bq == -1) { color2--; }
	 nobjs++;
      }

    }
    bypass:
    p++;
  }
  /* much of the rest is done only for the ptr output situation */
  //nobjs = nobjs1 + nobjs2;
  nobjs--;  /* 4/24/2009 - we were biased by 1 */
  //printf("# objects = %d, type 1 %d, type 2 %d\n", nobjs, nobjs1, nobjs2);
  if (nobjs > 0) {
    if (ptrFlag) {
      if ( redef_symarr(indexsymarr, 1, &nobjs) != 1 ) return -1;
      h = (struct ahead *) sym[indexsymarr].spec.array.ptr;
      pdesc = (struct sym_desc *) ((char *)h + sizeof(struct ahead));
      /* do we want a color array ? */
      if (colorFlag) {
	/* a I*4 array of length nobjs */
	if (redef_array(colorarrsym,  2, 1, &nobjs) != 1) return -1;
	h = (struct ahead *) sym[colorarrsym].spec.array.ptr;
	cq = (int *) ((char *)h + sizeof(struct ahead));
      }
      /* and load them */
      pq = pblock;
      for (j=0;j<nobjs;j++) {
	 po = *pq++;  /* po is the address of our stash of I*4 arrays with headers */
	 h = (struct ahead *) po;
	 bq = (char) h->c1;
	 size = h->dims[0];
	 mapset = h->dims[1];   h->dims[1] = 0;
	 mq = size * sizeof (int ) + sizeof(struct ahead);
	 if (colorFlag) *cq++ = mapset;
	 pdesc->class = ARRAY;
	 pdesc->type  = LONG;
	 pdesc->xx = - 1;  /* this is not named */
	 pdesc->spec.array.bstore = mq;
	 pdesc->spec.array.ptr = po;
	 pdesc++;
      }
    }
  }
  /* delete the byte copy we made */
  free(pb);
  free(pblock);
  t4 = systime();
  printf("getobjectptrs total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms, part 3 = %10.2f ms\n",
    1.E3*(t4-t1), 1.E3*(t2-t1), 1.E3*(t3-t2), 1.E3*(t4-t3));
  //printf("mark end, result_sym = %d\n", result_sym);
  return result_sym;
  }
/*-------------------------------------------------------------------------*/
int ana_objectoverlap_old(int narg, int ps[])
  {
  /* a subr:  objectoverlap, cmap1, in1, colors1, in2, colors2
  modifies colors2, cmap1 is a object color map, in1 is a symbol array of ptr arrays for each
  object, colors1 is an array of the colors */
  int	result_sym, iq, jq, kq, nd, n, nx, ny, ntotal, j, mq, ntq, nbad, mapset, i, k;
  int	nin1, nin2, ncol1, ncol2, mincol, maxcol, range, colq, nptr, sgn1, sgn2;
  int   maxpluscol, minminuscol, nsingle, nmult, nnew;
  int	*fm, *colset1, *colset2, *pl, *lut, *po, *objover, *objq;
  struct sym_desc *pdesc, *pbase1, *pbase2;
  double t1, t2, t3, t4;
    /* first arg is an array */
  t1 = systime();
  iq = ps[0];
  /* the first arg must be a 2-D byte array, often a bitmap from getcurvature function */
  ntotal = get_2d_array(iq, LONG, &fm, &nx, &ny);
  if (ntotal <= 0) return -1;
  /* second and fourth arguments must be symbol arrays */
  nin1 =  get_1d_sym_array(ps[1], &pbase1);
  if (nin1 <= 0) return -1;
  nin2 =  get_1d_sym_array(ps[3], &pbase2);
  if (nin2 <= 0) return -1;
  /* third and fifth must be 1-D color arrays */
  ncol1 = get_1d_array(ps[2], LONG, &colset1);
  if (ncol1 <= 0) return -1;
  ncol2 = get_1d_array(ps[4], LONG, &colset2);
  if (ncol2 <= 0) return -1;
  /* some checks, the color numbers must agree */
  if (nin1 != ncol1 || nin2 != ncol2 ) {
    fprintf(stderr, "OBJECTOVERLAP - ptr and color counts inconsistent\n"); return -1;
  }
  /* make an inverse LUT for the first set */
  mincol = maxcol = 0;
  n = ncol1;  pl = colset1;
  while (n--) { iq = *pl++; if (iq >  maxcol) maxcol = iq;  if (iq <  mincol) mincol = iq; }
  range = maxcol - mincol + 1;
  //printf("range = %d\n", range);
  /* save for adding new ones that extend the range */
  if (maxcol > 0) maxpluscol = maxcol; else maxpluscol = 0;
  if (mincol < 0) minminuscol = mincol; else minminuscol = 0;
  /* set up the LUT array */
  mq = range * sizeof(int);
  lut = (int *) malloc(mq);
  bzero(lut, mq);
  n = ncol1;  pl = colset1;
  for (i=0; i<ncol1; i++) { colq = *pl++; j = colq - mincol;  lut[j] = i; }
  nsingle = nmult = nnew = 0;
  t2 = systime();

  /* now the main event */
  for (i=0;i<nin2;i++) {
    int jprev, nct, nuobj, *objq, *objover;
    colq = colset2[i];   /* the color, we need for the sign */
    sgn1 = (colq > 0) ? 1: 0;
    pdesc = pbase2 + i;	 /* the ptr array */
    if (pdesc->type != LONG) { fprintf(stderr, "OBJECTOVERLAP - a ptr array is not type LONG\n"); break; }
    h = (struct ahead *) pdesc->spec.array.ptr;
    /* anything there ? */
    nd = h->ndim;
    nptr = h->dims[0]; if (nd > 1) for (j=1;j<nd;j++) nptr *= h->dims[j];
    //printf("size of feature, nptr = %d, ", nptr);
    if (nptr > 0) {
      po = (int *) ((char *)h + sizeof(struct ahead));
      /* a place to store the overlap indices */
      mq = nptr * sizeof(int);
      objover = (int *) malloc(mq);  /* for the values from cmap1 (aka fm) */
      objq = objover;
      nuobj = nct = 0;
      jprev = 0;
      /* look for matches in fm, the cmap1 2-D array */
      for (j=0;j<nptr;j++) {
        iq = po[j];
	jq = fm[iq];  /* this value in fm, the color map for t = 0 */
	if (jq) {
	  /* don't count 0's or wrong sign */
	  sgn2 = (jq > 0) ? 1: 0;
	  if (sgn2 == sgn1) {
	    if (jq != jprev) { nuobj++; jprev = jq; }
	    *objq++ = jq;
	  }
	}
      }
      nct =  objq - objover;  /* our tally */
      /* did we get any ? */
      if (nct > 0) {
        /* multiple matches? */
	if (nuobj > 1) {
	  /* we count how many of each, need a bit of memory */
	  int *values, *vp, *valuecnts, *vc, nv, notfound, maxcnt, maxloc;
	  //printf("multiple matches, nuobj = %d, ", nuobj);
	  nmult++;
	  mq = nuobj * sizeof(int);
	  values = vp = malloc(mq);
	  valuecnts = vc = malloc(mq);
	  objq = objover;
	  jq = *objq++;
	  values[0] = jq;  valuecnts[0] = 1;  nv = 1;
	  for (j=1;j<nct;j++) {
	    jq = *objq++;
	    notfound = 1;
	    /* scan through already noted values */
	    for (k=0;k<nv;k++) {
	      if (jq == values[k]) {
	        valuecnts[k]++;  notfound = 0;  break;
	      }
	    }
	    if (notfound) {
	      values[nv] = jq;
	      valuecnts[nv] = 1;
	      nv++;
	    }
	  }
	  /* and we have to find the winner count wise */
	  maxcnt = 0;  maxloc = 0;
	  for (k=0;k<nv;k++) {
	    if (valuecnts[k] > maxcnt) { maxcnt = valuecnts[k];  maxloc = k; }
	  }
	  //printf("final nv = %d, winner is %d with count = %d\n", nv, maxloc, maxcnt);
	  //if (nv > nuobj) { printf("too many found in second pass at i =%d\n", i);  return -1; }
	  /* so our winner is at maxloc */
	  colq = values[maxloc];
	  free(values);
	  free(valuecnts);
	} else {
	  /* only one match, just grab the first value in objover */
	  //printf("single match\n");
	  nsingle++;
	  colq = *objover;
	}
	/* for the selected inherited color, zero the area in cmap1 (aka fm here), this is
	done to avoid "splits" wherein an old object's color is inherited by 2 or more, to do this
	we need to get the entire object in fm, not just the overlap area, this is why we have
	the inverse LUT */
	{
	  int	j, nd, nptr2, iq;
	  j = lut[colq - mincol];
	  if (j < 0 || j >= nin1) {
	    fprintf(stderr, "OBJECTOVERLAP - internal error with reverse LUT, j = %d, nin1= %d\n", j, nin1);
	    return -1;
	  }
	  pdesc = pbase1 + j;	 /* the ptr array in in1 */
	  if (pdesc->type != LONG) { fprintf(stderr, "OBJECTOVERLAP - a ptr array is not type LONG in in1\n"); break; }
	  h = (struct ahead *) pdesc->spec.array.ptr;
	  /* anything there ? */
	  nd = h->ndim;
	  nptr2 = h->dims[0]; if (nd > 1) for (j=1;j<nd;j++) nptr2 *= h->dims[j];
          po = (int *) ((char *)h + sizeof(struct ahead));
	  //printf("size of #1 feature, nptr2 = %d, ", nptr2);
	  if (nptr2 > 0) {
	    for (j=0;j<nptr2;j++) {
              iq = po[j];
	      fm[iq] = 0;
	    }
	  }
	}
      } else {
        /* no matches, must be new */
	//printf("no match\n");
	nnew++;
	if (sgn1) colq = ++maxpluscol; else colq = --minminuscol;
      }
     //printf("final colq = %d\n", colq);
     colset2[i] = colq;  /* set the inherited or new color */
     free(objover);
    }
  }
  printf("nsingle, nmult, nnew = %d, %d, %d\n", nsingle, nmult, nnew);
  printf("old range: %d:%d, new range: %d:%d\n", mincol,maxcol,minminuscol,maxpluscol);
  free(lut);
  t3 = systime();
  printf("objectoverlap total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
    1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
  return 1;
  }
/*-------------------------------------------------------------------------*/
int ana_objectoverlap(int narg, int ps[])
  {
  /* a subr:  objectoverlap, cmap1, in1, colors1, in2, colors2
  modifies colors2, cmap1 is a object color map, in1 is a symbol array of ptr arrays for each
  object, colors1 is an array of the colors
  4/24/2009 - modified to use a cmap that contains object id+1 instead of color */
  int	result_sym, iq, jq, kq, nd, n, nx, ny, ntotal, j, mq, ntq, nbad, mapset, i, k;
  int	nin1, nin2, ncol1, ncol2, mincol, maxcol, range, colq, colinq, nptr, sgn1, sgn2;
  int   maxpluscol, minminuscol, nsingle, nmult, nnew;
  int	*fm, *colset1, *colset2, *pl, *po, *objover, *objq;
  struct sym_desc *pdesc, *pbase1, *pbase2;
  double t1, t2, t3, t4;
    /* first arg is an array */
  t1 = systime();
  iq = ps[0];
  /* the first arg must be a 2-D byte array, often a bitmap from getcurvature function */
  ntotal = get_2d_array(iq, LONG, &fm, &nx, &ny);
  if (ntotal <= 0) return -1;
  /* second and fourth arguments must be symbol arrays */
  nin1 =  get_1d_sym_array(ps[1], &pbase1);
  if (nin1 <= 0) return -1;
  nin2 =  get_1d_sym_array(ps[3], &pbase2);
  if (nin2 <= 0) return -1;
  /* third and fifth must be 1-D color arrays */
  ncol1 = get_1d_array(ps[2], LONG, &colset1);
  if (ncol1 <= 0) return -1;
  ncol2 = get_1d_array(ps[4], LONG, &colset2);
  if (ncol2 <= 0) return -1;
  /* some checks, the color numbers must agree */
  if (nin1 != ncol1 || nin2 != ncol2 ) {
    fprintf(stderr, "OBJECTOVERLAP - ptr and color counts inconsistent\n"); return -1;
  }
  /* get the range of the input colors */
  mincol = maxcol = 0;
  n = ncol1;  pl = colset1;
  while (n--) { iq = *pl++; if (iq >  maxcol) maxcol = iq;  if (iq <  mincol) mincol = iq; }
  range = maxcol - mincol + 1;
  //printf("range = %d\n", range);
  /* save for adding new ones that extend the range */
  if (maxcol > 0) maxpluscol = maxcol; else maxpluscol = 0;
  if (mincol < 0) minminuscol = mincol; else minminuscol = 0;
  nsingle = nmult = nnew = 0;
  t2 = systime();

  /* now the main event */
  for (i=0;i<nin2;i++) {
    int jprev, nct, nuobj, *objq, *objover, cq;
    colq = colset2[i];   /* the color, we need for the sign */
    sgn1 = (colq > 0) ? 1: 0;
    pdesc = pbase2 + i;	 /* the ptr array */
    if (pdesc->type != LONG) { fprintf(stderr, "OBJECTOVERLAP - a ptr array is not type LONG\n"); break; }
    h = (struct ahead *) pdesc->spec.array.ptr;
    /* anything there ? */
    nd = h->ndim;
    nptr = h->dims[0]; if (nd > 1) for (j=1;j<nd;j++) nptr *= h->dims[j];
    //printf("size of feature, nptr = %d, ", nptr);
    if (nptr > 0) {
      po = (int *) ((char *)h + sizeof(struct ahead));
      /* a place to store the overlap indices */
      mq = nptr * sizeof(int);
      objover = (int *) malloc(mq);  /* for the values from cmap1 (aka fm) */
      objq = objover;
      nuobj = nct = 0;
      jprev = 0;
      /* look for matches in fm, the cmap1 2-D array */
      for (j=0;j<nptr;j++) {
        iq = po[j];
	jq = fm[iq];  /* this index in fm, the color map for t = 0 */
	if (jq > 0) {
	  /* don't count 0's, get color to obtain the sign, we want sign to match */
	  if (jq > ncol1) { fprintf(stderr, "OBJECTOVERLAP - cmap1 index larger than color1 size\n"); return -1; }
	  cq = colset1[jq-1];  /* this will not work well if colors1 and cmap1 aren't consistent */
	  sgn2 = (cq > 0) ? 1: 0;
	  if (sgn2 == sgn1) {
	    if (jq != jprev) { nuobj++; jprev = jq; }
	    *objq++ = jq;
	  }
	}
      }
      nct =  objq - objover;  /* our tally */
      /* did we get any ? */
      if (nct > 0) {
        /* multiple matches? */
	if (nuobj > 1) {
	  /* we count how many of each, need a bit of memory */
	  int *values, *vp, *valuecnts, *vc, nv, notfound, maxcnt, maxloc;
	  //printf("multiple matches, nuobj = %d, ", nuobj);
	  nmult++;
	  mq = nuobj * sizeof(int);
	  values = vp = malloc(mq);
	  valuecnts = vc = malloc(mq);
	  objq = objover;
	  jq = *objq++;
	  values[0] = jq;  valuecnts[0] = 1;  nv = 1;
	  for (j=1;j<nct;j++) {
	    jq = *objq++;
	    notfound = 1;
	    /* scan through already noted values */
	    for (k=0;k<nv;k++) {
	      if (jq == values[k]) {
	        valuecnts[k]++;  notfound = 0;  break;
	      }
	    }
	    if (notfound) {
	      values[nv] = jq;
	      valuecnts[nv] = 1;
	      nv++;
	    }
	  }
	  /* and we have to find the winner count wise */
	  maxcnt = 0;  maxloc = 0;
	  for (k=0;k<nv;k++) {
	    if (valuecnts[k] > maxcnt) { maxcnt = valuecnts[k];  maxloc = k; }
	  }
	  //printf("final nv = %d, winner is %d with count = %d\n", nv, maxloc, maxcnt);
	  //if (nv > nuobj) { printf("too many found in second pass at i =%d\n", i);  return -1; }
	  /* so our winner is at maxloc */
	  colinq = values[maxloc];
	  free(values);
	  free(valuecnts);
	} else {
	  /* only one match, just grab the first value in objover */
	  //printf("single match\n");
	  nsingle++;
	  colinq = *objover;
	}
	colinq--; 
	colq = colset1[colinq];
	/* for the selected inherited color index, zero the area in cmap1 (aka fm here), this is
	done to avoid "splits" wherein an old object's color is inherited by 2 or more */
	{
	  int	j, nd, nptr2, iq;
	  pdesc = pbase1 + colinq;	 /* the ptr array in in1 */
	  if (pdesc->type != LONG) { fprintf(stderr, "OBJECTOVERLAP - a ptr array is not type LONG in in1\n"); break; }
	  h = (struct ahead *) pdesc->spec.array.ptr;
	  /* anything there ? */
	  nd = h->ndim;
	  nptr2 = h->dims[0]; if (nd > 1) for (j=1;j<nd;j++) nptr2 *= h->dims[j];
          po = (int *) ((char *)h + sizeof(struct ahead));
	  //printf("size of #1 feature, nptr2 = %d, ", nptr2);
	  if (nptr2 > 0) {
	    for (j=0;j<nptr2;j++) {
              iq = po[j];
	      fm[iq] = 0;
	    }
	  }
	}
      } else {
        /* no matches, must be new */
	//printf("no match\n");
	nnew++;
	if (sgn1) colq = ++maxpluscol; else colq = --minminuscol;
      }
     //printf("final colq = %d\n", colq);
     colset2[i] = colq;  /* set the inherited or new color */
     free(objover);
    }
  }
  printf("nsingle, nmult, nnew = %d, %d, %d\n", nsingle, nmult, nnew);
  printf("old range: %d:%d, new range: %d:%d\n", mincol,maxcol,minminuscol,maxpluscol);
  t3 = systime();
  printf("objectoverlap total time = %10.2f ms, for setup = %10.2f ms, part 2 = %10.2f ms\n",
    1.E3*(t3-t1), 1.E3*(t2-t1), 1.E3*(t3-t2));
  return 1;
  }
/*-------------------------------------------------------------------------*/
