/* internal ana subroutines and functions, this is file fun4.c  */
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <sys/types.h>
#include <sys/time.h>
#include "ana_structures.h"
#include "defs.h"
#define ABS(x) ((x)>=0?(x):-(x))
#define	MIN(a,b) (((a)<(b))?(a):(b))
#define	MAX(a,b) (((a)>(b))?(a):(b))
#define BI_CUBIC_SMOOTH 4
#define BI_CUBIC 3
 extern	int scrat[NSCRAT];
 extern	struct sym_desc sym[];
 extern	struct sym_list		*subr_sym_list[];
 extern	int	temp_base;
 extern	int	ana_float();
 extern	char *find_sym_name(int iq);
 extern	int	ana_type_size[];
 extern	int	vfix_top, num_ana_subr, next_user_subr_num;
 extern	int	lastmin_sym, lastmax_sym;
 extern	float	float_arg();
 extern	double	double_arg();
 static	int	regridtypeflag = 0;
 int	resample_type = BI_CUBIC_SMOOTH;
 int	zoom_test =0;
 static	char	*basin_coords = {"$BASIN_COORDS"};
 static	char	*zoomtemp = {"$ZOOM_TEMP"};
 int	maxregridsize = 2048, stretchmark, tvsmt, badmatch, stretch_clip = 18;
 int	match_mess = 1, bias_flag = 1, badCellFlag, nBadCells, badValue;
 int	islit, dstype, itmax = 18, sort_flag = 0;
 int	result_sym;
 float	gwid, xoffset, yoffset;
 float	resid(), residfast();
 void	bicubic_f(), bicubic_fc();
 union	types_ptr { byte *b; short *w; int *l; float *f; double *d;};		
 /*------------------------------------------------------------------------- */
int ana_gridmatch_tell(narg,ps)	/* gridmatch_tell function */
 /* this is a special routine for studying the behavior of the
 LCT calculations */
 /* the call is offsets = gridmatch_tell(m1,m2,gx,gy,dx,dy,gwid,ndx)
	where	m1 = reference input image
	m2 = image to compare with m1, m1 and m2 must be same size
	gx = array of x gridpoints
	gy = array of y gridpoints, gx and gy must have same size
	dx and dy are the window size, and gwid is the gaussian mask width
	ndx is the differencing area to return, used for both x and y
 */
 int	narg, ps[];
 {
 int	type1, type2, type, iq, jq, nx, ny, nxg, nyg, dx, dy, dim[8];
 int	result_sym, nc, i1, i2, j1, j2, dx2, dy2, ndx;
 int	*gx, *gy;
 float	*out, *gwx, *gwy;
 union	types_ptr p1, p2;
 struct	ahead	*h;
 iq = ps[0];
				 /* first arg must be a 2-D array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 type1 = sym[iq].type;
 type2 = sym[ ps[1] ].type;
 dstype = type = MAX(type1, type2);
 if ( type1 < type ) iq = upgrade(iq, type);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 p1.l = (int *) ((char *)h + sizeof(struct ahead));
 nx = h->dims[0];	ny = h->dims[1];
 iq = ps[1];
				 /* second arg must be a 2-D array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 if ( type2 < type ) iq = upgrade(iq, type);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 p2.l = (int *) ((char *)h + sizeof(struct ahead));
 if ( nx != h->dims[0] || ny != h->dims[1]) return execute_error(103);
	 /* the grid arrays must be the same size, convert to I*4 */
 iq = ps[2];
 if ( sym[iq].class != 4 ) return execute_error(66);
 iq = ana_long(1, &iq);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 gx = (int *) ((char *)h + sizeof(struct ahead));
 nxg = h->dims[0];	nyg = h->dims[1];
 iq = ps[3];
				 /* gy, the third argument */
 if ( sym[iq].class != 4 ) return execute_error(66);
 iq = ana_long(1, &iq);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 gy = (int *) ((char *)h + sizeof(struct ahead));
 if ( nxg != h->dims[0] || nyg != h->dims[1]) return execute_error(103);
			 /* now get the window size and mask width */
 dx = int_arg( ps[4]);	dy = int_arg( ps[5]);  gwid = float_arg( ps[6]);
 ndx = int_arg( ps[7]);
 /* need to make ndx odd and >= 3 */
 if (ndx%2 == 0) ndx--;	ndx = MAX(ndx, 3);
			/* output is floating array of (2, nxg, nyg, ndx, ndx) */
 dim[2] = nxg;   dim[3] = nyg;  dim[0] = ndx;  dim[1] = ndx;
 result_sym = array_scratch( 3, 4, dim);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 out = (float *) ((char *)h + sizeof(struct ahead));
		 /* following converted from macro stretch.mar */
 /* 9/25/92, stretch_clip has to be le half the cell width */
 /*printf("dx,dy,stretch_clip = %d %d %d\n",dx,dy,stretch_clip);*/
 if (stretch_clip >= (dx/2) || stretch_clip >= (dy/2) ) {
  stretch_clip = MIN( (dx/2), (dy/2) );
  printf("WARNING - !stretch_clip reduced to %d\n", stretch_clip);
  }
 /* try to use scratch for gaussian mask arrays (should work) */
 if ( (nx+ny) <= NSCRAT) gwx = (float *) scrat;
	 else gwx = (float *) malloc( (nx+ny)*4);
 /* printf("gwx = %d\n", gwx); */
 gwy = gwx + nx;
 nc = nxg * nyg;
 dx2 = dx/2;	dy2 = dy/2;	badmatch = 0;
 while (nc--) {		/* loop over grid points and get rectangles */
 i1 = *gx - dx2;  if ( i1 < 0 ) i1 = 0;  i1++; /* fortran indices */
 i2 = *gx++ + dx2;  if (i2 > nx) i2 = nx;
 j1 = *gy - dy2;  if ( j1 < 0 ) j1 = 0;  j1++; /* fortran indices */
 j2 = *gy++ + dy2;  if (j2 > ny) j2 = ny;
 xoffset = yoffset = 0;
 /* convert range to C (0 base) indices, do separately while debugging */
 i1--; i2--; j1--; j2--;
 /* get gaussian masks */
 gwind0( gwx, gwy, gwid, i1, i2, j1, j2);
 match_1_tell( p1.l, p2.l, i1, i2, j1, j2, nx, ny, gwx, gwy, ndx, out);
 out = out + ndx*ndx;
 }
 if ( gwx != (float *) scrat ) free(gwx);  /* free space if it was needed */
 return	result_sym;
 }
 /*-------------------------------------------------------------------------*/
int match_1_tell( p1, p2, nxa, nxb, nya, nyb, nx, ny, gwx, gwy, ndx, out)
 int	*p1, *p2, nxa, nxb, nya, nyb, nx, ny, ndx;
 float	*gwx, *gwy, *out;
 /* note that xoffset and yoffset are zeroed before we get in here */
 {
 int	idelx, idely, i, j, k, ndmx=1000, done[9];
 int	di, dj, kk, in, jn, iter, dd, badflag, ndx2;
 int	istart, iend, jstart, jend;
 float	av1, av2, cx, cy, cxx, cxy, cyy, avdif, t, res[9], buf[9], t1, t2;
 idelx = rint(xoffset);  idely = rint(yoffset);
      unbias( p1,p2,nxa,nxb,nya,nyb,nx,ny,gwx,gwy,&av1,&av2,&cx,&cy,
	 &cxx,&cxy,&cyy,idelx, idely);
 /* look at a 3x3 matrix of residuals centered at 0 offset, find the location
	 of the minimum, if not at center, then look at new 3x3 centered
	 on the edge minimum; repeat until min at center */
 iter = itmax;	badflag = 0;
 istart = idelx - ndx/2;	iend = istart + ndx;
 jstart = idely - ndx/2;	jend = jstart + ndx;
 avdif = 0.0;
 for (j = jstart; j < jend; j++) {
 for (i = istart; i < iend; i++) {
   if (bias_flag) {
   	avdif = av2 +  i*cx + j*cy + i*i*cxx + i*j*cxy + j*j*cyy - av1;
   *out++ = resid(p1,p2,i,j,nxa,nxb,nya,nyb,nx,ny,ndmx,gwx,gwy,avdif);
   } else {
   *out++ = residfast(p1,p2,i,j,nxa,nxb,nya,nyb,nx,ny,gwx,gwy);
   }
   }}
 return 1;
 }
 /*------------------------------------------------------------------------- */
int badcheck(m, i1, i2, j1, j2, nxs)
 int	*m, i1, i2, j1, j2;
 {
 /* check this cell for a bad value and return 1 if any found */
 union	types_ptr p;
 int stat = 0, i, j, jj;
 p.l = m;
 for (j=j1;j<j2;j++) { jj = nxs * j;
   switch (dstype) {
   case 0: for (i=i1;i<i2;i++)
   	if ( *(p.b + (i + jj)) == (unsigned char) badValue) { stat = 1; break;};
	break;
   case 1: for (i=i1;i<i2;i++)
   	if ( *(p.w + (i + jj)) == (short) badValue) { stat = 1; break;};
	break;
   case 2: for (i=i1;i<i2;i++)
   	if ( *(p.l + (i + jj)) == (int) badValue) { stat = 1; break;};
   case 3: for (i=i1;i<i2;i++)
   	if ( *(p.f + (i + jj)) == (float) badValue) { stat = 1; break;};
   case 4: for (i=i1;i<i2;i++)
   	if ( *(p.d + (i + jj)) == (double) badValue) { stat = 1; break;};
   }
   if (stat) break;
 }
 return stat;
 }
 /*------------------------------------------------------------------------- */
 /* a "standalone" gridmatch function */
int gridmatch(
   void *m1, /* input, address of the first image */
   void *m2, /* input, address of the second image */
   int type,  /* data type, 0=I*1, 1=I*2, 2=I*4, 3 = F*4, 4=F*8 */
   int nx,
   int ny,
   int *gx,
   int *gy,
   int nxg,
   int nyg,
   int dx, 
   int dy,
   float gwid,
   float *dxys	/* output, a (2,nxg, nyg) array of x/y displacements */
   /* note - if dxs is NULL, memory will be allocated for dxs and dys,
   otherwise we assume it already is */
   )
 {
 int	jq, goFlag;
 int	 nc, i1, i2, j1, j2, dx2, dy2;
 float	*gwx, *gwy;

 dstype = type;
 nBadCells = 0;
 if (nxg <= 0 || nxg > 1024 || nyg <=0 || nyg > 1024) {
   printf("a bad grid size, we got %d x %d\n", nxg, nyg);
   return 1; }
 /* get memory for the output arrays if dxs is NULL */
 if (dxys == NULL) {
   dxys = malloc( nxg * nyg * 4 * 2);
 }
		 /* following converted from macro stretch.mar */
 /* 9/25/92, stretch_clip has to be le half the cell width */
 /*printf("dx,dy,stretch_clip = %d %d %d\n",dx,dy,stretch_clip);*/
 if (stretch_clip >= (dx/2) || stretch_clip >= (dy/2) ) {
  stretch_clip = MIN( (dx/2), (dy/2) );
  printf("WARNING - !stretch_clip reduced to %d\n", stretch_clip);
  }
 /* scratch arrays for gaussian mask arrays */
 gwx = (float *) malloc( (nx+ny)*4);
 gwy = gwx + nx;

 nc = nxg * nyg;	/* number of cells */
 dx2 = dx/2;	dy2 = dy/2;	badmatch = 0;
 while (nc--) {		/* loop over grid points and get sub-rectangles */
 i1 = *gx - dx2;  if ( i1 < 0 ) i1 = 0;  /* C indices */
 i2 = *gx++ + dx2 - 1;  if (i2 >= nx) i2 = nx - 1;
 j1 = *gy - dy2;  if ( j1 < 0 ) j1 = 0;  /* C indices */
 j2 = *gy++ + dy2 - 1;  if (j2 >= ny) j2 = ny - 1;
 /* intialize offsets to zero */
 xoffset = yoffset = 0;
 /* check for bad cell values, if flag set */
 goFlag = 1;
 if (badCellFlag) {
   if ( badcheck(m1, i1, i2, j1, j2, nx) || badcheck(m2, i1, i2, j1, j2, nx))
   	{ goFlag = 0;  nBadCells++; }
 }  
 if (goFlag) {  /* if we have "bad" pixels, just keep the zero offset */
 /* get gaussian masks */
   gwind0( gwx, gwy, gwid, i1, i2, j1, j2);
   match_1( m1, m2, i1, i2, j1, j2, nx, ny, gwx, gwy);
 }
 *dxys++ = xoffset;	*dxys++ = yoffset;
 }
 free(gwx);
 return	0;
 }
 /*------------------------------------------------------------------------- */
 /* 2/18/2003 - modified to be easier to port by isolating the front end
 and the gridmatch in 2 parts, not done for ana_gridmatch_tell so use that
 or an old copy to restore. This has some drawbacks, it always does a malloc
 for the Gaussian masks ... */
int ana_gridmatch(narg,ps)	/* gridmatch function */
 /* the call is offsets = gridmatch(m1,m2,gx,gy,dx,dy,gwid)
	where	m1 = reference input image
	m2 = image to compare with m1, m1 and m2 must be same size
	gx = array of x gridpoints
	gy = array of y gridpoints, gx and gy must have same size
	dx and dy are the window size, and gwid is the gaussian mask width
 */
 int	narg, ps[];
 {
 int	type1, type2, type, iq, jq, nx, ny, nxg, nyg, dx, dy, dim[3];
 int	result_sym, nc, i1, i2, j1, j2, dx2, dy2;
 int	*gx, *gy;
 float	*outx, *gwx, *gwy;
 void	*p1, *p2;
 struct	ahead	*h;
 iq = ps[0];
				 /* first arg must be a 2-D array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 type1 = sym[iq].type;
 type2 = sym[ ps[1] ].type;
 type = MAX(type1, type2);
 if ( type1 < type ) iq = upgrade(iq, type);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 p1 = (void *) ((char *)h + sizeof(struct ahead));
 nx = h->dims[0];	ny = h->dims[1];
 iq = ps[1];
				 /* second arg must be a 2-D array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 if ( type2 < type ) iq = upgrade(iq, type);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 p2 = (void *) ((char *)h + sizeof(struct ahead));
 if ( nx != h->dims[0] || ny != h->dims[1]) return execute_error(103);
	 /* the grid arrays must be the same size, convert to I*4 */
 iq = ps[2];
 if ( sym[iq].class != 4 ) return execute_error(66);
 iq = ana_long(1, &iq);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 gx = (int *) ((char *)h + sizeof(struct ahead));
 nxg = h->dims[0];	nyg = h->dims[1];
 iq = ps[3];
				 /* gy, the third argument */
 if ( sym[iq].class != 4 ) return execute_error(66);
 iq = ana_long(1, &iq);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 gy = (int *) ((char *)h + sizeof(struct ahead));
 if ( nxg != h->dims[0] || nyg != h->dims[1]) return execute_error(103);
			 /* now get the window size and mask width */
 dx = int_arg( ps[4]);	dy = int_arg( ps[5]);  gwid = float_arg( ps[6]);
			 /* output is floating array of (2, nxg, nyg) */
 dim[0] = 2;  dim[1] = nxg;   dim[2] = nyg;
 result_sym = array_scratch( 3, 3, dim);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 outx = (float *) ((char *)h + sizeof(struct ahead));
 /* now the call to gridmatch */
 gridmatch(p1, p2, type, nx,ny, gx, gy, nxg,nyg, dx,dy, gwid, outx);
 return	result_sym;
 }
 /*------------------------------------------------------------------------- */
int match_1( p1, p2, nxa, nxb, nya, nyb, nx, ny, gwx, gwy)
 int	*p1, *p2, nxa, nxb, nya, nyb, nx, ny;
 float	*gwx, *gwy;
 /* note that xoffset and yoffset are zeroed before we get in here */
 {
 int	idelx, idely, i, j, k, ndmx=1000, done[9];
 int	di, dj, kk, in, jn, iter, dd, badflag;
 float	av1, av2, cx, cy,cxx,cxy,cyy, avdif, t, res[9], buf[9], t1, t2;
 float	resid();
 for (i=0;i<9;i++) done[i] = 0;
 idelx = rint(xoffset);  idely = rint(yoffset);
 if (bias_flag) {
 unbias( p1,p2,nxa,nxb,nya,nyb,nx,ny,gwx,gwy,&av1,&av2,&cx,&cy,
	 &cxx,&cxy,&cyy,idelx, idely); }
 /* look at a 3x3 matrix of residuals centered at 0 offset, find the location
	 of the minimum, if not at center, then look at new 3x3 centered
	 on the edge minimum; repeat until min at center */
 iter = itmax;	badflag = 0;
 while (iter--) {
 for (k=0;k<9;k++) { if (done[k] == 0) {
   i = idelx + (k % 3) - 1;	j = idely + (k / 3) - 1;
   if (bias_flag) {
	avdif = av2 +  i*cx + j*cy + i*i*cxx + i*j*cxy + j*j*cyy - av1;
	res[k] = resid(p1,p2,i,j,nxa,nxb,nya,nyb,nx,ny,ndmx,gwx,gwy,avdif);
   } else {
	res[k] = residfast(p1,p2,i,j,nxa,nxb,nya,nyb,nx,ny,gwx,gwy);
   }
   }
 }
 t = res[0];	i = 0;
 for (k=1;k<9;k++) { if (res[k] < t) { t = res[k];  i = k; } }
 idelx += (i % 3) - 1;	idely += (i / 3) - 1;
 /* check if we have gone too far */
 if (abs(idelx) > stretch_clip || abs(idely) > stretch_clip ) {
  if (match_mess) printf("match - stretch_clip exceeded\n");
  badflag = 1; break; }
 if ( i == 4) break;			/* done if it is the center one */
 /* not in center, shuffle what we have to put the edge min in center */
 di = (i % 3) - 1;	dj = (i / 3) - 1;	dd = dj * 3 + di;
 for (k=0;k<9;k++) {	in = k%3 + di;	jn = k/3 + dj;
	 if (in>=0 && jn>=0 && in<3 && jn<3) {		/* in range */
	   done[k] = 1;	buf[k] = res[k+dd]; } else {
	   done[k] = 0;	}		/* not in range, mark undone */
 }
 for (k=0;k<9;k++) res[k] = buf[k];	/* put back in res array */
 }					/* end of iter while */
					 /* done or reached itmax, which ? */
 if (iter <= 0) {
  if (match_mess) printf("match - exceeded maximum iteration\n");
  badflag = 1; }
 if (badflag) {
  if (match_mess) printf("cell index range = %d %d %d %d\n", nxa,nxb,nya,nyb);
  badmatch++; return 1; }
					 /* must have been OK so far */
 getmin(res, &t1, &t2);
 xoffset = idelx + t1;	yoffset = idely + t2;
 if (isnan(xoffset) == 1) xoffset = 0.0;
 if (isnan(yoffset) == 1) yoffset = 0.0;
 return 1;
 }
 /*------------------------------------------------------------------------- */
int gwind0( gwx, gwy, gwid, nxa, nxb, nya, nyb)
 float	*gwx, *gwy, gwid;
 int	nxa, nxb, nya, nyb;
 {
 float	wid, xcen, ycen, sum, xq;
 int	i;
 wid = gwid*0.6005612;
 if (wid > 0.) { xcen = (nxa+nxb)/2.;ycen = (nya+nyb)/2.;
   for (i=nxa; i <= nxb;i++) { xq = (( i - xcen)/wid);
	 *(gwx + i) = exp(- (xq * xq)); }
   for (i=nya; i <= nyb;i++) { xq = (( i - ycen)/wid);
	 *(gwy + i) = exp(- (xq * xq)); }
 }	else {
   for (i=nxa; i <= nxb;i++) *(gwx + i) = 1.0;
   for (i=nya; i <= nyb;i++) *(gwy + i) = 1.0;
 }
 return 1;
 }
 /*------------------------------------------------------------------------- */
int unbias(m1,m2,nxa,nxb,nya,nyb,nxs,nys,gx,gy,av1,av2,cx,cy,cxx,cxy,cyy,
		 idelx, idely)
 float	*gx, *gy;
 float	*av1, *av2, *cx, *cy,*cxx,*cxy,*cyy;
 int	nxa, nxb, nya, nyb, nxs, nys, *m1, *m2, idelx, idely;
 {
 float	t0,t1,t2,t3,t4,t5, averag();
 /*  find weighted means of m1 & m2 over the window
     sets up quadratic fit to average of m2 as a fcn. of offsets
 */
 *av1 = averag(m1,nxa,nxb,nya,nyb,nxs,nys,0,0,gx,gy);
 t0  = averag(m2,nxa,nxb,nya,nyb,nxs,nys,idelx,idely,gx,gy);
 t1  = averag(m2,nxa,nxb,nya,nyb,nxs,nys,idelx+1,idely,gx,gy);
 t2  = averag(m2,nxa,nxb,nya,nyb,nxs,nys,idelx-1,idely,gx,gy);
 t3  = averag(m2,nxa,nxb,nya,nyb,nxs,nys,idelx,idely+1,gx,gy);
 t4  = averag(m2,nxa,nxb,nya,nyb,nxs,nys,idelx,idely-1,gx,gy);
 t5  = averag(m2,nxa,nxb,nya,nyb,nxs,nys,idelx+1,idely+1,gx,gy);
 *av2 = t0;
 *cx = 0.5*(t1-t2);
 *cy = 0.5*(t3-t4);
 *cxx = 0.5*(t1-2*t0+t2);
 *cyy = 0.5*(t3-2*t0+t4);
 *cxy = t5 + t0 - t1 - t3;
 return	1;
 }
 /*------------------------------------------------------------------------- */
float	averag(m,nxa,nxb,nya,nyb,nxs,nys,idx,idy,gx,gy)
 /* finds weighted average of array m over the block defined*/
 float	*gx, *gy;
 int	nxa, nxb, nya, nyb, idx, idy, *m;
 {
 union	types_ptr p;
 int	nxc, nxd, nyc, nyd, i, j, jj;
 float	sum, sumg, sumx, sumgx;
 p.l = m; 
 /* fix limits so sum doesn't run off edge of image*/
 if (nxa + idx < 0 ) nxc = -idx;  else nxc = nxa;
 if (nya + idy < 0 ) nyc = -idy;  else nyc = nya;
 if (nxb + idx > nxs) nxd = nxs - idx;  else  nxd = nxb;
 if (nyb + idy > nys) nyd = nys - idy;  else  nyd = nyb;
 sum = sumg =sumgx = 0.0;
 for (i=nxc;i<nxd;i++)  sumgx += *(gx + i);
 /* weighted sum in window, note switch on case before inner loop */
 for (j=nyc;j<nyd;j++) { sumx = 0.0; jj = idx + nxs * (j + idy);
  switch (dstype) {
   case 0: for (i=nxc;i<nxd;i++) sumx += *(gx + i) * *(p.b + (i + jj)); break;
   case 1: for (i=nxc;i<nxd;i++) sumx += *(gx + i) * *(p.w + (i + jj)); break;
   case 2: for (i=nxc;i<nxd;i++) sumx += *(gx + i) * *(p.l + (i + jj)); break;
   case 3: for (i=nxc;i<nxd;i++) sumx += *(gx + i) * *(p.f + (i + jj)); break;
   case 4: for (i=nxc;i<nxd;i++) sumx += *(gx + i) * *(p.d + (i + jj)); break;
 }
 sum += *(gy + j) * sumx;
 sumg += *(gy + j) * sumgx;
 }							/* end of j loop */
 return sum/sumg;
 }
 /*------------------------------------------------------------------------- */
float   resid(m1,m2,idx,idy,nxa,nxb,nya,nyb,nxs,nys,ndmx,gx,gy,bs)
 int	*m1, *m2;
 int     idx,idy,nxa,nxb,nya,nyb,nxs,nys,ndmx;
 float	*gx, *gy, bs;
 {
 int     nxc, nxd, nyc, nyd, nx, ny;
 union	types_ptr   m1p, m2p;
 float   *p1, *p2, *ps;
 float   sum, sumx, t, ndmx2;
 int     i, j;
 float   sumg;
 static  int     mxc, mxd, myc, myd;
 static  float   gsum;
 /*set up limits */
 nxc = nxa; if ( (nxc + idx) < 0 ) nxc = - idx;
 nyc = nya; if ( (nyc + idy) < 0 ) nyc = - idy;
 nxd = nxb; if ( (nxd + idx) >= nxs ) nxd = nxs - idx -1;
 nyd = nyb; if ( (nyd + idy) >= nys ) nyd = nys - idy -1;
 sum = sumg = 0.0;
 nx = nxd - nxc +1;
 p2 = gy + nyc; ps = gx + nxc;
 if (nxc!=mxc||nxd!=mxd||nyc!=myc||nyd!=myd) {
 
 /*sum gaussians over rectangle to get normalization (only if limits change)*/
 
 j = nyd -nyc + 1;
 /*printf("in resid, j,nx,*ps = %d %d %e\n",j,nx,*ps);*/
 while (j) { i = nx; p1 = ps;
	 while (i) { sumg += (*p1++) * (*p2); i--; }
	 p2++; j--; }
 gsum = sumg;
 mxc = nxc; mxd = nxd; myc = nyc; myd = nyd; } else sumg = gsum;
 
 /*printf("nxc, nxd, nyc, nyd = %d %d %d %d\n",nxc, nxd, nyc, nyd);*/
				 /*get start of m1 and m2 and the increments */
 m1p.l = m1;	m2p.l = m2;
 /*printf("m1p.b, m2p.b = %x %x\n",m1p.b, m2p.b);*/
 m1p.b = m1p.b + ana_type_size[dstype] * ((nyc * nxs ) + nxc);
 m2p.b = m2p.b + ana_type_size[dstype] * (( (nyc+ idy) * nxs ) + nxc + idx);
 ny = nxs - nx; /*residual increment after inner loop */
 p2 = gy + nyc;
 /*printf("m1p.b, m2p.b = %x %x, ny %d, bs %e\n",m1p.b, m2p.b,ny,bs);*/
 
				 /* now the loop to compute the residual */
				 /* with switch on type of array */
 j = nyd -nyc +1;
 ndmx2 = ndmx * ndmx;
 switch (dstype) {
 case 0:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = ((float)  *m1p.b++ - (float) *m2p.b++);
	 	t = t + bs; t = t*t;
		t = MIN(t, ndmx2);
		 sumx += (*p1++) * t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.b += ny;      m2p.b += ny;      j--; } break;
 case 1:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = (float) ( *m1p.w++ - *m2p.w++); t = t + bs; t = t*t;
		 t = MIN(t, ndmx2);
		 sumx += (*p1++) * t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.w += ny;      m2p.w += ny;      j--; } break;
 case 2:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = (float) ( *m1p.l++ - *m2p.l++); t = t + bs; t = t*t;
		 t = MIN(t, ndmx2);
		 sumx += (*p1++) * t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.l += ny;      m2p.l += ny;      j--; } break;
 case 3:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = (float) ( *m1p.f++ - *m2p.f++); t = t + bs; t = t*t;
		 t = MIN(t, ndmx2);
		 sumx += (*p1++) * t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.f += ny;      m2p.f += ny;      j--; } break;
 case 4:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = (float) ( *m1p.d++ - *m2p.d++); t = t + bs; t = t*t;
		 t = MIN(t, ndmx2);
		 sumx += (*p1++) * t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.d += ny;      m2p.d += ny;      j--; } break;
 }				/* end of type switch */
 /*return normalized residual */
 return  sum/sumg;
 }
 /*------------------------------------------------------------------------- */
float   residfast(m1,m2,idx,idy,nxa,nxb,nya,nyb,nxs,nys,gx,gy)
 /* no bias or min check */
 int	*m1, *m2;
 int     idx,idy,nxa,nxb,nya,nyb,nxs,nys;
 float	*gx, *gy;
 {
 int     nxc, nxd, nyc, nyd, nx, ny;
 union	types_ptr   m1p, m2p;
 float   *p1, *p2, *ps;
 float   sum, sumx, t, ndmx2;
 int     i, j;
 float   sumg;
 static  int     mxc, mxd, myc, myd;	/* some old values */
 static  float   gsum;
 /*set up limits */
 /* notes - nxa:nxb, nya:nyb are limits for m1
 	idx, idy are the shift for m2
	nxs, nys are the size of the original image
 */
 nxc = nxa; if ( (nxc + idx) < 0 ) nxc = - idx;
 nyc = nya; if ( (nyc + idy) < 0 ) nyc = - idy;
 nxd = nxb; if ( (nxd + idx) >= nxs ) nxd = nxs - idx -1;
 nyd = nyb; if ( (nyd + idy) >= nys ) nyd = nys - idy -1;
 sum = sumg = 0.0;
 
 nx = nxd - nxc +1;
 p2 = gy + nyc; ps = gx + nxc;
 
 if (nxc!=mxc||nxd!=mxd||nyc!=myc||nyd!=myd) {
 
 /*sum gaussians over rectangle to get normalization (only if limits change)*/
 
 j = nyd -nyc + 1;
 /*printf("in resid, j,nx,*ps = %d %d %e\n",j,nx,*ps);*/
 while (j) { i = nx; p1 = ps;
	 while (i) { sumg += (*p1++) * (*p2); i--; }
	 p2++; j--; }
 gsum = sumg;
 mxc = nxc; mxd = nxd; myc = nyc; myd = nyd; } else sumg = gsum;
 
 /*printf("nxc, nxd, nyc, nyd = %d %d %d %d\n",nxc, nxd, nyc, nyd);*/
				 /*get start of m1 and m2 and the increments */
 m1p.l = m1;	m2p.l = m2;
 /*printf("m1p.b, m2p.b = %x %x\n",m1p.b, m2p.b);*/
 m1p.b = m1p.b + ana_type_size[dstype] * ((nyc * nxs ) + nxc);
 m2p.b = m2p.b + ana_type_size[dstype] * (( (nyc+ idy) * nxs ) + nxc + idx);
 ny = nxs - nx; /*residual increment after inner loop */
 p2 = gy + nyc;
 /*printf("m1p.b, m2p.b = %x %x, ny %d, bs %e\n",m1p.b, m2p.b,ny,bs);*/
 
				 /* now the loop to compute the residual */
				 /* with switch on type of array */
 j = nyd -nyc +1;
 switch (dstype) {
 case 0:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = ((float)  *m1p.b++ - (float) *m2p.b++);
		 sumx += (*p1++) * t*t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.b += ny;      m2p.b += ny;      j--; } break;
 case 1:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = (float) ( *m1p.w++ - *m2p.w++);
		 sumx += (*p1++) * t*t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.w += ny;      m2p.w += ny;      j--; } break;
 case 2:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = (float) ( *m1p.l++ - *m2p.l++);
		 sumx += (*p1++) * t*t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.l += ny;      m2p.l += ny;      j--; } break;
 case 3:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = (float) ( *m1p.f++ - *m2p.f++);
		 sumx += (*p1++) * t*t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.f += ny;      m2p.f += ny;      j--; } break;
 case 4:
 while (j) { i = nx; p1 = ps; sumx = 0.0;
	 while (i) { t = (float) ( *m1p.d++ - *m2p.d++);
		 sumx += (*p1++) * t*t; i--; }
	 sum += (*p2++) * sumx;
	 m1p.d += ny;      m2p.d += ny;      j--; } break;
 }				/* end of type switch */
 /*return normalized residual */
 return  sum/sumg;
 }
 /*------------------------------------------------------------------------- */
int upgrade(i, type)
 int	i, type;
 {
 switch (type) {
 case 0: return ana_byte(1, &i);
 case 1: return ana_word(1, &i);
 case 2: return ana_long(1, &i);
 case 3: return ana_float(1, &i);
 case 4: return ana_double(1, &i);
 }
 }
 /*------------------------------------------------------------------------- */
int ana_basins(narg,ps)				/* basins function */
 /* the call is ic = basins(array)
    where array must be a 2-D array, returns the coordinates that
    each point would roll to */
 int	narg, ps[];
 {
 int	iq, n, m, np, nq, mq, ic, i, type, ip, result_sym, limit, mincount=0;
 int	*ptarr, *ptr, *p, dim[8], *bascord, jq;
 struct	ahead	*h;
 union	types_ptr q, qback, qahead;
 iq = ps[0];
				 /* first arg must be a 2-D array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 n = h->dims[0];	m = h->dims[1];
 dim[0] = n;		dim[1] = m;
 if ( n < 3 || m < 3 ) {
 	printf("BASINS: arrays too small, nx, ny = %d %d\n", n, m);
	return -1; }
 np = n * m;
 type = sym[iq].type;
 ptarr = (int *) ((char *)h + sizeof(struct ahead));
 result_sym = array_scratch(2, 2, dim);		/*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 ptr = (int *) ((char *)h + sizeof(struct ahead));
 /* try just the 4 adjacent points */
 /* the edges and corners are done as special cases */
 
 
 switch ( type) {

 case 0:
 case 1:
 case 2: {
 /* this handles all integer types, has several internal switches on
 type */
 int	wq, wmin, wqm1, wqp1, wqmr, wqpr;
 
 ip = 0;
 switch (type) {
 case 0:
 q.b = (byte*) ptarr;	/* first element */
 qback.b = q.b;		qahead.b = q.b + n;
 wq = (byte) *q.b++;		wqp1 = (byte) *q.b++;
 wqpr = (byte) *qahead.b++;
 break;
 case 1:
 q.w = (short*) ptarr;	/* first element */
 qback.w = q.w;		qahead.w = q.w + n;
 wq = (int) *q.w++;		wqp1 = (int) *q.w++;
 wqpr = (int) *qahead.w++;
 break;
 case 2:
 q.l = (int*) ptarr;	/* first element */
 qback.l = q.l;		qahead.l = q.l + n;
 wq = (int) *q.l++;		wqp1 = (int) *q.l++;
 wqpr = (int) *qahead.l++;
 break;
 }
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* rest of top row except last point, 3 checks, no wqmr */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;
 switch (type) {
 case 0: wqp1 = (int) *q.b++;	wqpr = (int) *qahead.b++; break;
 case 1: wqp1 = (int) *q.w++;	wqpr = (int) *qahead.w++; break;
 case 2: wqp1 = (int) *q.l++;	wqpr = (int) *qahead.l++; break;
 }
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* last point in top row, 2 checks */
 wqm1 = wq;	wmin = wq = wqp1;
 switch (type) {
 case 0:	wqpr = (int) *qahead.b++; break;
 case 1:	wqpr = (int) *qahead.w++; break;
 case 2:	wqpr = (int) *qahead.l++; break;
 }
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;

 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
 /* start row, not top or bottom */
 /* left hand edge, 3 checks, no wqm1 */
 switch (type) {
 case 0: wq = (int) *q.b++;	wqp1 = (int) *q.b++; wqpr = (int) *qahead.b++;
 wqmr = (int) *qback.b++;
 break;
 case 1: wq = (int) *q.w++;	wqp1 = (int) *q.w++; wqpr = (int) *qahead.w++;
 wqmr = (int) *qback.w++;
 break;
 case 2: wq = (int) *q.l++;	wqp1 = (int) *q.l++; wqpr = (int) *qahead.l++;
 wqmr = (int) *qback.l++;
 break;
 }
 /* assume central point is min to start */
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 ptr[ip] = i;	/* location of min */
 if (i == ip) mincount++;
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;
 switch (type) {
 case 0: wqp1 = (int) *q.b++;	wqpr = (int) *qahead.b++;
 wqmr = (int) *qback.b++; break;
 case 1: wqp1 = (int) *q.w++;	wqpr = (int) *qahead.w++;
 wqmr = (int) *qback.w++; break;
 case 2: wqp1 = (int) *q.l++;	wqpr = (int) *qahead.l++;
 wqmr = (int) *qback.l++; break;
 }
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 switch (type) {
 case 0: wqpr = (int) *qahead.b++; wqmr = (int) *qback.b++; break;
 case 1: wqpr = (int) *qahead.w++; wqmr = (int) *qback.w++; break;
 case 2: wqpr = (int) *qahead.l++; wqmr = (int) *qback.l++; break;
 }
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }	/* end of mq loop */
 /* finish the last row, no wqpr */
 /* first point of last row */
 switch (type) {
 case 0: wq = (int) *q.b++;		wqp1 = (int) *q.b++;
 wqmr = (int) *qback.b++;  break;
 case 1: wq = (int) *q.w++;		wqp1 = (int) *q.w++;
 wqmr = (int) *qback.w++;  break;
 case 2: wq = (int) *q.l++;		wqp1 = (int) *q.l++;
 wqmr = (int) *qback.l++;  break;
 }
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;
 switch (type) {
 case 0: wqp1 = (int) *q.b++; wqmr = (int) *qback.b++;  break;
 case 1: wqp1 = (int) *q.w++; wqmr = (int) *qback.w++;  break;
 case 2: wqp1 = (int) *q.l++; wqmr = (int) *qback.l++;  break;
 }
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 switch (type) {
 case 0: wqmr = (int) *qback.b++;  break;
 case 1: wqmr = (int) *qback.w++;  break;
 case 2: wqmr = (int) *qback.l++;  break;
 }
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 printf("cell count = %d, final ip = %d\n", mincount, ip);
 } break;
 case 3: {
 float	wq, wmin, wqm1, wqp1, wqmr, wqpr, *q, *qback, *qahead;
 
 q = (float *) ptarr;	/* first element */
 qback = q;		qahead = q + n;
 ip = 0;

 /* first point, 2 checks */
 wq = *q++;	wqp1 = *q++;
 wqpr = *qahead++;
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* rest of top row except last point, 3 checks, no wqmr */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
 wqpr = *qahead++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* last point in top row, 2 checks */
 wqm1 = wq;	wmin = wq = wqp1;
 wqpr = *qahead++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;

 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
 /* start row, not top or bottom */
 /* left hand edge, 3 checks, no wqm1 */
 wq = *q++;	wqp1 = *q++;
 wqmr = *qback++;	wqpr = *qahead++;
 /* assume central point is min to start */
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
 wqmr = *qback++;	wqpr = *qahead++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 wqmr = *qback++;	wqpr = *qahead++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }	/* end of mq loop */
 /* finish the last row, no wqpr */
 /* first point of last row */
 wq = *q++;	wqp1 = *q++;
 wqmr = *qback++;
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
 wqmr = *qback++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 wqmr = *qback++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 printf("cell count = %d, final ip = %d\n", mincount, ip);
 } break;
 case 4: {
 double	wq, wmin, wqm1, wqp1, wqmr, wqpr, *q, *qback, *qahead;
 
 q = (double *) ptarr;	/* first element */
 qback = q;		qahead = q + n;
 ip = 0;

 /* first point, 2 checks */
 wq = *q++;	wqp1 = *q++;
 wqpr = *qahead++;
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* rest of top row except last point, 3 checks, no wqmr */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
 wqpr = *qahead++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* last point in top row, 2 checks */
 wqm1 = wq;	wmin = wq = wqp1;
 wqpr = *qahead++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;

 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
 /* start row, not top or bottom */
 /* left hand edge, 3 checks, no wqm1 */
 wq = *q++;	wqp1 = *q++;
 wqmr = *qback++;	wqpr = *qahead++;
 /* assume central point is min to start */
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
 wqmr = *qback++;	wqpr = *qahead++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 wqmr = *qback++;	wqpr = *qahead++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (wqpr < wmin) { wmin = wqpr; i = ip + n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }	/* end of mq loop */
 /* finish the last row, no wqpr */
 /* first point of last row */
 wq = *q++;	wqp1 = *q++;
 wqmr = *qback++;
 wmin = wq;
 i = ip;
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
 wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
 wqmr = *qback++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqp1 < wmin) { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 wqmr = *qback++;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 printf("cell count = %d, final ip = %d\n", mincount, ip);
 }  break;
 }	/* end of switch */
 /* ptr now contains the min of each pixel and its neighbors */
 /* mincount is the number of cells */
 /* mincount is number of mins */
 /* set up an array to hold unique values */
 jq = mincount;
 iq = find_sym(basin_coords);
 if (redef_array(iq,  2, 1, &jq) != 1) return -1;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 bascord = (int *) ((char *)h + sizeof(struct ahead));

 nq = np;
 ip = 0;
 p = ptr;
 limit = n*m;	/* it could happen! */
 while (nq-- > 0) {
 i = *p;
 /* printf("ip,i = %d %d\n", ip, i);*/
 if (i != ip) {
 /* not a min, roll down to one */
 /* printf("i, ptr[i] = %d %d\n", i, ptr[i]);*/
 ic = 0;
 while ( i != ptr[i]) {
 /* printf("while: i, ptr[i] = %d %d\n", i, ptr[i]);*/
   i = ptr[i];
   if (ic++ > limit) { printf("BASINS: rolling too long at index %d\n", ip);
   break; }
 }
 }
 else { *bascord++ = i; } /* was a min, add to our list of unique values */

 *p++ = i;
 ip++;
 }

 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_basins8(narg,ps)			/* basins8 function */
 /* the call is ic = basins8(array)
    where array must be a 2-D array, returns the coordinates that
    each point would roll to
    other names might be dumps, doldrums, sinks, pockets, dips, craters */
 int	narg, ps[];
 {
 int	iq, n, m, np, nq, mq, ic, i, type, ip, result_sym, limit, mincount=0;
 int	jq, dim[8];
 int	*ptarr, *ptr, *p, *bascord;
 struct	ahead	*h;
 union	types_ptr q, qback, qahead;
 iq = ps[0];
				 /* first arg must be a 2-D array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 n = h->dims[0];	m = h->dims[1];
 dim[0] = n;		dim[1] = m;
 if ( n < 3 || m < 3 ) {
 	printf("BASINS: arrays too small, nx, ny = %d %d\n", n, m);
	return -1; }
 np = n * m;
 type = sym[iq].type;
 ptarr = (int *) ((char *)h + sizeof(struct ahead));
 result_sym = array_scratch(2, 2, dim);		/*for the result */
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 ptr = (int *) ((char *)h + sizeof(struct ahead));
 /* try just the 4 adjacent points */
 /* the edges and corners are done as special cases */

 switch ( type) {
 case 0:
 case 1:
 case 2: {
 /* this handles all integer types, has several internal switches on
 type */
 int	wq, wmin, wqm1, wqp1, wqmr, wqpr;
 int	wqmrm, wqmrp, wqprm, wqprp;
 
 
 /* in this version, allow motion for == but only to smaller indices */
 /* first point, 3 checks */
 ip = 0;
 switch (type) {
   case 0:
     q.b = (byte*) ptarr;	/* first element */
     qback.b = q.b;		qahead.b = q.b + n;
     wq = (byte) *q.b++;		wqp1 = (byte) *q.b++;
     wqpr = (byte) *qahead.b++;	wqprp = (byte) *qahead.b++;
     break;
   case 1:
     q.w = (short*) ptarr;	/* first element */
     qback.w = q.w;		qahead.w = q.w + n;
     wq = (int) *q.w++;		wqp1 = (int) *q.w++;
     wqpr = (int) *qahead.w++;	wqprp = (int) *qahead.w++;
     break;
   case 2:
     q.l = (int*) ptarr;	/* first element */
     qback.l = q.l;		qahead.l = q.l + n;
     wq = (int) *q.l++;		wqp1 = (int) *q.l++;
     wqpr = (int) *qahead.l++;	wqprp = (int) *qahead.l++;
     break;
 }
 wmin = wq;
 i = ip;
 if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
 if (wqprp < wmin) { wmin = wqprp; i = ip + n+ 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* rest of top row except last point, 5 checks, no wqmr */
 nq = n - 2;
 while (nq-- > 0) {
   wqm1 = wq;	wmin = wq = wqp1;
   wqprm = wqpr;	wqpr = wqprp;
   switch (type) {
   case 0: wqp1 = (int) *q.b++;	wqprp = (int) *qahead.b++; break;
   case 1: wqp1 = (int) *q.w++;	wqprp = (int) *qahead.w++; break;
   case 2: wqp1 = (int) *q.l++;	wqprp = (int) *qahead.l++; break;
   }
   i = ip;
   if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
   if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
   if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
   if (wqprp < wmin) { wmin = wqprp; i = ip + n + 1; }
   if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }
 /* last point in top row, 3 checks */
 wqm1 = wq;	wmin = wq = wqp1;
 wqpr = wqprp;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
 if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;

 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
   /* start row, not top or bottom */
   /* left hand edge, 5 checks, no wqm1 */
   switch (type) {
   case 0: wq = (int) *q.b++;	wqp1 = (int) *q.b++; wqpr = (int) *qahead.b++;
     wqprp = (int) *qahead.b++; wqmr = (int) *qback.b++; wqmrp = (int) *qback.b++;
     break;
   case 1: wq = (int) *q.w++;	wqp1 = (int) *q.w++; wqpr = (int) *qahead.w++;
     wqprp = (int) *qahead.w++; wqmr = (int) *qback.w++; wqmrp = (int) *qback.w++;
     break;
   case 2: wq = (int) *q.l++;	wqp1 = (int) *q.l++; wqpr = (int) *qahead.l++;
     wqprp = (int) *qahead.l++; wqmr = (int) *qback.l++; wqmrp = (int) *qback.l++;
     break;
   }
   /* assume central point is min to start */
   wmin = wq;
   i = ip;
   if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n+ 1; }
   if (wqpr < wmin)   { wmin = wqpr; i = ip + n; }
   if (wqprp < wmin)  { wmin = wqprp; i = ip + n + 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
   /* done with left edge, now the middle */
   nq = n - 2;
   while (nq-- > 0) {
     wqm1 = wq;	wmin = wq = wqp1;
     wqmrm = wqmr;	wqmr = wqmrp;
     wqprm = wqpr;	wqpr = wqprp;
     switch (type) {
     case 0:
       wqp1 = (int) *q.b++;	wqprp = (int) *qahead.b++; wqmrp = (int) *qback.b++;
       break;
     case 1:
       wqp1 = (int) *q.w++;	wqprp = (int) *qahead.w++; wqmrp = (int) *qback.w++;
       break;
     case 2:
       wqp1 = (int) *q.l++;	wqprp = (int) *qahead.l++; wqmrp = (int) *qback.l++;
       break;
     }
     i = ip;
     if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
     if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
     if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
     if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
     if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
     if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
     if (wqprp < wmin) { wmin = wqprp; i = ip + n + 1; }
     if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
     if (i == ip) mincount++;
     ptr[ip] = i;	/* location of min */
     ip++;
   }
   /* now the right hand edge, no wqp1 */
   wqm1 = wq;	wmin = wq = wqp1;
   wqmrm = wqmr;	wqmr = wqmrp;
   wqprm = wqpr;	wqpr = wqprp;
   i = ip;
   if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqpr < wmin)   { wmin = wqpr; i = ip + n; }
   /* error discovered 12/4/2003, following line wrong, replaced by
   line after it */
   //if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
   if (wqprm < wmin)  { wmin = wqprm; i = ip + n - 1; }
   if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }	/* end of mq loop */
 /* finish the last row, no wqpr */
 /* first point of last row */
 switch (type) {
 case 0: wq = (int) *q.b++;		wqp1 = (int) *q.b++;
   wqmr = (int) *qback.b++;	wqmrp = (int) *qback.b++;  break;
 case 1: wq = (int) *q.w++;		wqp1 = (int) *q.w++;
   wqmr = (int) *qback.w++;	wqmrp = (int) *qback.w++;  break;
 case 2: wq = (int) *q.l++;		wqp1 = (int) *q.l++;
   wqmr = (int) *qback.l++;	wqmrp = (int) *qback.l++;  break;
 }
 wmin = wq;
 i = ip;
 if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
 if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
   wqm1 = wq;	wmin = wq = wqp1;
   wqmrm = wqmr;	wqmr = wqmrp;
   switch (type) {
   case 0: wqp1 = (int) *q.b++; wqmrp = (int) *qback.b++;  break;
   case 1: wqp1 = (int) *q.w++; wqmrp = (int) *qback.w++;  break;
   case 2: wqp1 = (int) *q.l++; wqmrp = (int) *qback.l++;  break;
   }
   i = ip;
   if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
   if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
   if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 wqmrm = wqmr;	wqmr = wqmrp;
 i = ip;
 if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
 if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 printf("cell count = %d, final ip = %d\n", mincount, ip);
 } break;

 case 3: {
 float	wq, wmin, wqm1, wqp1, wqmr, wqpr, *q, *qback, *qahead;
 float	wqmrm, wqmrp, wqprm, wqprp;
 
 q = (float*) ptarr;	/* first element */
 qback = q;		qahead = q + n;
 ip = 0;
 
 /* in this version, allow motion for == but only to smaller indices */

 /* first point, 3 checks */
 wq = *q++;		wqp1 = *q++;
 wqpr = *qahead++;	wqprp = *qahead++;
 wmin = wq;
 i = ip;
 if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
 if (wqprp < wmin) { wmin = wqprp; i = ip + n+ 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* rest of top row except last point, 5 checks, no wqmr */
 nq = n - 2;
 while (nq-- > 0) {
   wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
   wqprm = wqpr;	wqpr = wqprp;		wqprp = *qahead++;
   i = ip;
   if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
   if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
   if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
   if (wqprp < wmin) { wmin = wqprp; i = ip + n + 1; }
   if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }
 /* last point in top row, 3 checks */
 wqm1 = wq;	wmin = wq = wqp1;
 wqpr = wqprp;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
 if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;

 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
   /* start row, not top or bottom */
   /* left hand edge, 5 checks, no wqm1 */
   wq = *q++;	wqp1 = *q++;
   wqpr = *qahead++;	wqprp = *qahead++;
   wqmr = *qback++;	wqmrp = *qback++;
   /* assume central point is min to start */
   wmin = wq;
   i = ip;
   if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
   if (wqpr < wmin)   { wmin = wqpr; i = ip + n; }
   if (wqprp < wmin)  { wmin = wqprp; i = ip + n + 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
   /* done with left edge, now the middle */
   nq = n - 2;
   while (nq-- > 0) {
     wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
     wqmrm = wqmr;	wqmr = wqmrp;	wqmrp = *qback++;
     wqprm = wqpr;	wqpr = wqprp;	wqprp = *qahead++;
     i = ip;
     if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
     if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
     if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
     if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
     if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
     if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
     if (wqprp < wmin) { wmin = wqprp; i = ip + n + 1; }
     if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
     if (i == ip) mincount++;
     ptr[ip] = i;	/* location of min */
     ip++;
   }
   /* now the right hand edge, no wqp1 */
   wqm1 = wq;	wmin = wq = wqp1;
   wqmrm = wqmr;	wqmr = wqmrp;
   wqprm = wqpr;	wqpr = wqprp;
   i = ip;
   if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqpr < wmin)   { wmin = wqpr; i = ip + n; }
   /* error discovered 12/4/2003, following line wrong, replaced by
   line after it */
   //if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
   if (wqprm < wmin)  { wmin = wqprm; i = ip + n - 1; }
   if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }	/* end of mq loop */
 /* finish the last row, no wqpr */
 /* first point of last row */
 wq = *q++;		wqp1 = *q++;
 wqmr = *qback++;	wqmrp = *qback++;
 wmin = wq;
 i = ip;
 if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
 if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
   wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
   wqmrm = wqmr;	wqmr = wqmrp;	wqmrp = *qback++;
   i = ip;
   if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
   if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
   if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 wqmrm = wqmr;	wqmr = wqmrp;
 i = ip;
 if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
 if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 printf("cell count = %d, final ip = %d\n", mincount, ip);
 } break;

 case 4: {
 double	wq, wmin, wqm1, wqp1, wqmr, wqpr, *q, *qback, *qahead;
 double	wqmrm, wqmrp, wqprm, wqprp;
 
 q = (double *) ptarr;	/* first element */
 qback = q;		qahead = q + n;
 ip = 0;
 
 /* in this version, allow motion for == but only to smaller indices */

 /* first point, 3 checks */
 wq = *q++;		wqp1 = *q++;
 wqpr = *qahead++;	wqprp = *qahead++;
 wmin = wq;
 i = ip;
 if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
 if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
 if (wqprp < wmin) { wmin = wqprp; i = ip + n+ 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* rest of top row except last point, 5 checks, no wqmr */
 nq = n - 2;
 while (nq-- > 0) {
   wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
   wqprm = wqpr;	wqpr = wqprp;		wqprp = *qahead++;
   i = ip;
   if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
   if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
   if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
   if (wqprp < wmin) { wmin = wqprp; i = ip + n + 1; }
   if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }
 /* last point in top row, 3 checks */
 wqm1 = wq;	wmin = wq = wqp1;
 wqpr = wqprp;
 i = ip;
 if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
 if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
 if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;

 /* now the interior rows */
 mq = m - 2;
 while (mq-- > 0) {
   /* start row, not top or bottom */
   /* left hand edge, 5 checks, no wqm1 */
   wq = *q++;	wqp1 = *q++;
   wqpr = *qahead++;	wqprp = *qahead++;
   wqmr = *qback++;	wqmrp = *qback++;
   /* assume central point is min to start */
   wmin = wq;
   i = ip;
   if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n+ 1; }
   if (wqpr < wmin)   { wmin = wqpr; i = ip + n; }
   if (wqprp < wmin)  { wmin = wqprp; i = ip + n + 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
   /* done with left edge, now the middle */
   nq = n - 2;
   while (nq-- > 0) {
     wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
     wqmrm = wqmr;	wqmr = wqmrp;	wqmrp = *qback++;
     wqprm = wqpr;	wqpr = wqprp;	wqprp = *qahead++;
     i = ip;
     if (wqm1 <= wmin) { wmin = wqm1; i = ip - 1; }
     if (wqp1 < wmin)  { wmin = wqp1; i = ip + 1; }
     if (wqmr <= wmin) { wmin = wqmr; i = ip - n; }
     if (wqpr < wmin)  { wmin = wqpr; i = ip + n; }
     if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
     if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
     if (wqprp < wmin) { wmin = wqprp; i = ip + n + 1; }
     if (wqprm < wmin) { wmin = wqprm; i = ip + n - 1; }
     if (i == ip) mincount++;
     ptr[ip] = i;	/* location of min */
     ip++;
   }
   /* now the right hand edge, no wqp1 */
   wqm1 = wq;	wmin = wq = wqp1;
   wqmrm = wqmr;	wqmr = wqmrp;
   wqprm = wqpr;	wqpr = wqprp;
   i = ip;
   if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqpr < wmin)   { wmin = wqpr; i = ip + n; }
   /* error discovered 12/4/2003, following line wrong, replaced by
   line after it */
   //if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
   if (wqprm < wmin)  { wmin = wqprm; i = ip + n - 1; }
   if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }	/* end of mq loop */
 /* finish the last row, no wqpr */
 /* first point of last row */
 wq = *q++;		wqp1 = *q++;
 wqmr = *qback++;	wqmrp = *qback++;
 wmin = wq;
 i = ip;
 if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
 if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
 if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 ip++;
 /* done with left edge, now the middle */
 nq = n - 2;
 while (nq-- > 0) {
   wqm1 = wq;	wmin = wq = wqp1;	wqp1 = *q++;
   wqmrm = wqmr;	wqmr = wqmrp;	wqmrp = *qback++;
   i = ip;
   if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
   if (wqp1 < wmin)   { wmin = wqp1; i = ip + 1; }
   if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
   if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
   if (wqmrp <= wmin) { wmin = wqmrp; i = ip - n + 1; }
   if (i == ip) mincount++;
   ptr[ip] = i;	/* location of min */
   ip++;
 }
 /* now the right hand edge, no wqp1 */
 wqm1 = wq;	wmin = wq = wqp1;
 wqmrm = wqmr;	wqmr = wqmrp;
 i = ip;
 if (wqm1 <= wmin)  { wmin = wqm1; i = ip - 1; }
 if (wqmr <= wmin)  { wmin = wqmr; i = ip - n; }
 if (wqmrm <= wmin) { wmin = wqmrm; i = ip - n - 1; }
 if (i == ip) mincount++;
 ptr[ip] = i;	/* location of min */
 printf("cell count = %d, final ip = %d\n", mincount, ip);
 } break;


 }	/* end of switch */
 /* ptr now contains the min of each pixel and its neighbors */
 /* mincount is number of mins */
 /* set up an array to hold unique values */
 jq = mincount;
 iq = find_sym(basin_coords);
 if (redef_array(iq,  2, 1, &jq) != 1) return -1;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 bascord = (int *) ((char *)h + sizeof(struct ahead));

 nq = np;
 ip = 0;
 p = ptr;
 limit = n*m;	/* it could happen! */
 while (nq-- > 0) {
   i = *p;
   /* printf("ip,i = %d %d\n", ip, i);*/
   if (i != ip) {
     /* not a min, roll down to one */
     /* printf("i, ptr[i] = %d %d\n", i, ptr[i]);*/
     ic = 0;
     while ( i != ptr[i]) {
     /* printf("while: i, ptr[i] = %d %d\n", i, ptr[i]);*/
       i = ptr[i];
       if (ic++ > limit) { printf("BASINS: rolling too long at index %d\n", ip);
       return -1; }
     }
   } 
   else { *bascord++ = i; } /* was a min, add to our list of unique values */
   *p++ = i;
   ip++;
 }

 return result_sym;
 }
 /*------------------------------------------------------------------------- */
 /* some variables common to several routines */
 static	int	nm1, mm1, nm2, mm2, n, regrid_type, stretchmark_flag;
 static	float	fnm1, fnm5, fmm1, fmm5, xl,yl;
 static	union	types_ptr base, out;
 /*------------------------------------------------------------------------- */
int ana_stretch(narg,ps)			/* stretch function */
 /* the call is MS = STRETCH( M2, DELTA)
    where M2 is the original array to be destretched, MS is the result, and
    DELTA is a displacement grid as generated by GRIDMATCH */
 int	narg, ps[];
 {
 int	iq, m, nxg, nyg, result_sym, jy, j1, j2;
 int	nxgm, nygm, ix, iy, jx, i1, i2;
 float	xd, yd, ys, y, x, xs, dy, dy1, dy0;
 float	dx, dx1, dx0, fn, fm, xq, xinc, yinc;
 float	w1, w2, w3, w4, c1, c2, c3, c4, b1, b2, b3, b4;
 float	*jpbase, *jbase, *xgbase;
 struct	ahead	*h;
 iq = ps[0];
 stretchmark_flag = 1;
				 /* first arg must be a 2-D array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 regrid_type = sym[iq].type;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 base.l = (int *) ((char *)h + sizeof(struct ahead));
 n = h->dims[0];	m = h->dims[1];
 nm1 = n - 1; mm1 = m - 1; nm2 = n -2; mm2 = m -2;
 fn = n; fm = m;
 fnm1 = fn - 1.0;	fnm5 = fn -0.5;
 fmm1 = fm - 1.0;	fmm5 = fm -0.5;

					 /* make the output array */
 result_sym = array_clone(iq, regrid_type);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 out.l = (int *) ((char *)h + sizeof(struct ahead));
				 /* second arg must be a displacement grid */
 iq = ps[1];
 if ( sym[iq].class != 4 ) return execute_error(66);
 iq = ana_float(1, &ps[1] );
 h = (struct ahead *) sym[iq].spec.array.ptr;
 xgbase = (float *) ((char *)h + sizeof(struct ahead));
 if ( h->ndim != 3 | h->dims[0] != 2 ) return execute_error(102);
 nxg = h->dims[1];	nyg = h->dims[2];  nxgm = nxg - 1;  nygm = nyg - 1;
	 /* linearly interpolate the displacement grid values over array */
	 /* similar to regrid3 in inner part */
 xd = (float) n / nxg;	xinc = 1. / xd;	xs = xinc + (xd - 1.)/(2. * xd);
 yd = (float) m / nyg;	yinc = 1. / yd;	y = ys = yinc + (yd - 1.)/(2. * yd);
 for (iy=0;iy<m;iy++) {
  x = xs;  jy = y;  dy = y - jy;  dy1 = 1.0 - dy;
  if (jy < 1) { j1 = j2 = 0; } else { if (jy >= nyg) { j1 = j2 = nygm; }
	  else { j1 = jy - 1; j2 = j1 + 1; } }
  jbase  = xgbase + j1 * 2 * nxg;
  jpbase = xgbase + j2 * 2 * nxg;
   for (ix=0;ix<n;ix++) {
   jx = x;  dx = x - jx;  dx1 = 1.0 - dx;
   if (jx < 1) { i1 = i2 = 0; } else { if (jx >= nxg) { i1 = i2 = nxgm; }
	   else { i1 = jx - 1; i2 = i1 + 1; } }
   w1 = dy1 * dx1;  w2 = dy1 * dx;  w3 = dy * dx1;  w4 = dy * dx;
   i1 = 2 * i1;  i2 = 2 * i2;
   xl = w1 * *(jbase+i1) + w2 * *(jbase+i2) + w3 * *(jpbase+i1)
	   + w4 * *(jpbase+i2) + (float) ix;
   i1 += 1;  i2 += 1;
   yl = w1 * *(jbase+i1) + w2 * *(jbase+i2) + w3 * *(jpbase+i1)
	   + w4 * *(jpbase+i2) + (float) iy;

   /* xl, yl is the place, now do a cubic interpolation for value */

   switch (resample_type) {
    case BI_CUBIC_SMOOTH:	bicubic_f();  break;
    case BI_CUBIC:		bicubic_fc(); break;
   }
   x = xs + ix*xinc; }
  y = ys + iy*yinc; }
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_getmin9(narg,ps)			/* getmin9 function */
 /* local minimum for a 3x3 array */
 int	narg, ps[];
 {
 int	iq;
 float	*p, x0, y0;
 struct	ahead	*h;
					 /*first arg must be a 3x3 array */
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(100);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 || h->dims[0] != 3 || h->dims[1] != 3)
	 return execute_error(100);
 iq = ana_float(1, &iq );
 h = (struct ahead *) sym[iq].spec.array.ptr;
 p = (float *) ((char *)h + sizeof(struct ahead));
 getmin(p, &x0, &y0);
 if (redef_scalar( ps[1], 3, &x0) != 1) return execute_error(13);
 if (redef_scalar( ps[2], 3, &y0) != 1) return execute_error(13);
 return 1;
 }
 /*------------------------------------------------------------------------- */
int getmin(p, x0, y0)				/* internal min */
 float	*p, *x0, *y0;
 {
 float	f11, f12, f13, f21, f22, f23, f31, f32, f33;
 float	toler, fx, fy, t, fxx, fyy, fxy;
				 /* find the min, p points to a 3x3 array */
 f11 = *p++;	f21 = *p++;	f31 = *p++;
 f12 = *p++;	f22 = *p++;	f32 = *p++;
 f13 = *p++;	f23 = *p++;	f33 = *p++;
 
 fx = 0.5 * ( f32 - f12 );	fy = 0.5 * ( f23 - f21 );
 t = 2.* ( f22 );
 fxx =  f32 + f12 - t;	fyy = f23 + f21 - t;
 /* find in which quadrant the minimum lies */
 if (f33 < f11)  { if (f33 < f31) {
	 if (f33 < f13) fxy = f33+f22-f32-f23; else fxy = f23+f12-f22-f13; }
	 else			 {
	 if (f31 < f13) fxy = f32+f21-f31-f22; else fxy = f23+f12-f22-f13; }
  } else   { if (f11 < f31) {
	 if (f11 < f13) fxy = f22+f11-f21-f12; else fxy = f23+f12-f22-f13; }
	 else			 {
	 if (f31 < f13) fxy = f32+f21-f31-f22; else fxy = f23+f12-f22-f13; }
 }
 t = -1./(fxx *fyy - fxy *fxy);
 *x0 = t * (fx * fyy - fy * fxy);
 *y0 = t * (fy * fxx - fx * fxy);
 if ( ABS(*x0) >= 0.75 || ABS(*x0) >= 0.75 ) { *x0 = -fx/fxx; *y0 = -fy/fyy; }
 return 1;
 }
 /*------------------------------------------------------------------------- */
int ana_expand(narg,ps)				/* expand function */ 
 /* magnify (or even demagnify) an array */
 /* result = expand(source, magx, magy, type)
    type = 0 for nearest neighbor, 1 for bilinear */
 int	narg, ps[];
 {
 int	n, m, ns, ms, dim[2], j, i, inc, nc, result_sym, oldi, iq, type, ns2;
 float	zc1, zc2, z00, z01, z10, z11, xq;
 float	sx, sy, stepx, stepy, yrun, xrun, xbase, q, p, fn, fm;
 struct	ahead	*h;
 union	types_ptr base, jbase, out, jout, nlast;
				 /* first argument must be a 2-D array */
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(95);
 type = sym[iq].type;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 base.l = (int *) ((char *)h + sizeof(struct ahead));
					 /* we want a 1 or 2-D array only */
 if ( h->ndim > 2 ) return execute_error(95);
 n = h->dims[0];	  if ( h->ndim > 1 ) m = h->dims[1]; else m = 1;
 fn = (float) n;	fm = (float) m;
					 /* get the magnification(s) */
 sx = float_arg( ps[1]);	sy = sx;
 if (narg>2) sy = float_arg( ps[2]);
 if (narg>3) tvsmt = int_arg( ps[3]); 
						 /* compute new ns and ms */
 ns = n * sx;	ms = m * sy;	ms = MAX(ms, 1); ns = MAX(ns, 1);
 dim[0] = ns;	dim[1] = ms;
 result_sym = array_scratch(type, h->ndim, dim);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 out.l = jout.l = (int *) ((char *)h + sizeof(struct ahead));
							 /* setup the steps */
 stepx = 1.0 / sx;	stepy = 1.0 /sy;	xbase = 0.5 + 0.5 * stepx;
 yrun = 0.5 + 0.5 * stepy;			inc = ana_type_size[type];
 nlast.b = base.b + inc * (m-2) * n;
 jbase.b = base.b;
	 /* use nearest neighbor if tvsmt = 0 and bilinear otherwise */
 if ( tvsmt != 0 ) {
 while (ms--) {				/* column loop */
 j = yrun;	q = yrun - j;
 if ( j < 1 ) {				/*bottom border region */
 jbase.b = base.b;	q = 0.0;
 } else {
 if ( j >= m ) {				/* upper border */
 q = 1.0;	jbase.b = nlast.b;
 }
 else					/* normal case */
 jbase.b = base.b + (j - 1) * n * inc;
 }
 oldi = 0;				/* for all cases */
 switch (type) {
 case 0: z00 = *(jbase.b);  z01 = *(jbase.b + n); break;
 case 1: z00 = *(jbase.w);  z01 = *(jbase.w + n); break;
 case 2: z00 = *(jbase.l);  z01 = *(jbase.l + n); break;
 case 3: z00 = *(jbase.f);  z01 = *(jbase.f + n); break;
 case 4: z00 = *(jbase.d);  z01 = *(jbase.d + n); break;
 }
 z10 = z00;	z11 = z01;	zc1 = (z01 - z00) * q + z00;   zc2 = 0.0;
 xrun = xbase;	out.b = jout.b;
 ns2 = ns;	/* L.S. 2/26/96 fix for bug in the loop below */
 while (ns2--) {				/* row loop */
  i = xrun;
  if (i >=n) i = n - 1;
  p = xrun - i;
 if ( i != oldi) {			/* need new corners */
 z00 = z10;	z01 = z11;	oldi = i;
 switch (type) {
 case 0: z10 = *(jbase.b + i);  z11 = *(jbase.b + i + n); break;
 case 1: z10 = *(jbase.w + i);  z11 = *(jbase.w + i + n); break;
 case 2: z10 = *(jbase.l + i);  z11 = *(jbase.l + i + n); break;
 case 3: z10 = *(jbase.f + i);  z11 = *(jbase.f + i + n); break;
 case 4: z10 = *(jbase.d + i);  z11 = *(jbase.d + i + n); break;
 }
 zc1 = z10 - z00;  zc2 = (z11 - z01 - zc1) * q + zc1;
 zc1 = (z01 - z00) * q + z00;
 }
 xq = p * zc2 + zc1;
 switch (type) {
 case 0: xq = MAX( 0, MIN( 255, xq));	*out.b++ = xq; break;
 case 1: *out.w++ = xq; break;
 case 2: *out.l++ = xq; break;
 case 3: *out.f++ = xq; break;
 case 4: *out.d++ = xq; break;
 }
 xrun = xrun + stepx;
 /* L.S. fix for bug, next line removed and loop over a count instead
 otherwise we sometimes go over by 1 and get a memory segment fault */
 /* if (xrun >= fn) break;*/			/* way out of row loop */
 }
						 /* rest of row */
 xq = zc1 + zc2;	jout.b += ns * inc;	nc = (jout.b - out.b) / inc;
 while (nc > 0 ) { nc--;
 switch (type) {
 case 0: xq = MAX( 0, MIN( 255, xq));	*out.b++ = xq; break;
 case 1: *out.w++ = xq; break;
 case 2: *out.l++ = xq; break;
 case 3: *out.f++ = xq; break;
 case 4: *out.d++ = xq; break;
 }
 }
 yrun += stepy;
 }
 } else {				/* nearest neighbor case */
 while (ms--) {				/* column loop */
 j = yrun - 0.5;
 /*printf("j, yrun = %d %f\n",j, yrun);*/
 if ( j < 1 ) {				/*bottom border region */
 jbase.b = base.b;
 } else {
 if ( j >= m ) {				/* upper border */
 jbase.b = base.b + inc * (m-1) * n;
 }
					 /* normal case */
 jbase.b = base.b + j * n * inc;
 /*printf("normal, jbase.b = %d\n",jbase.b);*/
 }
 xrun = xbase - 0.5;	out.b = jout.b;
 ns2 = ns;	/* L.S. 2/26/96 fix for bug in the loop below */
 while (ns2--) {				/* row loop */
 /* while (1) { */
 i = xrun;
 /* L.S. fix for bug, next line removed and loop over a count instead
 otherwise we sometimes go over by 1 and get a memory segment fault */
 /* if (i >= n) break; */			/* way out of row loop */
 /*printf("i, xrun = %d %f\n",i,xrun);*/
 switch (type) {
 case 0: *out.b++ = *(jbase.b + i); break;
 case 1: *out.w++ = *(jbase.w + i); break;
 case 2: *out.l++ = *(jbase.l + i); break;
 case 3: *out.f++ = *(jbase.f + i); break;
 case 4: *out.d++ = *(jbase.d + i); break;
 }
 xrun = xrun + stepx;
 }
					 /* rest of row (if any) */
 i = n - 1;	jout.b += ns * inc;	nc = (jout.b - out.b) / inc;
 /*printf("nc = %d\n",nc);*/
 while (nc > 0 ) { nc--;
 switch (type) {
 case 0: *out.b++ = *(jbase.b + i); break;
 case 1: *out.w++ = *(jbase.w + i); break;
 case 2: *out.l++ = *(jbase.l + i); break;
 case 3: *out.f++ = *(jbase.f + i); break;
 case 4: *out.d++ = *(jbase.d + i); break;
 }
 }
 yrun += stepy;
 }
 }
 return result_sym;
 }
 /*--------------------------------------------------------------------------*/
double systime()				/* internal systime */
 {
 double t;
 struct timeval tp;
 struct timezone tzp;
 gettimeofday(&tp, &tzp);
 t = (double) tp.tv_sec + .000001* (double) tp.tv_usec;
 return t;
 }
 /*------------------------------------------------------------------------- */
int zoomerx(ain,n,m,symout,nx2,ny2,sym_flag,zfac,type) /* internal zoom */
 byte	*ain;
 int	n, m, *symout, *nx2, *ny2, sym_flag, zfac, type;
 /* zoom a byte array by zfac */
 /* requires the original address, some size info */
 /* creates a symbol $zoom_temp to allow re-use of memory for some calls */
 {
 int	ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq;
 // double t1, t2;
 struct	ahead	*h;
 byte	*pin, *pout, *poutbase;
 union	{ byte	bb[4];   int  ii; } tmp;
 int	*p1, *p2, n2, nst7, tmpint, k, nsd2, delta, nxdupe, nydupe, mm, nn;
 pin = ain;
 /* compute new ns and ms, ns must be 0 mod 4 */
 ns = n * zfac;	ms = m * zfac;
 leftover = (4-ns%4)%4;
 ns = ns + leftover;	/* thus ns is 0 mod 4 */
 *nx2 = ns;	*ny2 = ms;
 dim[0] = ns;	dim[1] = ms;
 /* depending on the state of sym_flag, we create a temporary symbol
 or use $zoom_temp, the latter is for tvplanezoom */
 if (sym_flag) {
 result_sym = array_scratch(0, 2, dim);
 if (result_sym < 0) return -1;
 } else {
 result_sym = find_sym(zoomtemp);
 if (redef_array(result_sym, 0, 2, dim) != 1) return -1;
 }
 *symout = result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 pout = poutbase = (byte *) ((char *)h + sizeof(struct ahead));

 // t1 = systime();

 if (zfac >= 4) {
 /* these are zoomed enough to have an I*4 or more across */
 /* assumes zfac is either 4, 8, or 16; e.g., 5 wouldn't work */
 p1 = (int *) poutbase;
 delta = (zfac/4)*(n-1);
 nxdupe = (zfac)/4;
 nydupe = zfac - 1;
 for (j=0;j<m;j++) {
 for (i=0;i<n;i++)
   { 
    /* first pack an I*4 with the result */
    tmp.bb[0] = tmp.bb[1] = tmp.bb[2] = tmp.bb[3] = *pin++;
    nn = nxdupe;
    while (nn--) *p1++ = tmp.ii;
    mm = nydupe;
    p2 = p1 + delta;
    while (mm--) {
      nn = nxdupe;
      while (nn--) { *p2++ = tmp.ii; }
      p2 = p2 + delta;
     }

   }
  // p1 = p2;
   p1 = p2 - delta;
   }
 }

 //t2 = systime();
 //printf("time = %12.4f\n", t2-t1);
 return 1;
 }
 /*------------------------------------------------------------------------- */
int zoomer2(ain, n, m, symout, nx2, ny2, sym_flag) /* internal zoom2 */
 byte	*ain;
 int	n, m, *symout, *nx2, *ny2, sym_flag;
 /* zoom a byte array by 2 */
 /* requires the original address, some size info */
 /* creates a symbol $zoom_temp to allow re-use of memory for some calls */
 {
 int	ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq;
 struct	ahead	*h;
 byte	*pin, *pout, *poutbase, tmp;
 int	*p1, *p2, n2, k;
#ifdef __alpha
 long	*p8;
 int	alpha_flag = 0;
#endif
 pin = ain;
 /* for current testing on alpha, n%4 must be 0 */
#ifdef __alpha
 if (n%4 == 0) alpha_flag = 1;
#endif
					/* compute new ns and ms */
 ns = n * 2;	ms = m * 2;
 leftover = (4-ns%4)%4;
 ns = ns + leftover;	/* thus ns is 0 mod 4 */
 *nx2 = ns;	*ny2 = ms;
 inc = ns -1;
 dim[0] = ns;	dim[1] = ms;
 /* depending on the state of sym_flag, we create a temporary symbol
 or use $zoom_temp, the latter is for tvplanezoom */
 if (sym_flag) {
 result_sym = array_scratch(0, 2, dim);
 if (result_sym < 0) return -1;
 } else {
 result_sym = find_sym(zoomtemp);
 if (redef_array(result_sym, 0, 2, dim) != 1) return -1;
 }
 *symout = result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 pout = poutbase =(byte *) ((char *)h + sizeof(struct ahead));
 
 /* first load every other line */
 /*t1 = systime();*/
#ifdef __alpha
 if (alpha_flag) {	/* this is a faster method if we have every row */
 			/* aligned I*4 */
 p8 = (long *) poutbase;
 p1 = (int *) pin;
 for (j=0;j<m;j++) {
  for (i=0; i < n/4; i++) {
   register unsigned long pix = (long) *p1++;
   register unsigned long  zoom_reg = 0;
   zoom_reg += pix & 0xff;	pix = pix << 8;
   zoom_reg += pix & 0xffff00;	pix = pix << 8;
   zoom_reg += pix & 0xffff000000;	pix = pix << 8;
   zoom_reg += pix & 0xffff0000000000;	pix = pix << 8;
   zoom_reg += pix & 0xff00000000000000;
   *p8++ = zoom_reg;
  }
 p8 += n/4;
}
 } else {		/* otherwise we do it the plain way */
 for (j=0;j<m;j++) {
  for (i=0;i<n;i++) { *pout++ = *pin;  *pout++ = *pin;  pin++; }
  if (leftover) { k = leftover; tmp = *(pin-1); while(k--) *pout++ = tmp; }
 pout += ns;
 }
 }
#else
 /* for non-alpha, always the plain way */
 for (j=0;j<m;j++) {
  for (i=0;i<n;i++) { *pout++ = *pin;  *pout++ = *pin;  pin++; }
  if (leftover) { k = leftover; tmp = *(pin-1); while(k--) *pout++ = tmp; }
 pout += ns;
 }
#endif

 /*t2 = systime();*/
 /* now dupe the lines, this is why we wanted ns%4 = 0 */
 ns4 = ns/4;
 p1 = (int *) poutbase;
 p2 = p1 + ns4;
 for (j=0;j<m;j++) {
  for (i=0;i<ns4;i++) {
  *p2++ = *p1++;
  }
 p1 += ns4;	p2 += ns4;
 }
 return 1;
 }
 /*------------------------------------------------------------------------- */
int zoomer3(ain, n, m, symout, nx2, ny2, sym_flag) /* internal zoom3 */
 byte	*ain;
 int	n, m, *symout, *nx2, *ny2, sym_flag;
 /* zoom a byte array by 3 */
 /* requires the original address, some size info */
 /* creates a symbol $zoom_temp to allow re-use of memory for some calls */
 {
 int	ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq;
 struct	ahead	*h;
 byte	*pin, *pout, *poutbase, tmp;
 int	*p1, *p2, *p3, n2, nst2, tmpint, k, nsd2;
 pin = ain;
					/* compute new ns and ms */
 ns = n * 3;	ms = m * 3;
 leftover = (4-ns%4)%4;
 ns = ns + leftover;	/* pad to even multiple of 4 for I*4 transfers */
 *nx2 = ns;	*ny2 = ms;
 dim[0] = ns;	dim[1] = ms;
 nst2 = 2*ns;
 inc = ns -1;
 /* depending on the state of sym_flag, we create a temporary symbol
 or use $zoom_temp, the latter is for tvplanezoom */
 if (sym_flag) {
 result_sym = array_scratch(0, 2, dim);
 if (result_sym < 0) return -1;
 } else {
 result_sym = find_sym(zoomtemp);
 if (redef_array(result_sym, 0, 2, dim) != 1) return -1;
 }
 *symout = result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 pout = poutbase =(byte *) ((char *)h + sizeof(struct ahead));
 
 /* first load every third line */
 /*t1 = systime();*/
 for (j=0;j<m;j++) {
 for (i=0;i<n;i++)
 	{tmp = *pin++; *pout++ = tmp; *pout++ = tmp; *pout++ = tmp;}
 if (leftover) { k = leftover; tmp = *(pin-1); while(k--) *pout++ = tmp; }
 pout += nst2;
 }

 /*t2 = systime();*/
 /* now dupe the lines, this is why we needed ns%4 = 0 */
 ns4 = ns/4;
 nsd2 = ns/2;
 p1 = (int *) poutbase;
 p2 = p1 + ns4;
 p3 = p2 + ns4;
 for (j=0;j<m;j++) {
  for (i=0;i<ns4;i++) { tmpint = *p1++; *p2++ = tmpint; *p3++ = tmpint; }
 p1 += nsd2;	p2 += nsd2;	p3 += nsd2;
 }
 return 1;
 }
 /*------------------------------------------------------------------------- */
int zoomer4(ain, n, m, symout, nx2, ny2, sym_flag) /* internal zoom4 */
 byte	*ain;
 int	n, m, *symout, *nx2, *ny2, sym_flag;
 /* zoom a byte array by 4 */
 /* requires the original address, some size info */
 /* creates a symbol $zoom_temp to allow re-use of memory for some calls */
 {
 int	ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq;
 struct	ahead	*h;
 byte	*pin, *pout, *poutbase, tmp;
 int	*p1, *p2, *p3, *p4, n2, nst3, tmpint, k, nsd2;
 pin = ain;
				/* compute new ns and ms */
 ns = n * 4;	ms = m * 4;	/* no leftovers for x4 */
 *nx2 = ns;	*ny2 = ms;
 dim[0] = ns;	dim[1] = ms;
 nst3 = 3*ns;
 /* depending on the state of sym_flag, we create a temporary symbol
 or use $zoom_temp, the latter is for tvplanezoom */
 if (sym_flag) {
 result_sym = array_scratch(0, 2, dim);
 if (result_sym < 0) return -1;
 } else {
 result_sym = find_sym(zoomtemp);
 if (redef_array(result_sym, 0, 2, dim) != 1) return -1;
 }
 *symout = result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 pout = poutbase =(byte *) ((char *)h + sizeof(struct ahead));
 
 /* first load every fourth line */
 /*t1 = systime();*/
 for (j=0;j<m;j++) {
 for (i=0;i<n;i++)
   {tmp = *pin++; *pout++ = tmp; *pout++ = tmp; *pout++ = tmp; *pout++ = tmp;}
 pout += nst3;
 }

 /*t2 = systime();*/
 /* now dupe the lines, this is why we needed ns%4 = 0 */
 ns4 = ns/4;
 nsd2 = 3*ns4;
 p1 = (int *) poutbase;
 p2 = p1 + ns4;
 p3 = p2 + ns4;
 p4 = p3 + ns4;
 for (j=0;j<m;j++) {
  for (i=0;i<ns4;i++)
  	{ tmpint = *p1++; *p2++ = tmpint; *p3++ = tmpint; *p4++ = tmpint; }
 p1 += nsd2;	p2 += nsd2;	p3 += nsd2;	p4 += nsd2;
 }
 return 1;
 }
 /*------------------------------------------------------------------------- */
int zoomer8(ain, n, m, symout, nx2, ny2, sym_flag) /* internal zoom8 */
 byte	*ain;
 int	n, m, *symout, *nx2, *ny2, sym_flag;
 /* zoom a byte array by 8 */
 /* requires the original address, some size info */
 /* creates a symbol $zoom_temp to allow re-use of memory for some calls */
 {
 int	ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq;
 struct	ahead	*h;
 byte	*pin, *pout, *poutbase;
 union	{ byte	bb[4];   int  ii; } tmp;
 int	*p1, *p2, n2, nst7, tmpint, k, nsd2, delta;
 pin = ain;
				/* compute new ns and ms */
 ns = n * 8;	ms = m * 8;	/* no leftovers for x8 */
 *nx2 = ns;	*ny2 = ms;
 dim[0] = ns;	dim[1] = ms;
 /* depending on the state of sym_flag, we create a temporary symbol
 or use $zoom_temp, the latter is for tvplanezoom */
 if (sym_flag) {
 result_sym = array_scratch(0, 2, dim);
 if (result_sym < 0) return -1;
 } else {
 result_sym = find_sym(zoomtemp);
 if (redef_array(result_sym, 0, 2, dim) != 1) return -1;
 }
 *symout = result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 pout = poutbase =(byte *) ((char *)h + sizeof(struct ahead));
 p1 = (int *) poutbase;
 delta = 2*(n-1);

 /*t1 = systime();*/
 for (j=0;j<m;j++) {
 for (i=0;i<n;i++)
   { tmp.bb[0] = tmp.bb[1] = tmp.bb[2] = tmp.bb[3] = *pin++;
   *p1++ = tmp.ii;		*p1++ = tmp.ii;
   p2 = p1 + delta;
   *p2++ = tmp.ii;		*p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii;		*p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii;		*p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii;		*p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii;		*p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii;		*p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii;		*p2++ = tmp.ii;
   }
   p1 = p2;
   }
   return 1;
 }
 /*------------------------------------------------------------------------- */
int zoomer16(ain, n, m, symout, nx2, ny2, sym_flag) /* internal zoom16 */
 byte	*ain;
 int	n, m, *symout, *nx2, *ny2, sym_flag;
 /* zoom a byte array by 16 */
 /* requires the original address, some size info */
 /* creates a symbol $zoom_temp to allow re-use of memory for some calls */
 {
 int	ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq;
 struct	ahead	*h;
 byte	*pin, *pout, *poutbase;
 union	{ byte	bb[4];   int  ii; } tmp;
 int	*p1, *p2, n2, nst7, tmpint, k, nsd2, delta;
 pin = ain;
				/* compute new ns and ms */
 ns = n * 16;	ms = m * 16;	/* no leftovers for x16 */
 *nx2 = ns;	*ny2 = ms;
 dim[0] = ns;	dim[1] = ms;
 /* depending on the state of sym_flag, we create a temporary symbol
 or use $zoom_temp, the latter is for tvplanezoom */
 if (sym_flag) {
 result_sym = array_scratch(0, 2, dim);
 if (result_sym < 0) return -1;
 } else {
 result_sym = find_sym(zoomtemp);
 if (redef_array(result_sym, 0, 2, dim) != 1) return -1;
 }
 *symout = result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 pout = poutbase =(byte *) ((char *)h + sizeof(struct ahead));
 p1 = (int *) poutbase;
 delta = 4*(n-1);

 /*t1 = systime();*/
 for (j=0;j<m;j++) {
 for (i=0;i<n;i++)
   { tmp.bb[0] = tmp.bb[1] = tmp.bb[2] = tmp.bb[3] = *pin++;
   *p1++ = tmp.ii; *p1++ = tmp.ii; *p1++ = tmp.ii; *p1++ = tmp.ii;
   p2 = p1 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   p2 = p2 + delta;
   *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii; *p2++ = tmp.ii;
   }
   p1 = p2;
   }
   return 1;
 }
 /*------------------------------------------------------------------------- */
int compress2(ain, n, m, symout, nx2, ny2, sym_flag) /* internal compress2 */
 byte	*ain;
 int	n, m, *symout, *nx2, *ny2, sym_flag;
 /* compress a byte array by 2 */
 /* requires the original address, some size info */
 /* creates a symbol $zoom_temp to allow re-use of memory for some calls */
 {
 int	ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq;
 struct	ahead	*h;
 byte	*pin, *pout, *poutbase, *p1, *p2, tmp;
 int	n2, k, nx, ny, xq, nskip;
 pin = ain;
					/* compute new ns and ms */
 ns = n / 2;	ms = m / 2;
 *nx2 = ns;	*ny2 = ms;
 inc = ns -1;
 dim[0] = ns;	dim[1] = ms;
 /* depending on the state of sym_flag, we create a temporary symbol
 or use $zoom_temp, the latter is for tvplanezoom */
 if (sym_flag) {
 result_sym = array_scratch(0, 2, dim);
 if (result_sym < 0) return -1;
 } else {
 result_sym = find_sym(zoomtemp);
 if (redef_array(result_sym, 0, 2, dim) != 1) return -1;
 }
 *symout = result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 pout = poutbase =(byte *) ((char *)h + sizeof(struct ahead));
 
 ny = ms;
 nskip = n + n%2;
 p1 = pin;	p2 = p1 + n;
 while (ny--) { nx = ns;
   while (nx--) { 
   /* sum the 4 pixels here */
   xq = *p1++;  xq += *p2++;
   xq += *p1++; xq += *p2++;
   *pout++ = (byte) ( xq / 4 );
   } p1 += nskip;	p2 += nskip; }
 return 1;
 }
 /*------------------------------------------------------------------------- */
int compress4(ain, n, m, symout, nx2, ny2, sym_flag) /* internal compress4 */
 byte	*ain;
 int	n, m, *symout, *nx2, *ny2, sym_flag;
 /* compress a byte array by 4 */
 /* requires the original address, some size info */
 /* creates a symbol $zoom_temp to allow re-use of memory for some calls */
 {
 int	ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq;
 struct	ahead	*h;
 byte	*pin, *pout, *poutbase, *p1, *p2, *p3, *p4, tmp;
 int	n2, k, nx, ny, xq, nskip;
 pin = ain;
					/* compute new ns and ms */
 ns = n / 4;	ms = m / 4;
 *nx2 = ns;	*ny2 = ms;
 inc = ns -1;
 dim[0] = ns;	dim[1] = ms;
 /* depending on the state of sym_flag, we create a temporary symbol
 or use $zoom_temp, the latter is for tvplanezoom */
 if (sym_flag) {
 result_sym = array_scratch(0, 2, dim);
 if (result_sym < 0) return -1;
 } else {
 result_sym = find_sym(zoomtemp);
 if (redef_array(result_sym, 0, 2, dim) != 1) return -1;
 }
 *symout = result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 pout = poutbase =(byte *) ((char *)h + sizeof(struct ahead));
 
 ny = ms;
 nskip =3* n + n%4;
 p1 = pin;	p2 = p1 + n;	p3 = p2 + n;	p4 = p3 + n;
 while (ny--) { nx = ns;
   while (nx--) { 
   /* sum the 16 pixels here */
   xq = *p1++;  xq += *p2++;  xq += *p3++;  xq += *p4++;
   xq += *p1++; xq += *p2++;  xq += *p3++;  xq += *p4++;
   xq += *p1++; xq += *p2++;  xq += *p3++;  xq += *p4++;
   xq += *p1++; xq += *p2++;  xq += *p3++;  xq += *p4++;
   *pout++ = (byte) ( xq / 16 );
   } p1 += nskip;  p2 += nskip;  p3 += nskip;  p4 += nskip; }
 return 1;
 }
 /*------------------------------------------------------------------------- */
int ana_zoom(narg,ps)				/* zoom function */
 /* zoom a byte array by 2 or more */
 int	narg, ps[];
 {
 int	n, m, ns, ms, dim[2], j, i, inc, ns4, result_sym, leftover, iq, type;
 struct	ahead	*h;
 byte	*pin;
 int	zoom = 2;
 double t1, t2;
				 /* first argument must be a 2-D array */
 t1 = systime();
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(95);
 type = sym[iq].type;
 if (type != 0) return execute_error(118);
 h = (struct ahead *) sym[iq].spec.array.ptr;
 pin = (byte *) ((char *)h + sizeof(struct ahead));
					/* we want a 1 or 2-D array only */
 if ( h->ndim > 2 ) return execute_error(95);
 n = h->dims[0];	  if ( h->ndim > 1 ) m = h->dims[1]; else m = 1;
 if (narg > 1 )  if (int_arg_stat(ps[1], &zoom) != 1) return -1;
 /* only certain zooms allowed, others hit default in switch below */
 if (zoom_test && (zoom >= 4)) {
   printf("alternate zoom\n");
   zoomerx(pin, n, m, &result_sym, &ns, &ms, 1, zoom, 0);

 } else {

 switch (zoom) {
 case 2: zoomer2(pin, n, m, &result_sym, &ns, &ms, 1); break;
 case 3: zoomer3(pin, n, m, &result_sym, &ns, &ms, 1); break;
 case 4: zoomer4(pin, n, m, &result_sym, &ns, &ms, 1); break;
 case 8: zoomer8(pin, n, m, &result_sym, &ns, &ms, 1); break;
 case 16: zoomer16(pin, n, m, &result_sym, &ns, &ms, 1); break;
 case -2: compress2(pin, n, m, &result_sym, &ns, &ms, 1); break;
 case -4: compress4(pin, n, m, &result_sym, &ns, &ms, 1); break;
 default:
   printf("ZOOM: illegal zoom factor of %d, only -4,-2,1,2,3,4,8 allowed\n", zoom);
   return -1;
 }
 }
 t2 = systime();
 printf("time = %12.4f\n", t2-t1);
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_resample(narg,ps)			/* resample function */
 /* the call is y = resample(x, xc, yc)
 where x is the array we want to re-sample and xc and yc are the (x,y)
 coordinates for the result y. Y will have the dimensions of xc and yc
 (which must be the same) and the type of x  */
 int	narg, ps[];
 {
 int	m, nd, ntot, ntot2, iq, jq, j, result_sym;
 struct	ahead	*h;
 float	*xcbase, *ycbase;
 iq = ps[0];
 stretchmark_flag = stretchmark;
				 /* first arg must be a 2-D array */
 if ( sym[iq].class != 4 ) return execute_error(66);
 regrid_type = sym[iq].type;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 if ( h->ndim != 2 ) return execute_error(101);
 base.l = (int *) ((char *)h + sizeof(struct ahead));
 n = h->dims[0];	m = h->dims[1];
 nm1 = n - 1; mm1 = m - 1; nm2 = n -2; mm2 = m -2;
 fnm1 = (float) n - 1.0;	fnm5 = (float) n -0.5;
 fmm1 = (float) m - 1.0;	fmm5 = (float) m -0.5;
				/* check xc and yc, must be the same size */
 if ( sym[ ps[1] ].class != 4 ) return execute_error(95);
 iq = ana_float( 1, &ps[1] );	/* float xc, makes things easier */
 h = (struct ahead *) sym[iq].spec.array.ptr;
 xcbase = (float *) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 ntot = 1; for (j=0;j<nd;j++) ntot *= h->dims[j]; /* # of elements for xc */

 if ( sym[ ps[2] ].class != 4 ) return execute_error(95);
 jq = ana_float( 1, &ps[2] );	/* float yc, makes things easier */
 h = (struct ahead *) sym[jq].spec.array.ptr;
 ycbase = (float *) ((char *)h + sizeof(struct ahead));
 
 /* yc must agree with xc in number of elements */
 nd = h->ndim;
 ntot2 = 1; for (j=0;j<nd;j++) ntot2 *= h->dims[j]; /* # of elements for xc */
 if ( ntot != ntot2 ) return execute_error(103);

					 /* make the output array */
 result_sym = array_clone(iq, regrid_type);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 out.l = (int *) ((char *)h + sizeof(struct ahead));
 
 /* now just loop over ntot and load out using bicubic_f() */
 
 while (ntot--) {
 xl = *xcbase++;	yl = *ycbase++;
 /* xl, yl is the place, now do a cubic interpolation for value */
 switch (resample_type) {
   case BI_CUBIC_SMOOTH:	bicubic_f();  break;
   case BI_CUBIC:		bicubic_fc(); break;
 }
 }
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_regrid(narg,ps)				/* regrid function */
 /* call is Y = REGRID( X, XG, YG, DX, DY) */
 int	narg, ps[];
 {
 regridtypeflag = 0;	/* for a nearest neighbor regrid */
 return regrid_common(narg,ps);
 }
 /*------------------------------------------------------------------------- */
int ana_regrid3(narg,ps)			/* regrid3 function */
 /* call is Y = REGRID3( X, XG, YG, DX, DY) */
 /* similar to regrid but uses bicubic interpolation rather than nearest
  neighbor for pixel value, still uses bilinear for grid, also uses stretch
  marks for boundaries */
 int	narg, ps[];
 {
 regridtypeflag = 1;	/* for a bicubic with stretchmarks regrid */
 stretchmark_flag = 1;
 return regrid_common(narg,ps);
 }
 /*------------------------------------------------------------------------- */
int ana_regrid3ns(narg,ps)			/* regrid3ns function */
 /* call is Y = REGRID3( X, XG, YG, DX, DY) */
 /* similar to regrid but uses bicubic interpolation rather than nearest
  neighbor for pixel value, still uses bilinear for grid, without stretch
  marks for boundaries (hence the ns)*/
 int	narg, ps[];
 {
 regridtypeflag = 1;	/* for a bicubic w/o stretchmarks regrid */
 stretchmark_flag = 0;
 return regrid_common(narg,ps);
 }
 /*------------------------------------------------------------------------- */
int image_magrotate(void *, int, int, int, float, float, float, float,void **, int *, int *,int, int, int );
 /*------------------------------------------------------------------------- */
int ana_testimagerotate(int narg,int ps[])	/* to test a standalone module */
 {
 /* call is testimagerotate(image, theta, mag, dx, dy, nan2zero_flag) */
 void *outarray;
 float  theta, mag, dx, dy;
 int	n, m, data_type_input, stat, nx, ny, iq, dim[2], nan2zero_flag;
 union	types_ptr jpbase;
 struct	ahead	*h;
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(95);
 data_type_input = sym[iq].type;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 base.l = (int *) ((char *)h + sizeof(struct ahead));
 if ( h->ndim != 2 ) return execute_error(95);
 n = h->dims[0];		m = h->dims[1];
 regridtypeflag = 1;	/* for a bicubic w/o stretchmarks regrid */
 stretchmark_flag = 0;
 theta = float_arg(ps[1]);
 mag = float_arg(ps[2]);
 dx = float_arg(ps[3]);
 dy = float_arg(ps[4]);
 if (narg > 5)  nan2zero_flag = int_arg(ps[5]); else nan2zero_flag = 0;
//  printf("base.f = %d\n", base.f );
//  printf("*base.f = %f\n",*base.f );
//  printf("outarray = %#x\n",outarray );
//  printf("&outarray = %#x\n",&outarray );
//  printf("nan2zero_flag = %d\n", nan2zero_flag);
 stat = image_magrotate((void *) base.l,n,m,data_type_input,theta,mag,dx,dy,&outarray,&nx,&ny,
     regridtypeflag, stretchmark_flag, nan2zero_flag);
// printf("result nx, ny = %d, %d\n", nx, ny);
 /* make a symbol for result and copy contents */
 dim[0] = nx;	dim[1] = ny;
 result_sym = array_scratch(data_type_input, 2, dim);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 jpbase.l = (int *) ((char *)h + sizeof(struct ahead));
//  printf("copy size = %d\n", nx*ny*ana_type_size[data_type_input]);
//  printf("after image_magrotate call\n");
//  printf("outarray = %#x\n",outarray );
//  printf("&outarray = %#x\n",&outarray );
 bcopy(outarray, (void *) jpbase.l, nx*ny*ana_type_size[data_type_input]);
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int regrid_common(narg,ps)			/* with branches for type */
 int	narg, ps[];
 {
 int	iq, nx, ny, m, ng, mg, ns, ms, ngrun, dim[2];
 int	iprun, jrun, jprun, ig, ic, jc, jq, result_sym;
 int	i, j, ind;
 float	fn, fm, yrun, ax, bx, cx, dx, ay, by, cy, dy, xq, beta, xinc, yinc;
 float	xl0, yl0;
 struct	ahead	*h;
 union	types_ptr xgbase, ygbase, jpbase, jbase, ipbase, bb;
				 /* first argument must be a 2-D array */
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(95);
 regrid_type = sym[iq].type;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 base.l = (int *) ((char *)h + sizeof(struct ahead));
					 /* we want a 2-D array only */
 if ( h->ndim != 2 ) return execute_error(95);
 n = h->dims[0];		m = h->dims[1];
 if ( n < 2 || m < 2 ) return execute_error(95);
 fn = (float) n;	fm = (float) m;
				 /* check xg and yg, must be the same size */
 if ( sym[ ps[1] ].class != 4 ) return execute_error(95);
 iq = ana_float( 1, &ps[1] );
 h = (struct ahead *) sym[iq].spec.array.ptr;
 xgbase.l = (int *) ((char *)h + sizeof(struct ahead));
					 /* we want a 2-D array only */
 if ( h->ndim != 2 ) return execute_error(96);
 ng = h->dims[0];		mg = h->dims[1];
 if ( ng < 2 || mg < 2 ) return execute_error(96);
 if ( sym[ ps[2] ].class != 4 ) return execute_error(95);
 iq = ana_float( 1, &ps[2] );
 h = (struct ahead *) sym[iq].spec.array.ptr;
 ygbase.l = (int *) ((char *)h + sizeof(struct ahead));
					 /* we want a 2-D array only */
 if ( h->ndim != 2 ) return execute_error(96);
 if (ng != h->dims[0] ||	mg != h->dims[1]) return execute_error(97);
			 /* get dx and dy which are put in ns and ms */
 ns = int_arg( ps[3] );	ms = int_arg( ps[4] );
 ngrun = ng;
 /* generate the output array */
 ng--;	mg--;
 nx = ng * ns;	ny = mg * ms;	dim[0] = nx;	dim[1] = ny;
 if ( nx > maxregridsize || ny > maxregridsize ) {
	 printf("result array in REGRID would be %d by %d\n",nx,ny);
	 printf("which exceeds current !maxregridsize (%d)\n",maxregridsize);
	 return -1; }
 result_sym = array_scratch(regrid_type, 2, dim);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 jpbase.l = (int *) ((char *)h + sizeof(struct ahead));
 yrun = 1.0/ (float) ms;
 i = ana_type_size[regrid_type];
 /* various increments, in bytes! */
 iprun = ns * i;	jrun = ng * iprun;	jprun = ms * jrun;
 ind = 0;
 /* before any looping, branch on the type of regrid we are doing */
 switch (regridtypeflag) {
 case 0:	/* nearest neighbor regrid */
			 /* start 4 level loop */
 while (mg--) 	{					/* outer mg loop */
 ipbase.b = jpbase.b;	jpbase.b = ipbase.b + jprun;
 ig = ng;	j = ind;
							 /* ig loop */
 while (ig--) {
				 /* get the 8 grid values for this cell */
 i = j;
 /*printf("i = %d\n",i);*/
 ax = xgbase.f[i];	ay = ygbase.f[i];	i++;
 bx = xgbase.f[i] - ax;	by = ygbase.f[i] - ay;	i += ngrun;
 dx = xgbase.f[i] - ax;	dy = ygbase.f[i] - ay;	i--;
 cx = xgbase.f[i] - ax;	cy = ygbase.f[i] - ay;
 dx = dx - bx - cx;	dy = dy - by - cy;	j++;
 xq = 1.0 /(float) ns;
 bx *= xq;	by *= xq;	dx *= xq * yrun;	dy *= xq * yrun;
 cx *= yrun;	cy *= yrun;
 /* setup for jc loop */
 jbase.b = ipbase.b;	ipbase.b = ipbase.b + iprun;
 beta = 0.0;	jc = ms;
 while (jc--) {
 /* setup for ic loop */
 out.b = jbase.b;	jbase.b = jbase.b + jrun;
 xl = ax + beta * cx + 0.5;	yl = ay + beta * cy + 0.5;
 xinc = (bx + beta * dx);	yinc = (by + beta * dy);
 ic = ns;
 /* type switch for inner loop */
 switch (regrid_type) {
 case 0:
   while (ic--) {
   if ( xl < 0 || xl >= fn || yl < 0 || yl >= fm) *out.b++ = 0;
   /* else { iq = xl;	jq = yl;  iq += n*jq; *out.b++ = *(base.b + iq); }*/
   else { *out.b++ = *(base.b + (int) xl + n * (int) yl); }
   xl += xinc;	yl += yinc;	} break;
   case 1:
   while (ic--) {
   if ( xl < 0 || xl >= fn || yl < 0 || yl >= fm) *out.w++ = 0;
   else { *out.w++ = *(base.w + (int) xl + n * (int) yl); }
   xl += xinc;	yl += yinc;	} break;
   case 2:
   while (ic--) {
   if ( xl < 0 || xl >= fn || yl < 0 || yl >= fm) *out.l++ = 0;
   else { *out.l++ = *(base.l + (int) xl + n * (int) yl); }
   xl += xinc;	yl += yinc;	} break;
   case 3:
   while (ic--) {
   if ( xl < 0 || xl >= fn || yl < 0 || yl >= fm) *out.f++ = 0;
   else { *out.f++ = *(base.f + (int) xl + n * (int) yl); }
   xl += xinc;	yl += yinc;	} break;
   case 4:
   while (ic--) {
   if ( xl < 0 || xl >= fn || yl < 0 || yl >= fm) *out.d++ = 0;
   else { *out.d++ = *(base.d + (int) xl + n * (int) yl); }
   xl += xinc;	yl += yinc;	} break;
   }				/* end of switch on type for inner loop */
   beta++;
   }
   }
   ind += ngrun;
   }
   break;		/* end of nearest neighbor regrid case */
 
 case 1:
  {	/* bicubic with or without stretchmarks case */
   mm1 = m-1;  nm1 = n-1;  nm2 = n-2; mm2 = m-2;
   fnm1 = fn - 1.0;	fnm5 = fn -0.5;
   fmm1 = fm - 1.0;	fmm5 = fm -0.5;
			  /* start 4 level loop */
  /* printf("mg, ng = %d, %d\n", mg, ng);*/
  do 	{						/* outer mg loop */
  ipbase.b = jpbase.b;	jpbase.b = ipbase.b + jprun;
  ig = ng;	j = ind;
							 /* ig loop */
  do	{
				  /* get the 8 grid values for this cell */
  i = j;
  /* printf("i = %d\n",i);*/
  ax = xgbase.f[i];	ay = ygbase.f[i];	i++;
  bx = xgbase.f[i] - ax;	by = ygbase.f[i] - ay;	i += ngrun;
  dx = xgbase.f[i] - ax;	dy = ygbase.f[i] - ay;	i--;
  cx = xgbase.f[i] - ax;	cy = ygbase.f[i] - ay;
  dx = dx - bx - cx;	dy = dy - by - cy;	j++;
  xq = 1.0 /(float) ns;
  bx *= xq;	by *= xq;	dx *= xq * yrun;	dy *= xq * yrun;
  cx *= yrun;	cy *= yrun;
						  /* setup for jc loop */
  jbase.b = ipbase.b;	ipbase.b = ipbase.b + iprun;
  beta = 0.0;	jc = ms;
  /* LS notes that adding increments to xl and yl for the inner loop below
  results in roundoff errors for F*4 computations, see notes further below */
  while (jc--) {
    /* setup for ic loop */
    /* some optimizer trouble here on Alpha, re-arrange */
    yinc = (by + beta * dy);
    //ic = ns;
    out.b = jbase.b;	jbase.b = jbase.b + jrun;
    /* xl0 and yl0 added as bases for position computation */
    xl = xl0 = ax+beta*cx;  yl = yl0 = ay+beta*cy; /* different from regrid */
    xinc = (bx + beta * dx);
    /*printf("xl, yl, xinc, yinc = %f %f %f %f\n", xl, yl, xinc, yinc);*/
    /* except for 2 lines (and some declarations), identical (?) to regrid */
    /* to this point */
	    /* use stretch marks on boundaries for cubic interpolation */
    /* the original loop commented out below, faster but not as accurate */
  //    do {
  //  	  switch (resample_type) {
  // 	  case BI_CUBIC_SMOOTH:		bicubic_f();  break;
  // 	  case BI_CUBIC:		bicubic_fc(); break;
  // 	  }
  //  	  xl += xinc;	yl += yinc;
  //    } while (--ic > 0);
    for (ic=0; ic < ns; ic++) {
 	   xl = xl0 + ic*xinc;	yl = yl0 + ic*yinc;
 	   switch (resample_type) {
	     case BI_CUBIC_SMOOTH:	bicubic_f();  break;
	     case BI_CUBIC:		bicubic_fc(); break;
	   }
    }
    beta++;
  }
  } while (--ig > 0);
  ind += ngrun;
  } while (--mg > 0);
  } break;		/* end of bicubic with stretchmarks case */
 
 default:	return execute_error(3);
 }
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_compress(narg,ps)			/* compress function */
 /* compresses image data by an integer factor */
 /*  xc = compress(x, cx, [cy])  */
 int	narg, ps[];
 {
 int	n, iq, i, j, cx, cy, nx, ny, type, result_sym, dim[2], nd, nxx;
 float	xq, fac;
 double	dq, dfac;
 struct	ahead	*h;
 union	types_ptr q1, q2, p, base;
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(3);
 type = sym[iq].type;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 q1.l = (int *) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 /* we want a 1 or 2-D array only */
 if ( nd > 2 ) return execute_error(92);
 if (int_arg_stat(ps[1], &cx) != 1) return -1;
 cy = cx;
 if (narg > 2 )  if (int_arg_stat(ps[2], &cy) != 1) return -1;
 /* get resultant sizes, check for degenerates */
 cx = MAX(cx , 1);	cy = MAX(cy , 1);	/* otherwise we could bomb */
 nx = h->dims[0] / cx;  if (nx < 1) { cx = h->dims[0];  nx = 1; }
 if (nd > 1) ny = h->dims[1]; else ny = 1;
 ny = ny / cy;  if (ny < 1) { cy = h->dims[1];  ny = 1; }
 if (nd < 2) { cy = 1; ny = 1;}	/* or we'll be sorry, 2/4/95 */
 dim[0] = nx;	dim[1] = ny;	nxx = h->dims[0];
 result_sym = array_scratch(type, nd, dim);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 q2.l = (int *) ((char *)h + sizeof(struct ahead));
 fac = 1.0 / ( (float) cx * (float) cy );
 n = nxx - cx;					/* step bewteen lines */
 switch (type) {
 case 0:
 while (ny--) {	base.b = q1.b;  iq = nx;
   while (iq--) { p.b = base.b; xq = 0.0;
   for (j=0;j<cy;j++) { for (i=0;i<cx;i++) xq += *p.b++; p.b += n; }
   *q2.b++ = (byte) ( xq * fac );  base.b += cx; } q1.b +=  nxx * cy; }
 break;
 case 1:
 while (ny--) {	base.w = q1.w;  iq = nx;
   while (iq--) { p.w = base.w; xq = 0.0;
   for (j=0;j<cy;j++) { for (i=0;i<cx;i++) xq += *p.w++; p.w += n; }
   *q2.w++ = (short) ( xq * fac );  base.w += cx; } q1.w +=  nxx * cy; }
 break;
 case 2:
 while (ny--) {	base.l = q1.l;  iq = nx;
   while (iq--) { p.l = base.l; xq = 0.0;  
   for (j=0;j<cy;j++) { for (i=0;i<cx;i++) xq += *p.l++; p.l += n; }
   *q2.l++ = (int) ( xq * fac );  base.l += cx; } q1.l +=  nxx * cy; }
 break;
 case 3:
 while (ny--) {	base.f = q1.f;  iq = nx;
   while (iq--) { p.f = base.f; xq = 0.0;  
   for (j=0;j<cy;j++) { for (i=0;i<cx;i++) xq += *p.f++; p.f += n; }
   *q2.f++ =  ( xq * fac );  base.f += cx; } q1.f +=  nxx * cy; }
 break;
 case 4:
 dfac = 1.0 / ( (double) cx * (double) cy );
 while (ny--) {	base.d = q1.d;  iq = nx;
   while (iq--) { p.d = base.d; dq = 0.0;
   for (j=0;j<cy;j++) { for (i=0;i<cx;i++) dq += *p.d++; p.d += n; }
   *q2.d++ = (double) ( dq * dfac );  base.d += cx; } q1.d +=  nxx * cy; }
 break;
 }
 return result_sym;
 }
  /*------------------------------------------------------------------------- */
 /* used in randomu and noise, these share the seed */
#define IM	2147483647
#define IQ	127773
#define IR	2836
#define IA	16807
#define NTAB	32
#define EPS	1.2E-7
#define RNMX	1.-EPS
#define NDIV	1+(IM-1)/NTAB
 int	seed = -123459876;	/* initial seed */
 static	int	iy=0, iv[NTAB], ndiv = NDIV;
 static	float	am = 1./2147483647., rnmx = RNMX;
 /* end of defs and statics for random and noise */
 /*------------------------------------------------------------------------- */
void init_random()
 {
 int	k, j;
 seed = MAX(-seed, 1);
 for (j=NTAB+7;j>=0;j--) {	/* toss away first 8 and then load shuffle */
 k = seed / IQ;
 seed = IA * (seed -k*IQ) -IR*k;
 if (seed < 0) seed = seed +IM;
 if (j<NTAB) iv[j] = seed;
 }
 iy = iv[0];
 }	/* end of initialization */
 /*------------------------------------------------------------------------- */
int ana_randomu(narg,ps)
 /*create an array of random F*4 elements in the [0,1.0] range (exclusive) */
 int narg, ps[];
 {
 float *p;
 int	k;
 int	pd[8],j, result_sym, n, nd, i;
 struct	ahead	*h;
 for (j=0;j<narg;j++)			/*get the dimensions */
 if (int_arg_stat(ps[j], &pd[j]) != 1) return -1;
 result_sym = array_scratch(3,narg,pd);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 p = (float *) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 n = 1; for (j=0;j<nd;j++) n *= h->dims[j]; /*# of elements */
 /* check if we are initializing */
 if (seed <= 0) init_random();
 for (j=0;j<n;j++) {
 k = seed / IQ;
 seed = IA * (seed - k*IQ) -IR*k;
 if (seed < 0) seed = seed +IM;
 i = iy / ndiv;
 iy = iv[i];
 iv[i] = seed;
 *p++ = MIN(am * seed, rnmx);
 }
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_randomn(narg,ps)
 /*create a normal distribution of random #'s, centered at 0 with a width
 of 1.0
 uses Box-Muller transformation, given 2 uniformly random #'s (0 to 1 range)
 x1 and x2 then         n1 = sqrt(-2.*alog(x1))*cos(2*pi*x2)
                        n2 = sqrt(-2.*alog(x1))*sin(2*pi*x2)
 */
 int narg, ps[];
 {
 float	x1, x2, *p;
 struct	ahead	*h;
 int	result_sym, j, n2, nd, n, k, i;
 /* first get a uniform distribution */
 /* 9/18/95, L.S. noticed that if the total number is odd, the last value is
 always >0 because we used a zero angle.
 */
 result_sym = ana_randomu(narg,ps);
 if (result_sym <= 0) return result_sym;
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 p = (float *) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 n = 1; for (j=0;j<nd;j++) n *= h->dims[j]; /*# of elements */
 n2 = n / 2;
 for (j=0;j<n2;j++) {
 x1 = sqrt(-2.0 * log( *p ) );
 x2 = *(p+1) * 6.2831853;
 *p++ = x1 * cos( x2);
 *p++ = x1 * sin( x2);
 }
 /* an odd one left over ? */
 if (n > (2*n2) ) {
 x1 = sqrt(-2.0 * log( *p ));
 k = seed / IQ;
 seed = IA * (seed - k*IQ) -IR*k;
 if (seed < 0) seed = seed +IM;
 i = iy / ndiv;
 iy = iv[i];
 iv[i] = seed;
 x2 = MIN(am * seed, rnmx)* 6.2831853;
 *p++ = x1 * cos( x2);
 }
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_noise(narg,ps)
 /* add statistical noise to data, y = noise(x, [photons_per_count]) */
 int narg, ps[];
 {
 float *p, *q, ppc = 1.0, x1, x2, y1, y2;
 int	k;
 int	j, result_sym, n, nd, i,iq;
 struct	ahead	*h;
 /* input array, always float, no support for F*8 yet */
 iq = ana_float(1,ps);
 q = (float *) ((char *) sym[iq].spec.array.ptr + sizeof(struct ahead));
 result_sym = array_clone(iq, sym[iq].type);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 p = (float *) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 n = 1; for (j=0;j<nd;j++) n *= h->dims[j]; /*# of elements */
 /* if present, get the scale factor; this is physically the photons
 per lsb for a CCD */
 if (narg > 1) if (float_arg_stat(ps[1], &ppc) != 1) return -1;
 if (ppc <= 0.0) {
 	printf("NOISE - illegal value for photons per count = %f\n", ppc);
	return -1; }
 ppc = 1.0/sqrt(ppc);
 /* check if we are initializing */
 if (seed <= 0) init_random();
 while (1) {	/* do 2 at a time because of Box-Muller transformation */
 k = seed / IQ;
 seed = IA * (seed - k*IQ) -IR*k;
 if (seed < 0) seed = seed +IM;
 i = iy / ndiv;
 iy = iv[i];
 iv[i] = seed;
 x1 = MIN(am * seed, rnmx);
 x1 = sqrt(-2.0 * log( x1 ) ) * ppc;	/* note ppc factor included here */
 k = seed / IQ;
 seed = IA * (seed - k*IQ) -IR*k;
 if (seed < 0) seed = seed +IM;
 i = iy / ndiv;
 iy = iv[i];
 iv[i] = seed;
 x2 = MIN(am * seed, rnmx);
 x2 = x2 * 6.2831853;
 y1 = x1 * cos( x2);
 y2 = x1 * sin( x2);
 /* have the 2 random numbers needed for the next 2 data values */
 *p = *q + y1 * sqrt( *q);	/* note ppc included in y1 via x1 */
 n--;
 if (n <= 0) break;
 p++;	q++;
 /* now the other (if odd the last y2 not used) */
 *p = *q + y2 * sqrt( *q);	/* note ppc included in y2 via x1 */
 n--;
 if (n <= 0) break;
 p++;	q++;
 }
 return result_sym;
 }
 /*------------------------------------------------------------------------- */
int ana_sort(narg,ps)
 /* 6/10/97 - include string pointers */
 /*sort contents of a copy of an array, unless the array is a temp,
 then we sort the array in place */
 int	narg, ps[];
 {
 struct	ahead	*h;
 int	iq, type, result_sym, j, nd, n;
 iq = ps[0];
 if ( sym[iq].class == ARRAY ) {
 int	*p;
 type = sym[iq].type;
 /* result_sym = array_clone(iq,type); */
 result_sym = find_next_temp();
 /* make a copy of original array, we sort the copy
 Note that if
 original is already a temp, there is no copy and it is sorted in place */
 if ( ana_replace( result_sym, iq) != 1 ) return execute_error(3);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 p = (int *) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 n = 1; for (j=0;j<nd;j++) n *= h->dims[j]; /*# of elements */
 if ( n <= 1 ) return result_sym;
 /*note - the default uses Press etal's heap sort, experiments with shell sort
 indicated it was slower for all cases that reliable times could be obtained
 on (about n>=10000) */
 if (sort_flag == 0 ) {
 /* printf("heap sort\n");*/
 switch (type) {
 case 0: sort_b( n, p);	return result_sym;
 case 1: sort_w( n, p);	return result_sym;
 case 2: sort_l( n, p);	return result_sym;
 case 3: sort_f( n, p);	return result_sym;
 case 4: sort_d( n, p);	return result_sym;
 }
 } else {
 /* printf("shell sort\n");*/
 switch (type) {
 case 0: shell_b( n, p);	return result_sym;
 case 1: shell_w( n, p);	return result_sym;
 case 2: shell_l( n, p);	return result_sym;
 case 3: shell_f( n, p);	return result_sym;
 case 4: shell_d( n, p);	return result_sym;
 }
 }
  } else if ( sym[iq].class == STRING_PTR ) {
 /* we have a string array */
 char	**p;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 p = (char **) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 n = 1; for (j=0;j<nd;j++) n *= h->dims[j];	/*# of elements */
 /* make a copy if not a temp */
 result_sym = find_next_temp();
 if ( ana_replace(result_sym, iq) != 1 ) return execute_error(3);
 p = (char **) ((char *) sym[result_sym].spec.array.ptr+sizeof(struct ahead));
 /* don't sort if only 1 !, causes problems */
 if (n>1) sort_s(n, p);		return result_sym;

 } else return execute_error(66);
 return execute_error(3);	/* we can't get here! */
 }
 /*------------------------------------------------------------------------- */
int ana_index(narg,ps)
 /* construct a sorted index table for an array */
 /* the index is returned as a long array of the same size */
 /* uses heap sort only */
  int	narg, ps[];
 {
 /* 6/10/97 - include string pointers */
 struct	ahead	*h;
 int	iq, type, result_sym, j, nd, n;
 iq = ps[0];
 if ( sym[iq].class == ARRAY ) {
 int  *p, *q;
 /* array case */
 type = sym[iq].type;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 q = (int *) ((char *)h + sizeof(struct ahead));
 result_sym = array_clone(iq,2);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 p = (int *) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 n = 1; for (j=0;j<nd;j++) n *= h->dims[j]; /*# of elements */
 if ( n <= 1 ) return result_sym;
 switch (type) {
 case 0: indexx_b( n, q, p);	return result_sym;
 case 1: indexx_w( n, q, p);	return result_sym;
 case 2: indexx_l( n, q, p);	return result_sym;
 case 3: indexx_f( n, q, p);	return result_sym;
 case 4: indexx_d( n, q, p);	return result_sym;
 }
 } else if ( sym[iq].class == STRING_PTR ) {
 int  *p;
 char **q;
 /* we have a string array */
 h = (struct ahead *) sym[iq].spec.array.ptr;
 q = (char **) ((char *)h + sizeof(struct ahead));
 nd = h->ndim;
 n = 1; for (j=0;j<nd;j++) n *= h->dims[j];	/*# of elements */
 result_sym = array_scratch( 2, nd, h->dims);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 p = (int *) ((char *)h + sizeof(struct ahead));
 indexx_s( n, q, p);	return result_sym;
 } else return execute_error(66);
 return execute_error(3);	/* we can't get here! */
 }
 /*------------------------------------------------------------------------- */
void bicubic_f()	/* internal routine for single pixel */
 {
 /* used by all (most?) routines that do bi-cubic interpolations, runs
 a little slower than the originals, perhaps because of the overhead
 in the call or other adjustments made */
 /* reference for bicubic method */
 /*    MODIFIED 1-26-84 TO USE LOWER NOISE CUBIC INTERPOLATION FOUND IN
      S.K. PARK & R.A. SCHOWENGERDT, COMP. VIS. & IM. PROC., VOL. 23,
      P. 258, 1983:  USES THEIR FORMULA WITH ALPHA = -0.5
@Article{park83image,
  author       = "S. K. Park and R. A. Schowengerdt",
  title	       = "Image Reconstruction by Parametric Cubic
                  Convolution",
  journal      = "Computer Vision, Graphics, and Image Processing",
  year	       = 1983,
  volume       = 23,
  pages	       = "258-272"
  }
*/
 int	i1, i2, i3, i4, j1, j2, j3, j4, iq;
 float	c1, c2, c3, c4, b1, b2, b3, b4, dx0, dx1, dx2, dx3, dx4, xq, yq;
 union	types_ptr bb;
 /* the location is in xl, yl; base is the pointer to array; out is
 pointer to output; both are unions */
 i2 = (int) xl;		j2 = (int) yl;
 if ( i2 >= 1 && i2 < nm2 ) {		/* normal interior */
	 dx0 = xl - i2; i1 = i2 - 1; i2 = 1; i3 = 2; i4 = 3;	 }
	 else {				/* edge cases */
	 /* check if stretchmarks required */
	 if (stretchmark_flag == 0) {
	  if ( xl < -0.5 || xl > fnm5) {
	   switch (regrid_type) {
	   case 0: *out.b++ = 0; break;
	   case 1: *out.w++ = 0; break;
	   case 2: *out.l++ = 0; break;
	   case 3: *out.f++ = 0; break;
	   case 4: *out.d++ = 0; break;
	   }
	  return;
	  }
	 }
	 i2 = MIN(i2, nm1);	i2 = MAX( i2, 0);
	 xq = MIN(xl, fnm1);	xq = MAX(xq, 0);
	 dx0 = xq - i2;
 /*	printf("dx0 = %f\n",dx0);*/
	 i1 = MIN(i2-1, nm1);	i1 = MAX( i1, 0);
	 i3 = MIN(i2+1, nm1);	/* i3 = MAX( i3, 0); */
	 i4 = MIN(i2+2, nm1);	/* i4 = MAX( i4, 0); */
	 i2 = i2 - i1;	i3 = i3 - i1;	i4 = i4 - i1;
	 }
 dx1 = 1.0 - dx0;  dx2 = -dx0 * 0.5;  dx3 = dx0 * dx2;
 dx4 = 3. * dx0 * dx3;
 c1 = dx2*dx1*dx1; c2 = 1.-dx4+5.*dx3; c3 = dx4-(dx2+4.*dx3); c4 = dx3*dx1;
 /* printf("i2,j2,xl,yl= %d %d %f %f\n",i2,j2,xl,yl); */
 if ( j2 >= 1 && j2 < mm2 ) {		/* normal interior */
	 j1 = j2 - 1; dx0 = yl - j2; j3 = n; j2 = n; j4 = n;	}
	 else {				/* edge cases */
	 /* check if stretchmarks required */
	 if (stretchmark_flag == 0) {
	  if ( yl < -0.5 || yl > fmm5) {
	   switch (regrid_type) {
	   case 0: *out.b++ = 0; break;
	   case 1: *out.w++ = 0; break;
	   case 2: *out.l++ = 0; break;
	   case 3: *out.f++ = 0; break;
	   case 4: *out.d++ = 0; break;
	   }
	  return;
	  }
	 }	 j2 = MIN(j2, mm1);	j2 = MAX( j2, 0);
	 xq = MIN(yl, fmm1);	xq = MAX(xq, 0);
	 dx0 = xq - j2;
 /*	printf("dy0 = %f, yl = %f, j2 = %d\n",dx0,yl,j2);*/
	 j1 = MIN(j2-1, mm1);	j1 = MAX( j1, 0);
	 j3 = MIN(j2+1, mm1);	/* j3 = MAX( j3, 0); */
	 j4 = MIN(j2+2, mm1);	/* j4 = MAX( j4, 0); */
	 j4 = (j4 - j3) * n;  j3 = (j3 - j2) * n;  j2 = (j2 - j1) *n;
	 }
 dx1 = 1.0 - dx0;  dx2 = -dx0 * 0.5;  dx3 = dx0 * dx2;
 dx4 = 3. * dx0 * dx3;
 b1 = dx2*dx1*dx1; b2 = 1.-dx4+5.*dx3; b3 = dx4-(dx2+4.*dx3); b4 = dx3*dx1;
 /* low corner offset */
 /* printf("index j1, j2, j3, j4 = %d %d %d %d\n", j1, j2, j3, j4);*/
 iq = i1 + j1 * n;
 /* printf("offset j1, j2, j3, j4 = %d %d %d %d\n", j1, j2, j3, j4);*/
 switch (regrid_type) {
 case 0:
 bb.b = base.b+iq;
 xq = b1*(c1 * *(bb.b) + c2 * *(bb.b+i2)+ c3 * *(bb.b+i3) + c4 * *(bb.b+i4));
 bb.b += j2;
 xq += b2*(c1 * *(bb.b) + c2 * *(bb.b+i2)+ c3 * *(bb.b+i3) + c4 * *(bb.b+i4));
 bb.b += j3;
 xq += b3*(c1 * *(bb.b) + c2 * *(bb.b+i2)+ c3 * *(bb.b+i3) + c4 * *(bb.b+i4));
 bb.b += j4;
 xq += b4*(c1 * *(bb.b) + c2 * *(bb.b+i2)+ c3 * *(bb.b+i3) + c4 * *(bb.b+i4));
 /* byte arrays need to be range restricted, too many simple minds out there */
 xq = MAX( 0, MIN( 255, xq));
 /* also we need to round rather than truncate, taking that extra care */
 *out.b++ = rint(xq); break;
 case 1:
 bb.w = base.w+iq;
 xq = b1*(c1 * *(bb.w) + c2 * *(bb.w+i2)+ c3 * *(bb.w+i3) + c4 * *(bb.w+i4));
 bb.w += j2;
 xq += b2*(c1 * *(bb.w) + c2 * *(bb.w+i2)+ c3 * *(bb.w+i3) + c4 * *(bb.w+i4));
 bb.w += j3;
 xq += b3*(c1 * *(bb.w) + c2 * *(bb.w+i2)+ c3 * *(bb.w+i3) + c4 * *(bb.w+i4));
 bb.w += j4;
 xq += b4*(c1 * *(bb.w) + c2 * *(bb.w+i2)+ c3 * *(bb.w+i3) + c4 * *(bb.w+i4));
 /* also we need to round rather than truncate, taking that extra care */
 *out.w++ = rint(xq); break;
 case 2:
 bb.l = base.l+iq;
 xq = b1*(c1 * *(bb.l) + c2 * *(bb.l+i2)+ c3 * *(bb.l+i3) + c4 * *(bb.l+i4));
 bb.l += j2;
 xq += b2*(c1 * *(bb.l) + c2 * *(bb.l+i2)+ c3 * *(bb.l+i3) + c4 * *(bb.l+i4));
 bb.l += j3;
 xq += b3*(c1 * *(bb.l) + c2 * *(bb.l+i2)+ c3 * *(bb.l+i3) + c4 * *(bb.l+i4));
 bb.l += j4;
 xq += b4*(c1 * *(bb.l) + c2 * *(bb.l+i2)+ c3 * *(bb.l+i3) + c4 * *(bb.l+i4));
 /* also we need to round rather than truncate, taking that extra care */
 *out.l++ = rint(xq); break;
 case 3:
 bb.f = base.f+iq;
 xq = b1*(c1 * *(bb.f) + c2 * *(bb.f+i2)+ c3 * *(bb.f+i3) + c4 * *(bb.f+i4));
 bb.f += j2;
 xq += b2*(c1 * *(bb.f) + c2 * *(bb.f+i2)+ c3 * *(bb.f+i3) + c4 * *(bb.f+i4));
 bb.f += j3;
 xq += b3*(c1 * *(bb.f) + c2 * *(bb.f+i2)+ c3 * *(bb.f+i3) + c4 * *(bb.f+i4));
 bb.f += j4;
 xq += b4*(c1 * *(bb.f) + c2 * *(bb.f+i2)+ c3 * *(bb.f+i3) + c4 * *(bb.f+i4));
 *out.f++ = xq; break;
 case 4:
 bb.d = base.d+iq;
 xq = b1*(c1 * *(bb.d) + c2 * *(bb.d+i2)+ c3 * *(bb.d+i3) + c4 * *(bb.d+i4));
 bb.d += j2;
 xq += b2*(c1 * *(bb.d) + c2 * *(bb.d+i2)+ c3 * *(bb.d+i3) + c4 * *(bb.d+i4));
 bb.d += j3;
 xq += b3*(c1 * *(bb.d) + c2 * *(bb.d+i2)+ c3 * *(bb.d+i3) + c4 * *(bb.d+i4));
 bb.d += j4;
 xq += b4*(c1 * *(bb.d) + c2 * *(bb.d+i2)+ c3 * *(bb.d+i3) + c4 * *(bb.d+i4));
 *out.d++ = xq; break;
 }
 return;
 }
 /*------------------------------------------------------------------------- */
void shift_bicubic(dx, nx, ny, inc, dline, base, out, type)
 /* internal routine for shift in one direction */
 float	dx;
 int	nx, ny, type, inc, dline;
 union	types_ptr base, out;
 {
 int	i1, i2, i3, i4, k;
 float	c1, c2, c3, c4, dx0, dx1, dx2, dx3, dx4;
 double	cd1, cd2, cd3, cd4, ddx0, ddx1, ddx2, ddx3, ddx4;
 int	nc, nzone1, nzone2, nzone3, nzone4, nzone5, nz1, nz2;
 int	nz3, nz4, rflag;

 nm1 = nx -1; rflag = 0;
 /* get the fraction shift and the integer shift */
 if (dx<0) {
 /* handle negative offsets by flipping and starting at end of array, this
 allows in and out array to always be the same without overwrites */
 rflag = nm1*inc;  /* not a flag but an offset */
 inc = - inc;
 dx = -dx;
 }
 i2 = (int) dx;
 dx0 = dx - i2;

 /* 3 zones, zone 1 gone with reversal technique */
 nzone2 = nzone3 = nzone4 = 0;
 if (i2 == 0) {
  nzone3 = nzone4 = 1;  nzone2 = nx - 2;
 }
 if (i2 > 0) {
  nzone4 =  MIN(i2 + 1, nx);
  if (nzone4 < nx) nzone3 = 1; else nzone3 = 0;
  nzone2 = nx - nzone4 - nzone3;
 } 
 /* get the first 4 values we will need, start at (i2-1)>0 */
 i2 = MIN(i2, nm1);	i2 = MAX( i2, 0);
 i1 = MIN(i2-1, nm1);	i1 = MAX( i1, 0);
 i3 = MIN(i2+1, nm1);
 i4 = MIN(i3+1, nm1);
 i1 = i1*inc;  i2 = i2*inc;  i3 = i3*inc;  i4 = i4*inc;
 if (type < 4) {
 /* coefficients are done in F*4 unless we need a F*8 result */
 dx1 = 1.0 - dx0;
 switch (resample_type) {
   case BI_CUBIC_SMOOTH:

   dx2 = -dx0 * 0.5;  dx3 = dx0 * dx2;
   dx4 = 3. * dx0 * dx3;
   c1 = dx2*dx1*dx1; c2 = 1.-dx4+5.*dx3; c3 = dx4-(dx2+4.*dx3); c4 = dx3*dx1;
   break;

   case BI_CUBIC:
   dx4 = -dx0*dx1;  c1 = dx4 * dx1;  c4 = dx0 * dx4;
   dx2 = dx0 * dx0; dx3 = dx0 * dx2; c2 = 1.-2.*dx2+dx3; c3 = dx0*(1.0+dx0-dx2);
   break;
 }


 } else {
 /* the F*8 case, use alternate coefficients */
 ddx0 = (double) dx0;
 ddx1 = 1.0 - ddx0;
 switch (resample_type) {
 case BI_CUBIC_SMOOTH:
 
 ddx2 = -ddx0 * 0.5;  ddx3 = ddx0 * ddx2;
 ddx4 = 3. * ddx0 * ddx3;
 cd1 = ddx2*ddx1*ddx1; cd2 = 1.-ddx4+5.*ddx3; cd3 = ddx4-(ddx2+4.*ddx3);
 cd4 = ddx3*ddx1;
 break;

 case BI_CUBIC:
 ddx4 = -ddx0*ddx1;  cd1 = ddx4 * ddx1;  cd4 = ddx0 * ddx4;
 ddx2 = ddx0 * ddx0; ddx3 = ddx0 * ddx2; cd2 = 1.-2.*ddx2+ddx3;
 cd3 = ddx0*(1.0+ddx0-ddx2);
 break;
 }
 
 
 }
 switch (type) {

 case 0:	/* I*1 image */
 {
   float	z4, z1, z2, z3, yq;
   byte	*p, *q;
   for (k=0;k<ny;k++) {
   p = base.b + k*dline + rflag;
   q = out.b + k*dline + rflag;
   z1 = (float) *(p + i1);	z2 = (float) *(p + i2);
   z3 = (float) *(p + i3);	z4 = (float) *(p + i4);
   p = p + i4;
   nz2 = nzone2;  nz3 = nzone3;  nz4 = nzone4;
  
   if (nz2) {
    yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
    /* byte arrays need to be range restricted */
    yq = MAX( 0, MIN( 255.0, yq));
    *q = (byte) yq;
    q = q + inc;
    nz2--;
    while (nz2--) {
     p = p + inc;
     z1 = z2;  z2 = z3;  z3 = z4;
     z4 = *p;
     yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
     yq = MAX( 0, MIN( 255.0, yq));
     *q = (byte) yq;
     q = q + inc;
    }
   }
   if (nz3) {
     z1 = z2;  z2 = z3;  z3 = z4;
     yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
     yq = MAX( 0, MIN( 255.0, yq));
     *q = (byte) yq;
     q = q + inc;
   }
   z4 = MAX( 0, MIN( 255.0, z4));
   while (nz4--) { *q = (byte) z4;  q = q + inc; }
   }
 }
 break;
 case 1:
 {
  float	z4, z1, z2, z3, yq;
  short	*p, *q;
  for (k=0;k<ny;k++) {
  p = base.w + k*dline + rflag;
  q = out.w  + k*dline + rflag;
  z1 = (float) *(p + i1);	z2 = (float) *(p + i2);
  z3 = (float) *(p + i3);	z4 = (float) *(p + i4);
  p = p + i4;
  nz2 = nzone2;  nz3 = nzone3;  nz4 = nzone4;
 
  if (nz2) {
   yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
   *q = (short) yq;
   q = q + inc;
   nz2--;
   while (nz2--) {
    p = p + inc;
    z1 = z2;  z2 = z3;  z3 = z4;
    z4 = *p;
    yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
    *q = (short) yq;
    q = q + inc;
   }
  }
  if (nz3) {
    z1 = z2;  z2 = z3;  z3 = z4;
    yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
    *q = (short) yq;
    q = q + inc;
  }
  while (nz4--) { *q = (short) z4;  q = q + inc; }
  }
 }
 break;
 case 2:
 {
  float	z4, z1, z2, z3, yq;
  int	*p, *q;
  for (k=0;k<ny;k++) {
  p = base.l + k*dline + rflag;
  q = out.l  + k*dline + rflag;
  z1 = (float) *(p + i1);	z2 = (float) *(p + i2);
  z3 = (float) *(p + i3);	z4 = (float) *(p + i4);
  p = p + i4;
  nz2 = nzone2;  nz3 = nzone3;  nz4 = nzone4;
 
  if (nz2) {
   yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
   *q = (int) yq;
   q = q + inc;
   nz2--;
   while (nz2--) {
    p = p + inc;
    z1 = z2;  z2 = z3;  z3 = z4;
    z4 = *p;
    yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
    *q = (int) yq;
    q = q + inc;
   }
  }
  if (nz3) {
    z1 = z2;  z2 = z3;  z3 = z4;
    yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
    *q = (int) yq;
    q = q + inc;
  }
  while (nz4--) { *q = (int) z4;  q = q + inc; }
  }
 }
 break;
 case 3:	/* floating F*4 */
 {
  float	z4, z1, z2, z3, yq;
  float	*p, *q;
  for (k=0;k<ny;k++) {
  p = base.f + k*dline + rflag;
  q = out.f + k*dline + rflag;
  z1 = *(p + i1);	z2 = *(p + i2);	z3 = *(p + i3);
  z4 = *(p + i4);
  p = p + i4;
  nz2 = nzone2;  nz3 = nzone3;  nz4 = nzone4;
 
  if (nz2) {
   yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
   *q = yq;
   q = q + inc;
   nz2--;
   while (nz2--) {
    p = p + inc;
    z1 = z2;  z2 = z3;  z3 = z4;
    z4 = *p;
    yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
    *q = yq;
    q = q + inc;
   }
  }
  if (nz3) {
    z1 = z2;  z2 = z3;  z3 = z4;
    yq = c1*z1 +c2*z2 + c3*z3 + c4*z4;
    *q = yq;
    q = q + inc;
  }
  while (nz4--) { *q = z4;  q = q + inc; }
  }
 }
 break;
 
 case 4:	/* F*8, double precision */
{
 double	z4, z1, z2, z3, yq;
 double	*p, *q;
 for (k=0;k<ny;k++) {
 p = base.d + k*dline + rflag;
 q = out.d + k*dline + rflag;
 z1 = *(p + i1);	z2 = *(p + i2);	z3 = *(p + i3);
 z4 = *(p + i4);
 p = p + i4;
 nz2 = nzone2;  nz3 = nzone3;  nz4 = nzone4;

 if (nz2) {
  yq = cd1*z1 +cd2*z2 + cd3*z3 + cd4*z4;
  *q = yq;
  q = q + inc;
  nz2--;
  while (nz2--) {
   p = p + inc;
   z1 = z2;  z2 = z3;  z3 = z4;
   z4 = *p;
   yq = cd1*z1 +cd2*z2 + cd3*z3 + cd4*z4;
   *q = yq;
   q = q + inc;
  }
 }
 if (nz3) {
   z1 = z2;  z2 = z3;  z3 = z4;
   yq = cd1*z1 +cd2*z2 + cd3*z3 + cd4*z4;
   *q = yq;
   q = q + inc;
 }
 while (nz4--) { *q = z4;  q = q + inc; }
 }
 }
 break;
 } return;
 }
 /*------------------------------------------------------------------------- */
int ana_shift3(int narg,int ps[])	/* shift3, bicubic image shift */
 /* y = shift3(x, dx, dy) */
 /* the direction of the shift is such that a positive dx moves the image
 to the left (toward smaller x) and likewise in y. Hence the original (dx,dy)
 becomes (0,0). Note that this is the opposite of the direction for rotate3
 when it is used only for a shift */
 
 {
 int	nx, ny, iq, outer, k, nd, n, nb, j;
 float	dx, dy;
 union	types_ptr base, out;
 struct	ahead	*h;
 iq = ps[0];
 if ( sym[iq].class != 4 ) return execute_error(95);
 regrid_type = sym[iq].type;
 h = (struct ahead *) sym[iq].spec.array.ptr;
 base.l = (int *) ((char *)h + sizeof(struct ahead));
					 /* we want a 2-D array only */
 nd = h->ndim;
 if ( nd > 2 ) return execute_error(95);
 nx = h->dims[0];
 if (nd == 2)  ny = h->dims[1];  else ny = 1;
 if (nx < 2) return execute_error(95);
 n = 1; for (j=0;j<nd;j++) n *= h->dims[j];	/* # of elements */
 nb = n * ana_type_size[regrid_type];		/* # of bytes */
 /* get the shifts */
 if (float_arg_stat(ps[1], &dx) != 1) return -1;
 dy = 0.0;
 if (narg>2) { if (float_arg_stat(ps[2], &dy) != 1) return -1; }

 result_sym = array_clone(iq, regrid_type);
 h = (struct ahead *) sym[result_sym].spec.array.ptr;
 out.l = (int *) ((char *)h + sizeof(struct ahead));

 /* first the shifts in x */
 if (dx == 0.0) { /* just copy over the original */
  bcopy ( (char *) base.b, (char *) out.b, nb );
 } else {
 shift_bicubic(dx, nx, ny, 1, nx, base, out, regrid_type);
 }
 /* if dy is zip, we are done */
 if (dy != 0.0) {
 shift_bicubic(dy, ny, nx, nx, 1, out, out, regrid_type);
 }

 return result_sym;
 }
 /*------------------------------------------------------------------------- */
void bicubic_fc()	/* internal routine for single pixel */
 {
 /* used by all (most?) routines that do bi-cubic interpolations, runs
 a little slower than the originals, perhaps because of the overhead
 in the call or other adjustments made */
 int	i1, i2, i3, i4, j1, j2, j3, j4, iq;
 float	c1, c2, c3, c4, b1, b2, b3, b4, dx0, dx1, dx2, dx3, dx4, xq, yq;
 union	types_ptr bb;
 /* the location is in xl, yl; base is the pointer to array; out is
 pointer to output; both are unions */
 i2 = (int) xl;		j2 = (int) yl;
 if ( i2 >= 1 && i2 < nm2 ) {		/* normal interior */
	 dx0 = xl - i2; i1 = i2 - 1; i2 = 1; i3 = 2; i4 = 3;	 }
	 else {				/* edge cases */
	 /* check if stretchmarks required */
	 if (stretchmark_flag == 0) {
	  if ( xl < -0.5 || xl > fnm5) {
	   switch (regrid_type) {
	   case 0: *out.b++ = 0; break;
	   case 1: *out.w++ = 0; break;
	   case 2: *out.l++ = 0; break;
	   case 3: *out.f++ = 0; break;
	   case 4: *out.d++ = 0; break;
	   }
	  return;
	  }
	 }
	 i2 = MIN(i2, nm1);	i2 = MAX( i2, 0);
	 xq = MIN(xl, fnm1);	xq = MAX(xq, 0);
	 dx0 = xq - i2;
 /*	printf("dx0 = %f\n",dx0);*/
	 i1 = MIN(i2-1, nm1);	i1 = MAX( i1, 0);
	 i3 = MIN(i2+1, nm1);	/* i3 = MAX( i3, 0); */
	 i4 = MIN(i2+2, nm1);	/* i4 = MAX( i4, 0); */
	 i2 = i2 - i1;	i3 = i3 - i1;	i4 = i4 - i1;
	 }

 dx1 = 1.0 - dx0;  dx4 = -dx0*dx1;  c1 = dx4 * dx1;  c4 = dx0 * dx4;
 dx2 = dx0 * dx0; dx3 = dx0 * dx2; c2 = 1.-2.*dx2+dx3; c3 = dx0*(1.0+dx0-dx2);
 /* printf("i2,j2,xl,yl= %d %d %f %f\n",i2,j2,xl,yl); */
 if ( j2 >= 1 && j2 < mm2 ) {		/* normal interior */
	 j1 = j2 - 1; dx0 = yl - j2; j3 = n; j2 = n; j4 = n;	}
	 else {				/* edge cases */
	 /* check if stretchmarks required */
	 if (stretchmark_flag == 0) {
	  if ( yl < -0.5 || yl > fmm5) {
	   switch (regrid_type) {
	   case 0: *out.b++ = 0; break;
	   case 1: *out.w++ = 0; break;
	   case 2: *out.l++ = 0; break;
	   case 3: *out.f++ = 0; break;
	   case 4: *out.d++ = 0; break;
	   }
	  return;
	  }
	 }	 j2 = MIN(j2, mm1);	j2 = MAX( j2, 0);
	 xq = MIN(yl, fmm1);	xq = MAX(xq, 0);
	 dx0 = xq - j2;
 /*	printf("dy0 = %f, yl = %f, j2 = %d\n",dx0,yl,j2);*/
	 j1 = MIN(j2-1, mm1);	j1 = MAX( j1, 0);
	 j3 = MIN(j2+1, mm1);	/* j3 = MAX( j3, 0); */
	 j4 = MIN(j2+2, mm1);	/* j4 = MAX( j4, 0); */
	 j4 = (j4 - j3) * n;  j3 = (j3 - j2) * n;  j2 = (j2 - j1) *n;
	 }

 dx1 = 1.0 - dx0;  dx4 = -dx0*dx1;  b1 = dx4 * dx1;  b4 = dx0 * dx4;
 dx2 = dx0 * dx0; dx3 = dx0 * dx2; b2 = 1.-2.*dx2+dx3; b3 = dx0*(1.0+dx0-dx2);
 /* low corner offset */
 /* printf("index j1, j2, j3, j4 = %d %d %d %d\n", j1, j2, j3, j4);*/
 iq = i1 + j1 * n;
 /* printf("offset j1, j2, j3, j4 = %d %d %d %d\n", j1, j2, j3, j4);*/
 switch (regrid_type) {
 case 0:
 bb.b = base.b+iq;
 xq = b1*(c1 * *(bb.b) + c2 * *(bb.b+i2)+ c3 * *(bb.b+i3) + c4 * *(bb.b+i4));
 bb.b += j2;
 xq += b2*(c1 * *(bb.b) + c2 * *(bb.b+i2)+ c3 * *(bb.b+i3) + c4 * *(bb.b+i4));
 bb.b += j3;
 xq += b3*(c1 * *(bb.b) + c2 * *(bb.b+i2)+ c3 * *(bb.b+i3) + c4 * *(bb.b+i4));
 bb.b += j4;
 xq += b4*(c1 * *(bb.b) + c2 * *(bb.b+i2)+ c3 * *(bb.b+i3) + c4 * *(bb.b+i4));
 /* byte arrays need to be range restricted, too many simple minds out there */
 xq = MAX( 0, MIN( 255, xq));
 /* also we need to round rather than truncate, taking that extra care */
 *out.b++ = rint(xq); break;
 case 1:
 bb.w = base.w+iq;
 xq = b1*(c1 * *(bb.w) + c2 * *(bb.w+i2)+ c3 * *(bb.w+i3) + c4 * *(bb.w+i4));
 bb.w += j2;
 xq += b2*(c1 * *(bb.w) + c2 * *(bb.w+i2)+ c3 * *(bb.w+i3) + c4 * *(bb.w+i4));
 bb.w += j3;
 xq += b3*(c1 * *(bb.w) + c2 * *(bb.w+i2)+ c3 * *(bb.w+i3) + c4 * *(bb.w+i4));
 bb.w += j4;
 xq += b4*(c1 * *(bb.w) + c2 * *(bb.w+i2)+ c3 * *(bb.w+i3) + c4 * *(bb.w+i4));
 /* also we need to round rather than truncate, taking that extra care */
 *out.w++ = rint(xq); break;
 case 2:
 bb.l = base.l+iq;
 xq = b1*(c1 * *(bb.l) + c2 * *(bb.l+i2)+ c3 * *(bb.l+i3) + c4 * *(bb.l+i4));
 bb.l += j2;
 xq += b2*(c1 * *(bb.l) + c2 * *(bb.l+i2)+ c3 * *(bb.l+i3) + c4 * *(bb.l+i4));
 bb.l += j3;
 xq += b3*(c1 * *(bb.l) + c2 * *(bb.l+i2)+ c3 * *(bb.l+i3) + c4 * *(bb.l+i4));
 bb.l += j4;
 xq += b4*(c1 * *(bb.l) + c2 * *(bb.l+i2)+ c3 * *(bb.l+i3) + c4 * *(bb.l+i4));
 /* also we need to round rather than truncate, taking that extra care */
 *out.l++ = rint(xq); break;
 case 3:
 bb.f = base.f+iq;
 xq = b1*(c1 * *(bb.f) + c2 * *(bb.f+i2)+ c3 * *(bb.f+i3) + c4 * *(bb.f+i4));
 bb.f += j2;
 xq += b2*(c1 * *(bb.f) + c2 * *(bb.f+i2)+ c3 * *(bb.f+i3) + c4 * *(bb.f+i4));
 bb.f += j3;
 xq += b3*(c1 * *(bb.f) + c2 * *(bb.f+i2)+ c3 * *(bb.f+i3) + c4 * *(bb.f+i4));
 bb.f += j4;
 xq += b4*(c1 * *(bb.f) + c2 * *(bb.f+i2)+ c3 * *(bb.f+i3) + c4 * *(bb.f+i4));
 *out.f++ = xq; break;
 case 4:
 bb.d = base.d+iq;
 xq = b1*(c1 * *(bb.d) + c2 * *(bb.d+i2)+ c3 * *(bb.d+i3) + c4 * *(bb.d+i4));
 bb.d += j2;
 xq += b2*(c1 * *(bb.d) + c2 * *(bb.d+i2)+ c3 * *(bb.d+i3) + c4 * *(bb.d+i4));
 bb.d += j3;
 xq += b3*(c1 * *(bb.d) + c2 * *(bb.d+i2)+ c3 * *(bb.d+i3) + c4 * *(bb.d+i4));
 bb.d += j4;
 xq += b4*(c1 * *(bb.d) + c2 * *(bb.d+i2)+ c3 * *(bb.d+i3) + c4 * *(bb.d+i4));
 *out.d++ = xq; break;
 }
 return;
 }
 /*------------------------------------------------------------------------- */
