#include "Util.h"
#include "defs.h"
#include "structs.h"

#include "common.h"
#include "filter.p"

#include <assert.h>


/* Dimensions last passed to InitFilters(); reset to 0 by DisinitFilters(). */
static int wFilt, hFilt;
/* Two scratch pictures allocated by InitFilters().  The Mult...PictureSub
   routines select one of them with (mult & 1) to hold intermediate
   results between successive passes. */
Picture *filtPict[2] = {NULL, NULL};

/* ************************************************************************* */

/*
 * InitFilters -- set up the filter module.
 *
 * Remembers the working dimensions, allocates the two scratch pictures
 * filtPict[0] and filtPict[1] with AllocPicture(w, h) and fills the
 * interpolation lookup tables.
 *
 * w, h   dimensions handed to AllocPicture() for both scratch pictures
 */
void InitFilters(int w, int h)
{
  int slot;

  wFilt = w;
  hFilt = h;

  for (slot = 0; slot < 2; slot++) {
    filtPict[slot] = AllocPicture(w, h);
  }

  InitInterpolator();
}

/* ************************************************************************* */

/*
 * DisinitFilters -- release what InitFilters() set up.
 *
 * Frees both scratch pictures and clears the remembered dimensions.
 * filtPict[] is a non-static global, so each entry is reset to NULL after
 * it has been freed; this keeps other code from picking up a dangling
 * pointer.
 */
void DisinitFilters(void)
{
  int slot;

  wFilt = 0;
  hFilt = 0;

  for (slot = 0; slot < 2; slot++) {
    FreePicture(filtPict[slot]);
    filtPict[slot] = NULL;
  }
}

/* ************************************************************************* */

/*
 * Downsample -- halve a plane in both directions.
 *
 * Each output pixel is the rounded mean of a 2x2 cell of input pixels.
 *
 * in   input plane, iw x ih pixels, rows stored back to back
 * out  output plane, row length iw/2
 * iw   input width
 * ih   input height
 *
 * NOTE(review): the loops step by 2 and always read the pixel to the right
 * and the row below, so iw and ih are presumably expected to be even.
 */
static void Downsample(Byte *in, Byte *out, int iw, int ih) {
  const int halfWidth = iw >> 1;
  int row, col;

  for (row = 0; row < ih; row += 2) {
    Byte *upper = in + iw * row;              /* first row of the 2x2 cells  */
    Byte *lower = in + iw * (row + 1);        /* second row of the 2x2 cells */
    Byte *dst = out + (row >> 1) * halfWidth;

    for (col = 0; col < iw; col += 2) {
      int sum = upper[col] + upper[col + 1] + lower[col] + lower[col + 1];

      dst[col >> 1] = ROUND(sum / 4.0);
    }
  }
}

/* ************************************************************************* */

/*
 * DownsampleSub -- halve the top-left sub-image of a plane.
 *
 * Works like a 2x2 box filter: every output pixel is the rounded mean of
 * four input pixels.  Only the iws x ihs region starting at `in` is read;
 * input and output may have row lengths that differ from the region size.
 *
 * in   first pixel of the input region
 * out  first pixel of the output region
 * iw   row length (stride) of the input plane
 * iws  width of the region to sample down
 * ihs  height of the region to sample down
 * ow   row length (stride) of the output plane
 */
void DownsampleSub(Byte *in, Byte *out, int iw, int iws, int ihs, int ow) {
  int row, col;

  for (row = 0; row < ihs; row += 2) {
    Byte *upper = in + iw * row;
    Byte *lower = in + iw * (row + 1);
    Byte *dst = out + (row >> 1) * ow;

    for (col = 0; col < iws; col += 2) {
      int sum = upper[col] + upper[col + 1] + lower[col] + lower[col + 1];

      dst[col >> 1] = ROUND(sum / 4.0);
    }
  }
}

/* ************************************************************************* */

/*
 * Lookup tables for the interpolation filters, indexed by pixel value v:
 *   ITab[v]   = 3*v + 2   (3x weight plus rounding term for a >>2 filter)
 *   I3Tab[v]  = 3*v       (3x weight of the >>4 filter)
 *   I98Tab[v] = 9*v + 8   (9x weight plus rounding term of the >>4 filter)
 */
static unsigned short ITab[256], I3Tab[256], I98Tab[256];

/*
 * InitInterpolator -- fill the lookup tables above.
 *
 * Must run before any of the interpolation routines is used;
 * InitFilters() calls it.
 */
void InitInterpolator(void)
{
  int i;

  for (i=0; i<256; i++) {
    I3Tab[i] = i * 3;
    I98Tab[i] = i * 9 + 8;   /* +8 for rounding */
    ITab[i] = 3*i+2;         /* +2 for rounding */
  }
}
/* ************************************************************************* */

#ifdef SLOW_FILTERS
/*
 * Interpolate -- 2x upsampling of one plane in two separable passes.
 *
 * Pass 1 widens every input row to twice its length and stores it in every
 * second output row.  Pass 2 then works column by column, in place, and
 * fills in all output rows.  Both passes produce each output pixel as
 * (3*nearest + other neighbour + 2) >> 2, using ITab[v] = 3*v + 2.
 *
 * in   input plane, iw x ih pixels, rows stored back to back
 * out  output plane, (2*iw) x (2*ih) pixels, rows stored back to back
 * iw   input width
 * ih   input height
 *
 * NOTE(review): the iw == 1 shortcut writes a single 2x2 cell whatever ih
 * is, and the vertical pass reads the second stored row unconditionally,
 * so callers presumably pass ih >= 2 -- confirm.
 */
static void Interpolate(Byte *in, Byte *out, int iw, int ih) {
  register Byte *inData, *outData;
  register Byte x1,x2,x3;        /* previous, current and next sample */
  register unsigned short t;     /* 3 * current sample + 2 */

  int i,j;
  int ow = iw<<1;                /* output row length */
  int ow2 = ow<<1;               /* two output rows */
  int ow4 = ow2<<1;              /* four output rows */
  int oh = ih<<1;                /* output height */

  if (iw==1) {
    /* Single column: replicate the one pixel into a 2x2 cell */
    out[0] = out[1] = out[ow] = out[ow+1] = in[0];
    return;
  }

  /* Horizontal upsampling */

  inData = in;
 
  for (j=0; j<ih; j++, inData+=iw) {
    /* Left border: the pixel left of the row is taken equal to the first */
    x1 = inData[0]; x2 = x1; x3 = inData[1];
    /* Input row j goes to output row 2*j */
    outData = out+(j*ow2);
    for (i=0; i<(iw-1); i++, outData+=2) {
      t=ITab[x2];
      outData[0] = (Byte) ((t+x1)>>2); outData[1] = (Byte) ((t+x3)>>2);
      /* Slide the window.  On the last iteration inData[i+2] is the
         element just past this row; that value is not used afterwards. */
      x1 = x2; x2 = x3; x3 = inData[i+2];
    }
    t=ITab[x2];
    /* Last pixel in row (x2) is replicated */
    outData[0] = (Byte) ((t+x1)>>2); outData[1] = (Byte) ((t+x2)>>2);
  }

  /* Vertical upsampling */

  for (i=0; i<ow; i++) {
    outData = out + i;
    /* Top border: the row above the image is taken equal to the first */
    x1 = outData[0]; x2 = x1; x3 = outData[ow2];
    for (j=0; j<oh-4; j+=2, outData+=(ow2)) {
      t=ITab[x2];
      /* Overwrites the stored row (already held in x2) and the row below */
      outData[0] = (t+x1)>>2; outData[ow] = (t+x3)>>2;
      /* Fetch the next stored row before a later iteration overwrites it */
      x1 = x2; x2 = x3; x3 = outData[ow4];
    }
    /* Replicate border pixels */
    /* The last two stored rows are handled here; the row below the image
       is taken equal to the last one. */
    t=ITab[x2]; outData[0] = (t+x1)>>2; outData[ow] = (t+x3)>>2;
    t=ITab[x3]; outData[ow2] = (t+x2)>>2; outData[ow2+ow] = (t+x3)>>2;
  }
}

/* ************************************************************************* */

/*
 * InterpolateSub -- 2x upsampling of the top-left sub-image of a plane,
 * done in two separable passes.
 *
 * Pass 1 widens each of the ihs input rows (iws pixels of every row) and
 * stores it in every second output row.  Pass 2 then works column by
 * column, in place, over the 2*iws output columns and fills in all output
 * rows.  Both passes produce each output pixel as
 * (3*nearest + other neighbour + 2) >> 2, using ITab[v] = 3*v + 2.
 *
 * in   first pixel of the input region
 * out  first pixel of the output region
 * iw   row length (stride) of the input plane
 * iws  width of the region to interpolate
 * ihs  height of the region to interpolate
 * ow   row length (stride) of the output plane
 *
 * NOTE(review): the shortcut below tests the stride iw, not the region
 * width iws, and the vertical pass reads the second stored row
 * unconditionally, so callers presumably pass ihs >= 2 -- confirm.
 */
void InterpolateSub(Byte *in, Byte *out, int iw, int iws, int ihs, int ow) {
  register Byte *inData, *outData;
  register Byte x1,x2,x3;        /* previous, current and next sample */
  register unsigned short t;     /* 3 * current sample + 2 */

  int i,j;
  int ow2 = ow<<1;               /* two output rows */
  int ow4 = ow2<<1;              /* four output rows */
  int oh = ihs<<1;               /* height of the output region */
  int ows = iws<<1;              /* width of the output region */

  if (iw==1) {
    /* Single column: replicate the one pixel into a 2x2 cell */
    out[0] = out[1] = out[ow] = out[ow+1] = in[0];
    return;
  }

  /* Horizontal upsampling */

  inData = in;
 
  for (j=0; j<ihs; j++, inData+=iw) {
    /* Left border: the pixel left of the region is taken equal to the first */
    x1 = inData[0]; x2 = x1; x3 = inData[1];
    /* Input row j goes to output row 2*j */
    outData = out+(j*ow2);
    for (i=0; i<(iws-1); i++, outData+=2) {
      t=ITab[x2];
      outData[0] = (Byte) ((t+x1)>>2); outData[1] = (Byte) ((t+x3)>>2);
      /* Slide the window.  On the last iteration inData[i+2] is the
         element just right of the region; that value is not used
         afterwards. */
      x1 = x2; x2 = x3; x3 = inData[i+2];
    }
    t=ITab[x2];
    /* Last pixel in row (x2) is replicated */
    outData[0] = (Byte) ((t+x1)>>2); outData[1] = (Byte) ((t+x2)>>2);
  }

  /* Vertical upsampling */

  for (i=0; i<ows; i++) {
    outData = out + i;
    /* Top border: the row above the region is taken equal to the first */
    x1 = outData[0]; x2 = x1; x3 = outData[ow2];
    for (j=0; j<oh-4; j+=2, outData+=(ow2)) {
      t=ITab[x2];
      /* Overwrites the stored row (already held in x2) and the row below */
      outData[0] = (t+x1)>>2; outData[ow] = (t+x3)>>2;
      /* Fetch the next stored row before a later iteration overwrites it */
      x1 = x2; x2 = x3; x3 = outData[ow4];
    }
    /* Replicate border pixels */
    /* The last two stored rows are handled here; the row below the region
       is taken equal to the last one. */
    t=ITab[x2]; outData[0] = (t+x1)>>2; outData[ow] = (t+x3)>>2;
    t=ITab[x3]; outData[ow2] = (t+x2)>>2; outData[ow2+ow] = (t+x3)>>2;
  }
}
#endif

/* ************************************************************************* */
/*
 * InterpolateHV_onepass -- 2x upsampling of one plane in a single pass.
 *
 * Every input pixel is blown up to a 2x2 cell of output pixels.  Each
 * output pixel is
 *     (9*centre + 3*horizontal neighbour + 3*vertical neighbour
 *      + 1*diagonal neighbour + 8) >> 4
 * where the neighbours are taken on the side of the cell the output pixel
 * lies on.  Pixels outside the image are taken equal to the nearest border
 * pixel.  The weights come from I3Tab (3*v) and I98Tab (9*v + 8), so
 * InitInterpolator() must have been called before.
 *
 * ii  input plane, iw x ih pixels, rows stored back to back
 * oi  output plane, (2*iw) x (2*ih) pixels, rows stored back to back
 * iw  input width
 * ih  input height
 *
 * The results are written with plain byte stores.  An earlier version
 * packed four pixels into one store through a (long *) cast; that only
 * works when long is exactly four bytes and the output is suitably
 * aligned, and it overwrites neighbouring pixels where long is eight bytes.
 *
 * NOTE(review): apart from the iw == 1 shortcut the code reads the second
 * and the second-to-last row unconditionally and handles the interior two
 * pixels at a time, so it appears to require ih >= 2 and an even iw --
 * confirm with callers.
 */
static void InterpolateHV_onepass (Byte *ii, Byte *oi, int iw, int ih)
{
  Byte *outData, *lastData, *lii;
  int i,j;
  int ow = iw * 2;
  int ns1, ns2;
  /*
   *  variable naming: i for "input", m for "minus", p for "plus"
   *    w for width as in variable iw. Hence: ipwm1 = *(ii+iw-1).
   *    appended 3 means variable contains 3 times the value.
   *    a leading l marks the copies used for the last scanline.
   */
  int imwm1, imw, imw_3, im1, iv, imwp1, ip1, ipwm1, ipw, ipw_3, ipwp1;
  int lim1, limwm1, lipwm1;
  int imwp2, ip2, ipwp2, out11,out12,out13,out14,out21,out22,out23,out24;

  if (iw==1) {
    /* single column: ow is 2, so oi[2] and oi[3] are the second row */
    oi[0] = oi[1] = oi[2] = oi[3] = ii[0];
    return;
  }

  outData = oi;
  lii = ii + (ih-1)*iw;             /* last input scanline */
  lastData = oi + 2*(ih-1)*ow;      /* output rows of the last scanline */

  /**
   **  Deal with first and last scanline
   **/
  im1 = *ii;
  ipwm1 = *(ii+iw);
  lim1 = *lii;
  limwm1 = *(lii-iw);
  for (i=iw-1; i; i--) {
    iv = *ii;
    ip1 = *(ii+1);

    /* scanline above image is assumed to be identical to top line */
    imwm1 = im1;

    imw_3 = I3Tab[iv];
    imwp1 = ip1;

    ipw_3 = I3Tab[*(ii+iw)];
    ipwp1 = *(ii+iw+1);

    ns1 = I3Tab[im1] + I98Tab[iv];
    ns2 = I98Tab[iv] + I3Tab[ip1];

    *outData = (imwm1 + imw_3 + ns1)>>4;
    *(outData+1) = (imw_3 + imwp1 + ns2)>>4;
    *(outData+ow) = (ns1 + ipwm1 + ipw_3)>>4;
    *(outData+ow+1) = (ns2 + ipw_3 + ipwp1)>>4;
    outData += 2;
    ipwm1 = *(ii+iw);
    im1 = iv;
    ii++;

    /* scanline below image is assumed to be identical with last line */
    imw_3 = I3Tab[*(lii-iw)];
    imwp1 = *(lii-iw+1);

    iv = *lii;
    ip1 = *(lii+1);
    lipwm1 = lim1;
    ipw_3 = I3Tab[iv];
    ipwp1 = ip1;

    ns1 = I3Tab[lim1] + I98Tab[iv];
    ns2 = I98Tab[iv] + I3Tab[ip1];

    *lastData = (limwm1 + imw_3 + ns1)>>4;
    *(lastData+1) = (imw_3 + imwp1 + ns2)>>4;
    *(lastData+ow) = (ns1 + lipwm1 + ipw_3)>>4;
    *(lastData+ow+1) = (ns2 + ipw_3 + ipwp1)>>4;
    lastData += 2;
    limwm1 = *(lii-iw);
    lim1 = iv;
    lii++;
  }
  /* deal with last pixel of first & last scanline */
  iv = *ii;
  ip1 = iv;
  imwm1 = im1;
  imw_3 = I3Tab[iv];
  imwp1 = ip1;
  ipwm1 = *(ii+iw-1);
  ipw_3 = I3Tab[*(ii+iw)];
  ipwp1 = *(ii+iw);
  ns1 = I3Tab[im1] + I98Tab[iv];
  ns2 = I98Tab[iv] + I3Tab[ip1];
  *outData = (imwm1 + imw_3 + ns1)>>4;
  *(outData+1) = (imw_3 + imwp1 + ns2)>>4;
  *(outData+ow) = (ns1 + ipwm1 + ipw_3)>>4;
  *(outData+ow+1) = (ns2 + ipw_3 + ipwp1)>>4;
  outData += 2;
  ii++;

  imwm1 = *(lii-iw-1);
  imw_3 = I3Tab[*(lii-iw)];
  imwp1 = *(lii-iw);
  iv = *lii;
  ip1 = iv;
  ipwm1 = lim1;
  ipw_3 = I3Tab[iv];
  ipwp1 = iv;
  ns1 = I3Tab[lim1] + I98Tab[iv];
  ns2 = I98Tab[iv] + I3Tab[ip1];
  *lastData = (imwm1 + imw_3 + ns1)>>4;
  *(lastData+1) = (imw_3 + imwp1 + ns2)>>4;
  *(lastData+ow) = (ns1 + ipwm1 + ipw_3)>>4;
  *(lastData+ow+1) = (ns2 + ipw_3 + ipwp1)>>4;
  /* two output rows are done: skip the second one */
  outData += ow;

  /**
   **  Blow each input pixel up to 4 interpolated pixels in output image
   **  unrolled one time to spare a few register moves
   **/
  for (j=ih-2; j; j--) {
    iv = *ii;
    ipw = *(ii+iw);
    im1 = iv;          /* -1 omitted on leftmost pixel in line */
    imw = *(ii-iw);
    imwm1 = imw;
    ipwm1 = ipw;
    for (i=iw/2-1; i; i--) {
      imwp1 = *(ii-iw+1);
      ip1 = *(ii+1);
      ipwp1 = *(ii+iw+1);

      imw_3 = I3Tab[imw];
      ipw_3 = I3Tab[ipw];

      ns1 = I3Tab[im1] + I98Tab[iv];
      ns2 = I98Tab[iv] + I3Tab[ip1];

      out11 = imwm1 + imw_3 + ns1;
      out12 = imw_3 + imwp1 + ns2;
      out21 = ns1 + ipwm1 + ipw_3;
      out22 = ns2 + ipw_3 + ipwp1;

      /* unrolled */

      imwp2 = *(ii-iw+2);
      ip2 = *(ii+2);
      ipwp2 = *(ii+iw+2);

      imw_3 = I3Tab[imwp1];
      ipw_3 = I3Tab[ipwp1];

      ns1 = I3Tab[iv] + I98Tab[ip1];
      ns2 = I98Tab[ip1] + I3Tab[ip2];

      out13 = imw + imw_3 + ns1;
      out14 = imw_3 + imwp2 + ns2;
      out23 = ns1 + ipw + ipw_3;
      out24 = ns2 + ipw_3 + ipwp2;

      /* byte stores: independent of sizeof(long), alignment, endianness */
      outData[0] = (Byte) (out11 >> 4);
      outData[1] = (Byte) (out12 >> 4);
      outData[2] = (Byte) (out13 >> 4);
      outData[3] = (Byte) (out14 >> 4);
      outData[ow] = (Byte) (out21 >> 4);
      outData[ow+1] = (Byte) (out22 >> 4);
      outData[ow+2] = (Byte) (out23 >> 4);
      outData[ow+3] = (Byte) (out24 >> 4);

      outData += 4;
      ii += 2;
      imwm1 = imwp1;
      im1 = ip1;
      ipwm1 = ipwp1;
      imw = imwp2;
      iv = ip2;
      ipw = ipwp2;
    }
    /*
     *  Deal with the last two pixels in the scanline separately
     *  - the 2nd last would need no special handling, but the loop
     *    above works on two pels at a time
     *  - for interpolation pel right to image is same as rightmost
     *    (same as with all other borders)
     */
    imwp1 = *(ii-iw+1);
    ip1 = *(ii+1);
    ipwp1 = *(ii+iw+1);
    imw_3 = I3Tab[imw];
    ipw_3 = I3Tab[ipw];
    ns1 = I3Tab[im1] + I98Tab[iv];
    ns2 = I98Tab[iv] + I3Tab[ip1];
    out11 = imwm1 + imw_3 + ns1;
    out12 = imw_3 + imwp1 + ns2;
    out21 = ns1 + ipwm1 + ipw_3;
    out22 = ns2 + ipw_3 + ipwp1;

    /* now the very last one */
    imw_3 = I3Tab[imwp1];
    ipw_3 = I3Tab[ipwp1];
    ns1 = I3Tab[iv] + I98Tab[ip1];
    ns2 = I98Tab[ip1] + I3Tab[ip1];
    out13 = imw + imw_3 + ns1;
    out14 = imw_3 + imwp1 + ns2;
    out23 = ns1 + ipw + ipw_3;
    out24 = ns2 + ipw_3 + ipwp1;

    outData[0] = (Byte) (out11 >> 4);
    outData[1] = (Byte) (out12 >> 4);
    outData[2] = (Byte) (out13 >> 4);
    outData[3] = (Byte) (out14 >> 4);
    outData[ow] = (Byte) (out21 >> 4);
    outData[ow+1] = (Byte) (out22 >> 4);
    outData[ow+2] = (Byte) (out23 >> 4);
    outData[ow+3] = (Byte) (out24 >> 4);

    ii += 2;
    /* finish this output row and skip the second one written alongside */
    outData += ow + 4;
  }
}

/* ************************************************************************* */
/*
 * InterpolateHV_onepassSub -- 2x upsampling of the top-left sub-image of
 * a plane in a single pass.
 *
 * Every input pixel is blown up to a 2x2 cell of output pixels.  Each
 * output pixel is
 *     (9*centre + 3*horizontal neighbour + 3*vertical neighbour
 *      + 1*diagonal neighbour + 8) >> 4
 * where the neighbours are taken on the side of the cell the output pixel
 * lies on.  Pixels outside the sub-image are taken equal to the nearest
 * border pixel.  The weights come from I3Tab (3*v) and I98Tab (9*v + 8),
 * so InitInterpolator() must have been called before.
 *
 * ii   first pixel of the input region
 * oi   first pixel of the output region
 * iw   row length (stride) of the input plane
 * iws  width of the region to interpolate
 * ihs  height of the region to interpolate
 * ow   row length (stride) of the output plane
 *
 * The results are written with plain byte stores.  An earlier version of
 * the iws % 4 == 0 path packed four pixels into one store through a
 * (long *) cast; that only works when long is exactly four bytes and the
 * output is suitably aligned, and it overwrites neighbouring pixels where
 * long is eight bytes.
 *
 * NOTE(review): the shortcut below tests the stride iw, not the region
 * width iws, and the code reads the second and the second-to-last row
 * unconditionally, so it appears to require ihs >= 2 -- confirm with
 * callers.  In the (iws & 3) path with an even iws the pair loop also
 * looks ahead one pixel to the right of the region before the last pixel
 * is recomputed.
 */
void InterpolateHV_onepassSub (Byte *ii, Byte *oi,
			       int iw, int iws, int ihs, int ow)
{
  Byte *outData, *lastData, *lii;
  int i,j;
  int ns1, ns2;
  int is = iw - iws; /* input skip */
  int os = ow - 2 * iws; /* output skip */
  /*
   *  variable naming: i for "input", m for "minus", p for "plus"
   *    w for width as in variable iw. Hence: ipwm1 = *(ii+iw-1).
   *    appended 3 means variable contains 3 times the value.
   *    a leading l marks the copies used for the last scanline.
   */
  int imwm1, imw, imw_3, im1, iv, imwp1, ip1, ipwm1, ipw, ipw_3, ipwp1;
  int lim1, limwm1, lipwm1;
  int imwp2, ip2, ipwp2, out11,out12,out13,out14,out21,out22,out23,out24;

  if (iw==1) {
    /* single column: replicate the one pixel into a 2x2 cell */
    oi[0] = oi[1] = oi[ow] = oi[ow+1] = ii[0];
    return;
  }

  outData = oi;
  lii = ii + (ihs-1)*iw;            /* last scanline of the region */
  lastData = oi + 2*(ihs-1)*ow;     /* output rows of the last scanline */

  /**
   **  Deal with first and last scanline
   **/
  im1 = *ii;
  ipwm1 = *(ii+iw);
  lim1 = *lii;
  limwm1 = *(lii-iw);
  for (i=iws-1; i; i--) {
    iv = *ii;
    ip1 = *(ii+1);

    /* scanline above image is assumed to be identical to top line */
    imwm1 = im1;

    imw_3 = I3Tab[iv];
    imwp1 = ip1;

    ipw_3 = I3Tab[*(ii+iw)];
    ipwp1 = *(ii+iw+1);

    ns1 = I3Tab[im1] + I98Tab[iv];
    ns2 = I98Tab[iv] + I3Tab[ip1];

    *outData = (imwm1 + imw_3 + ns1)>>4;
    *(outData+1) = (imw_3 + imwp1 + ns2)>>4;
    *(outData+ow) = (ns1 + ipwm1 + ipw_3)>>4;
    *(outData+ow+1) = (ns2 + ipw_3 + ipwp1)>>4;
    outData += 2;
    ipwm1 = *(ii+iw);
    im1 = iv;
    ii++;

    /* scanline below image is assumed to be identical with last line */
    imw_3 = I3Tab[*(lii-iw)];
    imwp1 = *(lii-iw+1);

    iv = *lii;
    ip1 = *(lii+1);
    lipwm1 = lim1;
    ipw_3 = I3Tab[iv];
    ipwp1 = ip1;

    ns1 = I3Tab[lim1] + I98Tab[iv];
    ns2 = I98Tab[iv] + I3Tab[ip1];

    *lastData = (limwm1 + imw_3 + ns1)>>4;
    *(lastData+1) = (imw_3 + imwp1 + ns2)>>4;
    *(lastData+ow) = (ns1 + lipwm1 + ipw_3)>>4;
    *(lastData+ow+1) = (ns2 + ipw_3 + ipwp1)>>4;
    lastData += 2;
    limwm1 = *(lii-iw);
    lim1 = iv;
    lii++;
  }
  /* deal with last pixel of first & last scanline */
  iv = *ii;
  ip1 = iv;
  imwm1 = im1;
  imw_3 = I3Tab[iv];
  imwp1 = ip1;
  ipwm1 = *(ii+iw-1);
  ipw_3 = I3Tab[*(ii+iw)];
  ipwp1 = *(ii+iw);
  ns1 = I3Tab[im1] + I98Tab[iv];
  ns2 = I98Tab[iv] + I3Tab[ip1];
  *outData = (imwm1 + imw_3 + ns1)>>4;
  *(outData+1) = (imw_3 + imwp1 + ns2)>>4;
  *(outData+ow) = (ns1 + ipwm1 + ipw_3)>>4;
  *(outData+ow+1) = (ns2 + ipw_3 + ipwp1)>>4;
  outData += 2;
  ii++;

  imwm1 = *(lii-iw-1);
  imw_3 = I3Tab[*(lii-iw)];
  imwp1 = *(lii-iw);
  iv = *lii;
  ip1 = iv;
  ipwm1 = lim1;
  ipw_3 = I3Tab[iv];
  ipwp1 = iv;
  ns1 = I3Tab[lim1] + I98Tab[iv];
  ns2 = I98Tab[iv] + I3Tab[ip1];
  *lastData = (imwm1 + imw_3 + ns1)>>4;
  *(lastData+1) = (imw_3 + imwp1 + ns2)>>4;
  *(lastData+ow) = (ns1 + ipwm1 + ipw_3)>>4;
  *(lastData+ow+1) = (ns2 + ipw_3 + ipwp1)>>4;
  /* two output rows are done: skip the second one */
  outData += ow;

  /* Set the pointers because of the skip */
  ii += is;
  outData += os;

  if ((iws & 3)) {
    /**
     **  Region width is not a multiple of 4.
     **  Blow each input pixel up to 4 interpolated pixels in output image
     **  unrolled one time to spare a few register moves
     **/
    for (j=ihs-2; j; j--) {
      iv = *ii;
      ipw = *(ii+iw);
      im1 = iv;          /* -1 omitted on leftmost pixel in line */
      imw = *(ii-iw);
      imwm1 = imw;
      ipwm1 = ipw;
      for (i=iws/2; i; i--) {
        imwp1 = *(ii-iw+1);
        ip1 = *(ii+1);
        ipwp1 = *(ii+iw+1);

        imw_3 = I3Tab[imw];
        ipw_3 = I3Tab[ipw];

        ns1 = I3Tab[im1] + I98Tab[iv];
        ns2 = I98Tab[iv] + I3Tab[ip1];

        out11 = imwm1 + imw_3 + ns1;
        out12 = imw_3 + imwp1 + ns2;
        out21 = ns1 + ipwm1 + ipw_3;
        out22 = ns2 + ipw_3 + ipwp1;

        /* unrolled */

        imwp2 = *(ii-iw+2);
        ip2 = *(ii+2);
        ipwp2 = *(ii+iw+2);

        imw_3 = I3Tab[imwp1];
        ipw_3 = I3Tab[ipwp1];

        ns1 = I3Tab[iv] + I98Tab[ip1];
        ns2 = I98Tab[ip1] + I3Tab[ip2];

        out13 = imw + imw_3 + ns1;
        out14 = imw_3 + imwp2 + ns2;
        out23 = ns1 + ipw + ipw_3;
        out24 = ns2 + ipw_3 + ipwp2;

        outData[0] = (out11 >> 4);
        outData[1] = (out12 >> 4);
        outData[2] = (out13 >> 4);
        outData[3] = (out14 >> 4);
        outData[ow] = (out21 >> 4);
        outData[ow+1] = (out22 >> 4);
        outData[ow+2] = (out23 >> 4);
        outData[ow+3] = (out24 >> 4);

        outData += 4;
        ii += 2;
        imwm1 = imwp1;
        im1 = ip1;
        ipwm1 = ipwp1;
        imw = imwp2;
        iv = ip2;
        ipw = ipwp2;
      }
      /*
       *  Deal with the last pixel in the scanline separately
       *  - for interpolation pel right to image is same as rightmost
       *    (same as with all other borders)
       *  - with an even width the pair loop has already passed the last
       *    pixel, so step back and compute it again with the border rule
       */
      if (!(iws & 1)) {
        ii--;
        outData -= 2;
      }

      iv = *ii;
      im1 = *(ii-1);
      imw = *(ii-iw);
      imwm1 = *(ii-iw-1);
      ipw = *(ii+iw);
      ipwm1 = *(ii+iw-1);

      /* now the very last one */
      imw_3 = I3Tab[imw];
      ipw_3 = I3Tab[ipw];
      ns1 = I3Tab[im1] + I98Tab[iv];
      ns2 = I98Tab[iv] + I3Tab[iv];
      out13 = imwm1 + imw_3 + ns1;
      out14 = imw_3 + imw + ns2;
      out23 = ns1 + ipwm1 + ipw_3;
      out24 = ns2 + ipw_3 + ipw;

      outData[0] = (out13 >> 4);
      outData[1] = (out14 >> 4);
      outData[ow] = (out23 >> 4);
      outData[ow+1] = (out24 >> 4);

      ii++;
      outData += ow + 2;

      /* Set the pointers because of the skip */
      ii += is;
      outData += os;
    }
  } else {
    /**
     **  Region width is a multiple of 4.
     **  Blow each input pixel up to 4 interpolated pixels in output image
     **  unrolled one time to spare a few register moves
     **/
    for (j=ihs-2; j; j--) {
      iv = *ii;
      ipw = *(ii+iw);
      im1 = iv;          /* -1 omitted on leftmost pixel in line */
      imw = *(ii-iw);
      imwm1 = imw;
      ipwm1 = ipw;
      for (i=iws/2-1; i; i--) {
        imwp1 = *(ii-iw+1);
        ip1 = *(ii+1);
        ipwp1 = *(ii+iw+1);

        imw_3 = I3Tab[imw];
        ipw_3 = I3Tab[ipw];

        ns1 = I3Tab[im1] + I98Tab[iv];
        ns2 = I98Tab[iv] + I3Tab[ip1];

        out11 = imwm1 + imw_3 + ns1;
        out12 = imw_3 + imwp1 + ns2;
        out21 = ns1 + ipwm1 + ipw_3;
        out22 = ns2 + ipw_3 + ipwp1;

        /* unrolled */

        imwp2 = *(ii-iw+2);
        ip2 = *(ii+2);
        ipwp2 = *(ii+iw+2);

        imw_3 = I3Tab[imwp1];
        ipw_3 = I3Tab[ipwp1];

        ns1 = I3Tab[iv] + I98Tab[ip1];
        ns2 = I98Tab[ip1] + I3Tab[ip2];

        out13 = imw + imw_3 + ns1;
        out14 = imw_3 + imwp2 + ns2;
        out23 = ns1 + ipw + ipw_3;
        out24 = ns2 + ipw_3 + ipwp2;

        /* byte stores: independent of sizeof(long), alignment, endianness */
        outData[0] = (Byte) (out11 >> 4);
        outData[1] = (Byte) (out12 >> 4);
        outData[2] = (Byte) (out13 >> 4);
        outData[3] = (Byte) (out14 >> 4);
        outData[ow] = (Byte) (out21 >> 4);
        outData[ow+1] = (Byte) (out22 >> 4);
        outData[ow+2] = (Byte) (out23 >> 4);
        outData[ow+3] = (Byte) (out24 >> 4);

        outData += 4;
        ii += 2;
        imwm1 = imwp1;
        im1 = ip1;
        ipwm1 = ipwp1;
        imw = imwp2;
        iv = ip2;
        ipw = ipwp2;
      }
      /*
       *  Deal with the last two pixels in the scanline separately
       *  - the 2nd last would need no special handling, but the loop
       *    above works on two pels at a time
       *  - for interpolation pel right to image is same as rightmost
       *    (same as with all other borders)
       */
      imwp1 = *(ii-iw+1);
      ip1 = *(ii+1);
      ipwp1 = *(ii+iw+1);
      imw_3 = I3Tab[imw];
      ipw_3 = I3Tab[ipw];
      ns1 = I3Tab[im1] + I98Tab[iv];
      ns2 = I98Tab[iv] + I3Tab[ip1];
      out11 = imwm1 + imw_3 + ns1;
      out12 = imw_3 + imwp1 + ns2;
      out21 = ns1 + ipwm1 + ipw_3;
      out22 = ns2 + ipw_3 + ipwp1;

      /* now the very last one */
      imw_3 = I3Tab[imwp1];
      ipw_3 = I3Tab[ipwp1];
      ns1 = I3Tab[iv] + I98Tab[ip1];
      ns2 = I98Tab[ip1] + I3Tab[ip1];
      out13 = imw + imw_3 + ns1;
      out14 = imw_3 + imwp1 + ns2;
      out23 = ns1 + ipw + ipw_3;
      out24 = ns2 + ipw_3 + ipwp1;

      outData[0] = (Byte) (out11 >> 4);
      outData[1] = (Byte) (out12 >> 4);
      outData[2] = (Byte) (out13 >> 4);
      outData[3] = (Byte) (out14 >> 4);
      outData[ow] = (Byte) (out21 >> 4);
      outData[ow+1] = (Byte) (out22 >> 4);
      outData[ow+2] = (Byte) (out23 >> 4);
      outData[ow+3] = (Byte) (out24 >> 4);

      ii += 2;
      outData += ow + 4;

      /* Set the pointers because of the skip */
      ii += is;
      outData += os;
    }
  }
}

/* ************************************************************************* */

/*
 * DownsamplePicture -- halve a complete picture.
 *
 * The y plane is sampled down at in->w x in->h, the u and v planes at half
 * that width and height.  `out` must provide planes of matching size.
 */
void DownsamplePicture(Picture *in, Picture *out) {
  Byte *srcY = in->y, *dstY = out->y;
  Byte *srcU = in->u, *dstU = out->u;
  Byte *srcV = in->v, *dstV = out->v;

  /* luma at full size */
  Downsample(srcY, dstY, in->w, in->h);

  /* both chroma planes at half width and half height */
  Downsample(srcU, dstU, in->w/2, in->h/2);
  Downsample(srcV, dstV, in->w/2, in->h/2);
}

/* ************************************************************************* */

#ifdef SLOW_FILTERS
/*
 * InterpolatePicture -- double a complete picture with the two-pass
 * filter (only built with SLOW_FILTERS).
 *
 * The y plane is interpolated at in->w x in->h, the u and v planes at half
 * that width and height.  `out` must provide planes of matching size.
 */
void InterpolatePicture(Picture *in, Picture *out) {
  Byte *srcY = in->y, *dstY = out->y;
  Byte *srcU = in->u, *dstU = out->u;
  Byte *srcV = in->v, *dstV = out->v;

  /* luma at full size */
  Interpolate(srcY, dstY, in->w, in->h);

  /* both chroma planes at half width and half height */
  Interpolate(srcU, dstU, in->w/2, in->h/2);
  Interpolate(srcV, dstV, in->w/2, in->h/2);
}
#endif

/* ************************************************************************* */

/*
 * InterpolatePictureFast -- double a complete picture with the one-pass
 * filter.
 *
 * The y plane is interpolated at in->w x in->h, the u and v planes at half
 * that width and height.  `out` must provide planes of matching size.
 */
void InterpolatePictureFast(Picture *in, Picture *out) {
  Byte *srcY = in->y, *dstY = out->y;
  Byte *srcU = in->u, *dstU = out->u;
  Byte *srcV = in->v, *dstV = out->v;

  /* luma at full size */
  InterpolateHV_onepass(srcY, dstY, in->w, in->h);

  /* both chroma planes at half width and half height */
  InterpolateHV_onepass(srcU, dstU, in->w/2, in->h/2);
  InterpolateHV_onepass(srcV, dstV, in->w/2, in->h/2);
}

/* ************************************************************************* */


/***********************************************************CommentBegin******
 *****************************************************************************
 *
 * -- DownsamplePictureSub -- Sample down a picture (possibly a subimage)
 *
 * Author:              K.S.
 *
 * Created:             28-May-98
 *
 * Purpose:             Samples the pictIn->ws x pictIn->hs subimage of a
 *                      picture down by a factor of 2.
 *                      All picture dimensions are multiples of 16.
 * 
 * Arguments in:        Picture *pictIn    input picture
 *                      Picture *pictOut   output picture (half size of in.p.)
 *
 * Arguments in/out:    -
 *
 * Arguments out:       -
 *
 * Return values:       -
 *
 * Example:             DownsamplePictureSub(pictIn,pictOut);
 *
 * Side effects:        -
 *
 * Description:         The pictures must have the appropriate sizes.
 *                      pictIn->w and pictOut->w are used as row lengths of
 *                      the y planes, half of them for the u and v planes.
 *                      If CENTERED is defined, the subimage is taken from
 *                      the middle of the input picture and the result is
 *                      placed in the middle of the output picture;
 *                      otherwise both start at the top-left corner.
 *
 * See also:            -
 *
 * Modified:            The CENTERED branch used the undeclared names
 *                      wi/hi; it now uses the subimage size ws/hs.
 *
 *****************************************************************************/
/* #define CENTERED */

void DownsamplePictureSub(Picture *pictIn, Picture *pictOut)
 /**********************************************************CommentEnd********/
{
  int ws = pictIn->ws, hs = pictIn->hs;
  Byte *yi, *ui, *vi, *yo, *uo, *vo;
#ifdef CENTERED
  int  offsetXi = 0, offsetYi = 0, offsetXo = 0, offsetYo = 0;
#endif

  yi = pictIn->y;
  ui = pictIn->u;
  vi = pictIn->v;
  yo = pictOut->y;
  uo = pictOut->u;
  vo = pictOut->v;

#ifdef CENTERED
  /* Centre the ws x hs subimage in the input and its half-size result
     in the output. */
  offsetXi = (pictIn->w - ws) / 2;
  offsetYi = (pictIn->h - hs) / 2;
  offsetXo = (pictOut->w - ws / 2) / 2;
  offsetYo = (pictOut->h - hs / 2) / 2;

  yi += (offsetXi + offsetYi * pictIn->w);
  ui += ((offsetXi>>1) + (offsetYi>>1) * (pictIn->w>>1));
  vi += ((offsetXi>>1) + (offsetYi>>1) * (pictIn->w>>1));
  yo += (offsetXo + offsetYo * pictOut->w);
  uo += ((offsetXo>>1) + (offsetYo>>1) * (pictOut->w>>1));
  vo += ((offsetXo>>1) + (offsetYo>>1) * (pictOut->w>>1));
#endif

  /* y plane at full subimage size, u and v at half of it */
  DownsampleSub(yi, yo, pictIn->w, ws, hs, pictOut->w);
  DownsampleSub(ui, uo, pictIn->w/2, ws/2, hs/2, pictOut->w/2);
  DownsampleSub(vi, vo, pictIn->w/2, ws/2, hs/2, pictOut->w/2);
}




/***********************************************************CommentBegin******
 *****************************************************************************
 *
 * -- MultDownsamplePictureSub -- Sample down a picture several times
 *                                (possibly a subimage)
 *
 * Author:              K.S.
 *
 * Created:             7-Jul-98
 *
 * Purpose:             Samples a picture down by a factor of 2, 4, 8, ....
 *                      All picture dimensions are multiples of 16.
 * 
 * Arguments in:        int     mult       the picture is sampled down
 *                                         mult times by a factor of 2;
 *                                         0 copies the picture.
 *                      Picture *pictIn    input picture
 *                      Picture *pictOut   output picture
 *
 * Arguments in/out:    -
 *
 * Arguments out:       -
 *
 * Return values:       -
 *
 * Example:             MultDownsamplePictureSub(mult,pictIn,pictOut);
 *
 * Side effects:        For mult >= 2 the scratch pictures filtPict[0] and
 *                      filtPict[1] are overwritten with intermediate
 *                      results, so InitFilters() must have been called.
 *
 * Description:         The pictures must have the appropriate sizes.
 *                      The subimage pictIn->ws x pictIn->hs is sampled
 *                      down; the row length of each intermediate result is
 *                      pictOut->w shifted left by the number of passes
 *                      still to come.
 *                      NOTE(review): a negative mult is not handled --
 *                      callers presumably never pass one.
 *
 * See also:            -
 *
 * Modified:            Removed an unused local.
 *
 *****************************************************************************/
void MultDownsamplePictureSub(int mult, Picture *pictIn, Picture *pictOut)
 /**********************************************************CommentEnd********/
{
  if (mult == 0) {
    CopyPicture(pictIn, pictOut);
  } else {
    int wi, wis, his, wo;
    Byte *yi, *ui, *vi, *yo, *uo, *vo;

    yi = pictIn->y;
    ui = pictIn->u;
    vi = pictIn->v;
    wi = pictIn->w;
    wis = pictIn->ws;
    his = pictIn->hs;

    /* All passes but the last one write into the scratch pictures,
       alternating between the two. */
    for (mult--; mult; mult--) {
      yo = filtPict[mult&1]->y;
      uo = filtPict[mult&1]->u;
      vo = filtPict[mult&1]->v;
      wo = (pictOut->w << mult);

      DownsampleSub(yi, yo, wi, wis, his, wo);
      DownsampleSub(ui, uo, wi/2, wis/2, his/2, wo/2);
      DownsampleSub(vi, vo, wi/2, wis/2, his/2, wo/2);

      /* The result of this pass is the input of the next one */
      yi = yo;
      ui = uo;
      vi = vo;
      wi = wo;
      wis = (wis >> 1);
      his = (his >> 1);
    }

    /* The last pass writes into the output picture */
    yo = pictOut->y;
    uo = pictOut->u;
    vo = pictOut->v;

    DownsampleSub(yi, yo, wi, wis, his, pictOut->w);
    DownsampleSub(ui, uo, wi/2, wis/2, his/2, pictOut->w/2);
    DownsampleSub(vi, vo, wi/2, wis/2, his/2, pictOut->w/2);
  }
}




/***********************************************************CommentBegin******
 *****************************************************************************
 *
 * -- InterpolateFastPictureSub -- Interpolate a picture (possibly a subimage)
 *
 * Author:              K.S.
 *
 * Created:             28-May-98
 *
 * Purpose:             Interpolates the pictIn->ws x pictIn->hs subimage of
 *                      a picture to twice its size.
 *                      All picture dimensions are multiples of 16.
 *                      If CENTERED is defined, the subimage is extracted
 *                      from the middle of the input picture and the result
 *                      is inserted in the middle of the output picture;
 *                      otherwise both start at the top-left corner.
 * 
 * Arguments in:        Picture *pictIn    input picture
 *                      Picture *pictOut   output picture (double size of in.p.)
 *
 * Arguments in/out:    -
 *
 * Arguments out:       -
 *
 * Return values:       -
 *
 * Example:             InterpolateFastPictureSub(pictIn,pictOut);
 *
 * Side effects:        -
 *
 * Description:         The pictures must have the appropriate sizes.
 *                      pictIn->w and pictOut->w are used as row lengths of
 *                      the y planes, half of them for the u and v planes.
 *
 * See also:            InterpolateHV_onepassSub
 *
 * Modified:            -
 *
 *****************************************************************************/
void InterpolateFastPictureSub(Picture *pictIn, Picture *pictOut)
 /**********************************************************CommentEnd********/
{
  int subW = pictIn->ws;
  int subH = pictIn->hs;
  Byte *srcY = pictIn->y;
  Byte *srcU = pictIn->u;
  Byte *srcV = pictIn->v;
  Byte *dstY = pictOut->y;
  Byte *dstU = pictOut->u;
  Byte *dstV = pictOut->v;

#ifdef CENTERED
  {
    /* Centre the subimage in the input and its double-size result in
       the output. */
    int offsetXi = (pictIn->w - subW) / 2;
    int offsetYi = (pictIn->h - subH) / 2;
    int offsetXo = (pictOut->w - 2 * subW) / 2;
    int offsetYo = (pictOut->h - 2 * subH) / 2;

    srcY += (offsetXi + offsetYi * pictIn->w);
    srcU += ((offsetXi>>1) + (offsetYi>>1) * (pictIn->w>>1));
    srcV += ((offsetXi>>1) + (offsetYi>>1) * (pictIn->w>>1));
    dstY += (offsetXo + offsetYo * pictOut->w);
    dstU += ((offsetXo>>1) + (offsetYo>>1) * (pictOut->w>>1));
    dstV += ((offsetXo>>1) + (offsetYo>>1) * (pictOut->w>>1));
  }
#endif

  /* y plane at full subimage size, u and v at half of it */
  InterpolateHV_onepassSub(srcY, dstY, pictIn->w, subW, subH, pictOut->w);
  InterpolateHV_onepassSub(srcU, dstU, pictIn->w/2, subW/2, subH/2,
                           pictOut->w/2);
  InterpolateHV_onepassSub(srcV, dstV, pictIn->w/2, subW/2, subH/2,
                           pictOut->w/2);
}


/***********************************************************CommentBegin******
 *****************************************************************************
 *
 * -- MultInterpolateFastPictureSub -- Interpolate a picture several times
 *                                     (possibly a subimage)
 *
 * Author:              K.S.
 *
 * Created:             7-Jul-98
 *
 * Purpose:             Interpolates a picture to 2, 4, 8, ... times its
 *                      size.
 *                      All picture dimensions are multiples of 16.
 * 
 * Arguments in:        int     mult       the picture is interpolated
 *                                         mult times by a factor of 2;
 *                                         0 copies the picture.
 *                      Picture *pictIn    input picture
 *                      Picture *pictOut   output picture
 *
 * Arguments in/out:    -
 *
 * Arguments out:       -
 *
 * Return values:       -
 *
 * Example:             MultInterpolateFastPictureSub(mult,pictIn,pictOut);
 *
 * Side effects:        For mult >= 2 the scratch pictures filtPict[0] and
 *                      filtPict[1] are overwritten with intermediate
 *                      results, so InitFilters() must have been called.
 *
 * Description:         The pictures must have the appropriate sizes.
 *                      The subimage pictIn->ws x pictIn->hs is
 *                      interpolated; each intermediate result is stored
 *                      with a row length equal to its own width.
 *
 * See also:            InterpolateHV_onepassSub
 *
 * Modified:            -
 *
 *****************************************************************************/
void MultInterpolateFastPictureSub(int mult, Picture *pictIn, Picture *pictOut)
 /**********************************************************CommentEnd********/
{
  Byte *srcY, *srcU, *srcV;
  int srcStride, subW, subH;
  int pass;

  if (mult == 0) {
    CopyPicture(pictIn, pictOut);
    return;
  }

  srcY = pictIn->y;
  srcU = pictIn->u;
  srcV = pictIn->v;
  srcStride = pictIn->w;
  subW = pictIn->ws;
  subH = pictIn->hs;

  /* All passes but the last one write into the scratch pictures,
     alternating between the two. */
  pass = mult - 1;
  while (pass) {
    Picture *scratch = filtPict[pass&1];
    Byte *dstY = scratch->y;
    Byte *dstU = scratch->u;
    Byte *dstV = scratch->v;
    int dstStride = (subW << 1);

    InterpolateHV_onepassSub(srcY, dstY, srcStride, subW, subH, dstStride);
    /* chroma row length is dstStride/2, which equals subW */
    InterpolateHV_onepassSub(srcU, dstU, srcStride/2, subW/2, subH/2, subW);
    InterpolateHV_onepassSub(srcV, dstV, srcStride/2, subW/2, subH/2, subW);

    /* The result of this pass is the input of the next one */
    srcY = dstY;
    srcU = dstU;
    srcV = dstV;
    srcStride = dstStride;
    subW = (subW << 1);
    subH = (subH << 1);

    pass--;
  }

  /* The last pass writes into the output picture */
  InterpolateHV_onepassSub(srcY, pictOut->y, srcStride, subW, subH,
                           pictOut->w);
  InterpolateHV_onepassSub(srcU, pictOut->u, srcStride/2, subW/2, subH/2,
                           pictOut->w/2);
  InterpolateHV_onepassSub(srcV, pictOut->v, srcStride/2, subW/2, subH/2,
                           pictOut->w/2);
}

