#include <values.h>
#include <stdio.h>
#include <string.h>

#include "Crop.h"
#include "motest.h"
#include "Parameter.h"
#include "Image.h"
#include "BlockFuncs.h"
#include "Headers.h"
#include "Syntax.h"
#include "CodeSymbol.h"
#include "BlockCoder.h"
#include "Filter.h"
#include "Output.h"
#include "Statistics.h"
#include "KhorosIO.h"
#include "Lattice.h"

#include "Motion.h"
#include "config.h"
#include "dpcm.h"

#include "Buffers.h"

#define BPMC1

/* #define FAST_ME */

extern double gyVar, guVar, gvVar;
extern void Code4x4Block(Byte *image, int iw, int lum);
extern void Code8x8Block(Byte *image, int iw, int lum);

static int gbw, gbh;
static int gyact[256], guact[64], gvact[64];
static int gyspre[256], guspre[64], gvspre[64];
static int gytplp[256], gutplp[64], gvtplp[64];
static int gytpbp[256], gutpbp[64], gvtpbp[64];
static int gytmp[256], gutmp[64], gvtmp[64];

static int BlockSAD (int *act, int *spre, int size) {
  int i;
  int sum;
  int diff;

  sum = 0;

  for (i=0; i<size; i++) {
    diff = act[i] - spre[i];
    sum += ABS(diff);
  }
  return(sum);
}

static int IntraVar (int *act, int size) {
  int i;
  int mean, var;

  mean = 0;
  for (i=0; i<size; i++) mean += act[i];
  mean = ROUND(mean/(double)size);

  var = 0;
  for (i=0; i<size; i++) var += ABS(act[i] - mean);

  return(var);
}

void BlockSubsamp8 (int *b1, int *b2, int w, int h) {
  int i, j, ii, jj;
  int sum;

  for (j=0; j<h; j+=8)
    for (i=0; i<w; i+=8) {
      sum = 0;
      for (jj=0; jj<8; jj++)
	for (ii=0; ii<8; ii++)
	  sum += b1[w*(j+jj)+i+ii];

      b2[(j/8)*(w/8)+(i/8)] = ROUND(sum/64.0);
    }
}

void BlockUpsamp8 (int *b1, int *b2, int w, int h) {
  int i, j;

  for (j=0; j<8*h; j++)
    for (i=0; i<8*w; i++) 
      b2[j*(8*w)+i] = b1[(j/8)*w+(i/8)];
      
}

static void CodeLowestPLayer(FramePtr Actual, FramePtr Decoded, FramePtr RefRec, FramePtr RefOrig, FramePtr MotComp, MVFieldPtr mv, int l)
{
  int fx[5], fy[5], hx[5], hy[5], dfx, dfy;
  int i,j;
  int sad_intra, sad_fp, sad0_fp, sad_hp, sad0_hp;
  int mode;
  int ymean[256], umean[64], vmean[64];
  
  gyVar = 1.25;
  guVar = 0.75;
  gvVar = 0.75;

  for (j=0; j<Actual->Y->h; j+=gbh) {

    ResetMVPredictor();
    ResetIntraPredictors();

    if (j!=0 && SLICE_SYNC==1) SendStartOfSlice();
    fx[0] = fy[0] = 0;

    for (i=0; i<Actual->Y->w; i+=gbw) {

      GetMB(Actual, i, j, gbw, gyact, guact, gvact);

#ifdef FAST_ME
      sad_fp = FPEstimationFast (Actual->Y->data, RefOrig->Y->data, actNorm[l], refNorm[l], Actual->Y->w, Actual->Y->h, i, j, ParBlk.FwdXRange[l], &fx[0], &fy[0], &sad0_fp);
#else
      sad_fp = FPEstimationSlow (Actual->Y->data, RefOrig->Y->data, Actual->Y->w, Actual->Y->h, i, j, gbw, ParBlk.FwdXRange[l], &fx[0], &fy[0], &sad0_fp);
#endif

      if ((sad0_fp-100)<sad_fp) {
	sad_fp = sad0_fp-100;
	fx[0] = fy[0] = 0;
      }
      
      /* Intra/Inter decision */
      sad_intra = IntraVar(gyact, gbw*gbh);
      if ((sad_intra) < (sad_fp)) mode = 1; else mode = 0; 

      if (mode==0) {
	fprintf(stderr,"P");
	sad_hp = HPRefine (Actual->Y->data, RefRec->Y->data, Actual->Y->w, Actual->Y->h, i, j, gbw, fx[0], fy[0], &hx[0], &hy[0], &sad0_hp);

	/*
	if ((sad_fp-100)<sad_hp) {
	  fx[0] = fy[0] = 0;
	} else 
	*/

	  if ((sad0_hp-100)<sad_hp) {
	  fx[0] = 2*fx[0]; fy[0] = 2*fy[0];
	} else {
	  fx[0] = 2*fx[0] + hx[0]; fy[0] = 2*fy[0] + hy[0];
	}

	dfx = fx[0]; dfy = fy[0];

	SetMotionVector(mv, i, j, fx[0], fy[0], mode);
	/* SetMotionVector(mv, i, j, 0, 0, mode); */
	GetTranslatedMB(RefRec, i, j, fx[0], fy[0], gbw, gytplp, gutplp, gvtplp);

	/* mse += BlockMSE(gyact, gytplp, gbw*gbh); n_mse++; */

#ifdef BPMC1
	PutMB(MotComp, i, j, gbw, gytplp, gutplp, gvtplp);
#endif
	SetMBCodingInfo(gbw/2, SeqHdr.PPreset[l], FWD_TPLP_BLOCK, dfx,dfy,0,0);

	QuantizeInterMB(gyact, guact, gvact, gytplp, gutplp, gvtplp);

	SendMB(gyact, guact, gvact, gytplp, gutplp, gvtplp, ymean, umean, vmean);

	ResetIntraPredictors();

      } else {
	fprintf(stderr,"I");
	fx[0] = fy[0] = 0;
	SetMotionVector(mv, i, j, 0, 0, mode);
#ifdef BPMC1
	GetMB(RefRec, i, j, gbw, gytplp, gutplp, gvtplp);
	PutMB(MotComp, i, j, gbw, gytplp, gutplp, gvtplp);
#endif
	SetMBCodingInfo(gbw/2, SeqHdr.IPreset[l], SP_BLOCK, 0,0,0,0); 

	Vect2ImageOrder16x16(gyact, gytmp, 16);
	BlockSubsamp8(gytmp, ymean,  16,  16);
	BlockUpsamp8(ymean, gytmp, 2, 2); 
	Image2VectOrder16x16(gytmp, gyspre, 16);

	Vect2ImageOrder8x8(guact, gutmp, 8);
	BlockSubsamp8(gutmp, umean,  8,  8);
	BlockUpsamp8(umean, gutmp, 1, 1);
	Image2VectOrder8x8(gutmp, guspre, 8);

	Vect2ImageOrder8x8(gvact, gvtmp, 8);
	BlockSubsamp8(gvtmp, vmean,  8,  8);
	BlockUpsamp8(vmean, gvtmp, 1, 1);
	Image2VectOrder8x8(gvtmp, gvspre, 8);

	QuantizeInterMB(gyact, guact, gvact, gyspre, guspre, gvspre);

	SendMB(gyact, guact, gvact, gyspre, guspre, gvspre, ymean, umean, vmean);
      }
      PutMB(Decoded, i, j, gbw, gyact, guact, gvact); 
    }
    fprintf(stderr,"\n");
  }
  /* fprintf(stderr,"P_MSE=%f\n",(double)mse/(double)n_mse); */
}

int dfxhist[256];
int dfyhist[256];

void InitMVHist() {
  memset(dfxhist,0,256*sizeof(int));
  memset(dfyhist,0,256*sizeof(int));
}

void PrintMVHist() {
  int i;

  for (i=0; i<256; i++)
    fprintf(stderr,"%d ", dfxhist[i]);

  fprintf(stderr,"\n");

  for (i=0; i<256; i++)
    fprintf(stderr,"%d ", dfyhist[i]);

}
static void CodePLayer (FramePtr Actual, FramePtr Decoded, FramePtr RefRec, FramePtr RefOrig, FramePtr MotComp, MVFieldPtr mv, int l)
{
  int fx0, fy0, fx, fy, dfx, dfy, dummy, hxlp, hylp;
  int i,j, n;

  int sad_intra, sad0_lphp, sadr_lpfp, sadr_lphp;
  int mode;
  
  for (j=0; j<Actual->Y->h; j+=gbh) {

    ResetMVPredictor();
    if (j!=0 && SLICE_SYNC==1)
      SendStartOfSlice();

    for (i=0; i<Actual->Y->w; i+=gbw) {

      GetMotionVector (mv, i, j, &dummy, &dummy, &fx0, &fy0, &dummy);

      GetMB(Actual, i, j, gbw, gytmp, gutmp, gvtmp);

      /* Current original */
      GetMB(Actual, i, j, gbw, gyact, guact, gvact);

      /* Current interpolation from lower resolution reconstructed */
      GetMB(Decoded, i, j, gbw, gyspre, guspre, gvspre);

      /* Compute SAD for Intra mode */
      /* sad_intra = IntraVar(gyact, gbw*gbh); */
      sad_intra = BlockSAD(gyact, gyspre, gbw*gbh);

      GetTranslatedMB(RefRec, i, j, 2*fx0, 2*fy0, gbw, gytplp, gutplp, gvtplp);

      /* Compute motion vector refinement */
      FPRefineEstimationSlow (Actual->Y->data, RefOrig->Y->data, Actual->Y->w, Actual->Y->h, i, j, SeqHdr.BlockWidth[l], ParBlk.FwdXRange[l], fx0, fy0, &fx, &fy, &dummy);

      GetTranslatedMB(RefRec, i, j, 2*fx, 2*fy, gbw, gytplp, gutplp, gvtplp);
      sadr_lpfp = BlockSAD(gyact, gytplp, gbw*gbh);

      if ((sad_intra)<sadr_lpfp) {
	mode = 0;
      } else {
	mode = 1;
      }

      switch(mode) {
      case 0: 
	fprintf(stderr,"S");
	SetMotionVector(mv, i, j, 2*fx0, 2*fy0, mode); 
	SetMBCodingInfo(gbw/2, SeqHdr.PPreset[l], SP_BLOCK, 0,0,0,0);
	QuantizeInterMB(gyact, guact, gvact, gyspre, guspre, gvspre); 
	SendMB(gyact, guact, gvact, gyspre, guspre, gvspre, NULL, NULL, NULL);

	GetTranslatedMB(RefRec, i, j, 0, 0, gbw, gytplp, gutplp, gvtplp);
	PutMB(MotComp, i, j, gbw, gytplp, gutplp, gvtplp);

	break;
      case 1: 

	/* Lowpass half-pel refinement: */
	sadr_lphp = HPRefine (Actual->Y->data, RefRec->Y->data, Actual->Y->w, Actual->Y->h, i, j, 16, fx, fy, &hxlp, &hylp, &sad0_lphp); 

	if ((sad0_lphp-100)<=sadr_lphp) {
	  hxlp = hylp = 0;
	}

	fx = 2*fx + hxlp;
	fy = 2*fy + hylp;

	fprintf(stderr,"P");

	GetTranslatedMB(RefRec, i, j, fx, fy, gbw, gytplp, gutplp, gvtplp);

	SetMotionVector(mv, i, j, fx, fy, mode);
	dfx= fx - (2*fx0); dfy = fy - (2*fy0);

	dfxhist[ABS(dfx)]++;
	dfyhist[ABS(dfy)]++;

	SetMBCodingInfo(gbw/2, SeqHdr.PPreset[l], FWD_TPLP_BLOCK, dfx,dfy,0,0);

	/* mse += BlockMSE(gyact, gytplp, gbw*gbh); n_mse++; */

	QuantizeInterMB(gyact, guact, gvact, gytplp, gutplp, gvtplp);
	SendMB(gyact, guact, gvact, gytplp, gutplp, gvtplp, NULL, NULL, NULL);

	PutMB(MotComp, i, j, gbw, gytplp, gutplp, gvtplp);

	break;

      case 2: 
	fprintf(stderr,"B");

	GetTranslatedMB(RefRec, i, j, fx, fy, gbw, gytplp, gutplp, gvtplp);
	/* GetTranslatedMB(MotComp, i, j, fx, fy, gbw, gytpbp, gutpbp, gvtpbp); */

	GetMB(MotComp, i, j, gbw, gytpbp, gutpbp, gvtpbp);

	SetMotionVector(mv, i, j, fx, fy, mode); 
	dfx= fx - (2*fx0); dfy = fy - (2*fy0);
	SetMBCodingInfo(gbw/2, SeqHdr.PPreset[l], FWD_TPBP_BLOCK, dfx,dfy,0,0);

	for (n=0; n<gbw*gbh; n++) gytpbp[n] = CLIP(gytplp[n] + gyspre[n] - gytpbp[n]);
	for (n=0; n<gbw*gbh/4; n++) gutpbp[n] = CLIP(gutplp[n] + guspre[n] - gutpbp[n]);
	for (n=0; n<gbw*gbh/4; n++) gvtpbp[n] = CLIP(gvtplp[n] + gvspre[n] - gvtpbp[n]);

        QuantizeInterMB(gyact, guact, gvact, gytpbp, gutpbp, gvtpbp);
	SendMB(gyact, guact, gvact, gytpbp, gutpbp, gvtpbp, NULL, NULL, NULL);

	break;

      }
      PutMB(Decoded, i, j, gbw, gyact, guact, gvact); 
      /*
      fprintf(stderr,"\n>>>%d\n",BlockMSE(gutmp, guact, gbw*gbh));

      MSESumY[mode] += BlockMSE(gytmp, gyact, gbw*gbh);
      MSESumU[mode] += BlockMSE(gutmp, guact, gbw*gbh);
      MSESumV[mode] += BlockMSE(gvtmp, gvact, gbw*gbh);

      ModeCount[mode]++;
      */
    }
    fprintf(stderr,"\n");
  }
  /* fprintf(stderr,"P_MSE=%f\n",(double)mse/(double)n_mse); */
}

static void CodePFrameLayer (FramePtr Actual, FramePtr Decoded, FramePtr RefRec, FramePtr RefOrig, FramePtr MotComp, MVFieldPtr mv, int l, int IsFirstLayer, int IsLastLayer) {
  char name[80];

  SMStartLayerCoding(l);

  Decoded->TempRef = Actual->TempRef;
  Decoded->SeqIndex = Actual->SeqIndex;

  gbw = SeqHdr.BlockWidth[l];
  gbh = SeqHdr.BlockHeight[l];

  SetFrameCodingInfo(P_FRAME, Actual->TempRef);
  SetLayerCodingInfo(IsFirstLayer,IsLastLayer, l);

  if (IsFirstLayer) {
    CodeLowestPLayer(Actual,Decoded, RefRec, RefOrig, MotComp, mv, l);
  } else {
    SendStartOfLayer(l, P_CODED);
    CodePLayer(Actual, Decoded, RefRec, RefOrig, MotComp, mv, l);
  }

  SMEndLayerCoding();
  SMLayerQuantizationError(Actual, Decoded);
}

extern unsigned long layerBits[4];

void CodePFrame (PyramidPtr Actual, PyramidPtr Decoded, PyramidPtr RefRec, PyramidPtr RefOrig, PyramidPtr MotComp, MultiScaleFieldPtr mv, int TempRef, int start, int end) {

  int l, i;
  char name[80];
  unsigned long Count, currBits;

  SMStartFrameCoding(TempRef);

  ResetPictureHeader();
  PictHdr.CodingType = 2;
  PictHdr.TempRef = TempRef;

  Count = GlobalBitCount;
  layerBits[start] += (GlobalBitCount - Count);

  SendStartOfPicture(start,P_CODED);

  CodePictureHeader();

  for (l=start; l>=end; l--) {

    Count = GlobalBitCount;
    /* if (l != start - 1) */

    if ( ((TempRef%8)==0) && (l==0) ) {
      SendStartOfLayer(l, I_CODED);
      CodeIFrameLayer(Actual->Layer[l], Decoded->Layer[l], l, (l==start), (l==end));
    } else {
      CodePFrameLayer(Actual->Layer[l], Decoded->Layer[l], RefRec->Layer[l], RefOrig->Layer[l], MotComp->Layer[l], mv->Layer[l], l, (l==start), (l==end));
    }

    layerBits[l] += (GlobalBitCount - Count);

    if ((l-1) >= 0) {
      InterpolateFrameOnepass (Decoded->Layer[l], Decoded->Layer[l-1]);
#ifdef BPMC1
      InterpolateFrameOnepass (MotComp->Layer[l], MotComp->Layer[l-1]);
#endif
      if (l<=1)
	InterpolateMVField2(mv->Layer[l], mv->Layer[l-1], RefRec->Layer[l]->TempRef, Actual->Layer[l]->TempRef, RefRec->Layer[l-1]->TempRef, Actual->Layer[l-1]->TempRef, 1);
    }
#ifdef SAVE_DECODED
    sprintf(name,"decoded.%d", l);
    SaveYUVKhorosFormat(Decoded->Layer[l], name,TempRef);
#endif
  }
  SMEndFrameCoding();

  currBits = 0;
  for (l=3; l>=0; l--) {
    currBits += layerBits[l];
    fprintf(stderr,"P-Rate %d: %7.2f kbit/s\n", l, 25 * ((currBits) / (double) (1000*(TempRef+1))));
  }
}
