    /*        Fast GEMM routine for Alpha                  */
    /*           Linux, Digital UNIX and NT/Alpha          */
    /*        by Kazushige Goto <goto@statabo.rim.or.jp>   */


#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/times.h>
#include <sys/time.h>
#include <time.h>
#include "common.h"
#include "bmcommon.h"

/*
 * Check program: calls GEMMC_ and GEMM_ with identical arguments on two
 * buffers (c and d) that start with identical contents, then compares the
 * two buffers element by element with diffs().
 * NOTE(review): FLOAT, GEMMC_, GEMM_, trans[] and diffs() are not defined
 * in this file; presumably they come from common.h / bmcommon.h.
 *
 * Optional positional arguments (each one requires the previous ones):
 *   argv[1]  size       upper bound of the tested dimensions  (default 400)
 *   argv[2]  alpha      scalar passed to both routines        (default 3.0)
 *   argv[3]  beta       scalar passed to both routines        (default 1.0)
 *   argv[4]  only_beta  exclusive upper bound of the transa/transb
 *                       indices used to select trans[] entries (default 2)
 *
 * Progress is written to stderr: one line per tested (m, n, k), followed by
 * "G " for every transa/transb combination that was compared and "S " for
 * every combination that was skipped.
 *
 * Returns 0 when all combinations were processed.  Calls exit(1) when size
 * is not positive, when an allocation fails, or as soon as a third
 * mismatch has been printed for one combination.
 */
int main(int argc, char *argv[]){

  FLOAT *a, *b, *c, *d;
  int i, j, errcount;
  int m, n, k;
  int nrowa, nrowb;
  int transa, transb;
  FLOAT alpha = 3.0;
  FLOAT beta  = 1.0;
  int size = 400;
  int only_beta = 2;
  size_t nelem;

  fprintf(stderr, "\n\tMatrix-Matrix Multiply"
#ifdef DGEMM
	  "(Double Precision) "
#else
	  "(Single Precision) "
#endif
	  "Check Routine\n"
	          "\t\t\t by Kazushige Goto <goto@statabo.rim.or.jp>\n\n");

  /* Skip the program name, then consume the optional arguments in order. */
  argc--; argv++;
  /* size and only_beta are ints, so parse them as integers. */
  if (argc > 0) { size      = atoi(*argv); argc--; argv++; }
  if (argc > 0) { alpha     = atof(*argv); argc--; argv++; }
  if (argc > 0) { beta      = atof(*argv); argc--; argv++; }
  if (argc > 0) { only_beta = atoi(*argv); argc--; argv++; }

  fprintf(stderr, "\tSize = %4d\n", size);
  fprintf(stderr, "\tAlpha = %e Beta = %e\n\n", alpha, beta);

  /* A non-positive size would test nothing and still report success. */
  if (size < 1) {
    fprintf(stderr, "Size must be a positive integer\n");
    exit(1);
  }

  /* Every buffer holds size * size elements. */
  nelem = (size_t)size * (size_t)size;

  if ((a = malloc(nelem * sizeof *a)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if ((b = malloc(nelem * sizeof *b)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if ((c = malloc(nelem * sizeof *c)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  if ((d = malloc(nelem * sizeof *d)) == NULL){
    fprintf(stderr,"Out of Memory!!\n");exit(1);
  }

  /*
   * All data below is drawn with rand(), so seed that generator.
   * (srandom() seeds random(), which this program never calls.)
   */
  srand((unsigned int)getpid());

  /* Fill a, b and c with values in [0, 10); d starts as a copy of c. */
  for(i = 0; i < size; i++) for(j = 0; j < size; j++){
    a[i*size +j] = rand() / ((double) RAND_MAX + 1.0 )*10.0;
    b[i*size +j] = rand() / ((double) RAND_MAX + 1.0 )*10.0;
    c[i*size +j] = rand() / ((double) RAND_MAX + 1.0 )*10.0;
    d[i*size +j] = c[i*size +j];
  }

  /*
   * The loop nest is written for all (m, n, k) triples, but m and n are
   * overwritten with k inside the body.  As a result only the square
   * cases m == n == k are tested, and the two outer loops run exactly
   * once: when the k loop ends, m and n both equal size, so their
   * increments terminate them.
   */
  for(m = 1; m <= size; m++){
    for(n = 1; n <= size; n++){
      for(k = 1; k <= size; k++){

        m = k;
        n = k;

        fprintf(stderr, "m = %3d n = %3d k = %3d : ", m, n, k);

        for (transa=0; transa < only_beta; transa++){
          for (transb=0; transb < only_beta; transb++){

            if (transa == 0 || transa == 2) nrowa = m; else nrowa = k;
            if (transb == 0 || transb == 2) nrowb = k; else nrowb = n;

            /*
             * m, n and k are also the values passed after a, b and c
             * below (presumably the leading dimensions - confirm against
             * the GEMM_ prototype); skip combinations they cannot hold.
             */
            if ((m >= nrowa) && (n >= nrowb)  && (k >= m)){

              /* Refresh the first m*n elements of c and mirror them in d. */
              for(i = 0; i < m; i++) for(j = 0; j < n; j++){
                c[i*n +j] = rand() / ((double) RAND_MAX + 1.0 )*10.0;
                d[i*n +j] = c[i*n+j];
              }

              GEMMC_(trans[transa], trans[transb], &m, &n, &k,
                      &alpha, a, &m, b, &n, &beta, c, &k);

              GEMM_ (trans[transa], trans[transb], &m, &n, &k,
                      &alpha, a, &m, b, &n, &beta, d, &k);

              /*
               * Compare the complete buffers, not only the part that was
               * just written.  The printed i and j are positions in the
               * size x size buffer, not coordinates of the m x n result.
               */
              errcount = 0;
              for(i = 0; i < size; i++){
                for(j = 0;j < size; j++){
                  if (diffs(c[i*size+j], d[i*size+j])){
                    if (!errcount) fprintf(stderr,"\n");
                    fprintf(stderr, "%3d %3d :%6.1f %6.1f\n",
                            i, j, c[i*size +j], d[i*size+j]);
                    errcount ++;
                    /* Give up after the third reported mismatch. */
                    if (errcount > 2) exit(1);
                  }
                }
              }
              fprintf(stderr, "G ");
            } else {
              fprintf(stderr, "S ");
            }
          }
        }
        fprintf(stderr, "\n");
      }
    }
  }

  free(a);
  free(b);
  free(c);
  free(d);
  return 0;
}
