int main()
{
	FILE *fp;
	float a1, a2;
	double Aarray[] = { 1,1,-0.1,1 };
	double Harray[] = { 1,1,0,1 };
	Mat *X[100],*Qk,*Rk,*A,*H,*Z,*Pk,*K;
	int iter = 0;
	fopen_s(&fp, "measure.txt", "r");
	Z = mallocMat(2, 1, TYPE_FLOAT);
	X[0] = mallocMat(2, 1, TYPE_FLOAT);
	setVecVal(X[0], 0, 0.1);
	setVecVal(X[0], 1, 0.9);
	A = scanMat(2, 2, TYPE_FLOAT, Aarray);
	H = scanMat(2, 2, TYPE_FLOAT, Harray);
	Qk = identity(2, TYPE_FLOAT);
	Rk = identity(2, TYPE_FLOAT);
	Pk = identity(2, TYPE_FLOAT);
	K = zerosMat(2, 2, TYPE_FLOAT);
	if (!fp)
	{
		return -1;
	}
	while (!feof(fp))
	{
		fscanf_s(fp, "%f %f", &a1, &a2);
		if (iter == 0)
		{
			iter++;
			continue;
		}
		setVecVal(Z, 0, a1);
		setVecVal(Z, 1, a2);
		X[iter] = mallocMat(2, 1, TYPE_FLOAT);
		predictState(A, X[iter - 1], X[iter]);
		predictErrorVariance(A, Pk, Qk, Pk);
		if (iter % 5 == 0)
		{
			getK(Pk, H, Rk, K);
			stateCorrect(X[iter], K, Z, H, X[iter]);
			varianceCorrect(Pk, K, H, Pk);
		}
		iter++;
	}
	
	return 1;

}
void main(int argc, char *argv[]){
    int id,np;
    int dim1,dim2,dim3;
    double *a,*b,*c;
    double *adist,*bdist,*cdist;

    MPI_Init(&argc,&argv);

    sscanf(argv[1],"%d",&dim1);
    sscanf(argv[2], "%d",&dim2);
    sscanf(argv[3],"%d",&dim3);

    MPI_Comm_rank(MPI_COMM_WORLD, &id);

    MPI_Comm_size(MPI_COMM_WORLD, &np);

    if(id==0){
	a=malloc(sizeof(double)*dim1*dim2);
	b=malloc(sizeof(double)*dim2*dim3);
	matrizAleatoria(dim1,dim2,a);
	matrizAleatoria(dim2,dim3,b);
	b[1] = 8;
	b[5] = 10;
	b[15] = 0;
	printf("A = \n");
	impMat(dim1,dim2,a);
	printf("B = \n");
	impMat(dim1,dim2,b);
    }

    adist=malloc(sizeof(double)*(dim1*dim2)/np);
    bdist=malloc(sizeof(double)*(dim2*dim3)/np);


    MPI_Scatter(a,(dim1*dim2)/np,MPI_DOUBLE,adist,(dim1*dim2)/np,MPI_DOUBLE,0,MPI_COMM_WORLD);

    MPI_Scatter(b,(dim2*dim3)/np,MPI_DOUBLE,bdist,(dim2*dim3)/np,MPI_DOUBLE,0,MPI_COMM_WORLD);


    //impMat(dim1/np,dim2,adist);

    //impMat(dim2/np,dim3,bdist);



    char TRANSA, TRANSB;
    int M, N, K;
    double ALPHA, BETA;

    TRANSA = 'N';
    TRANSB = 'N';
    ALPHA = 1.0;
    BETA = 1.0;

    M=dim1/np;
    K=dim2/np; 
    N=dim3;

    cdist=malloc(sizeof(double)*(dim1*dim3)/np);

    zerosMat(dim1/np,dim3,cdist);

    int j;
    double *asub;

    asub=malloc(sizeof(double)*(dim1/np*dim2/np));

    for(j=0;j<np;j++){

	asub=subMatrix(dim1,dim2,mod(id-j,np),np,adist);

	dgemm_(&TRANSA, &TRANSB, &N, &M, &K, &ALPHA, bdist, &N, asub, &K, &BETA, cdist, &N);

	printf("---------------------------------------\n");
	printf("i = %d\tj = %d\tnump = %d\n", id, j, mod(id-j,np));
	printf("Aij = \n");
	impMat(dim1/np,dim2/np,asub);
	//printf("Ai = \n");
	//impMat(dim1/np,dim2,adist);
	printf("Bi = \n");
	impMat(dim2/np,dim3,bdist);
	printf("Ci (resultado parcial) = \n");
	impMatTransp(dim3,dim1/np,cdist);
	printf("---------------------------------------\n");

	MPI_Send(bdist, (dim2*dim3)/np, MPI_DOUBLE, mod(id + 1,np), mod(id-j,np),MPI_COMM_WORLD);

	MPI_Recv(bdist,(dim2*dim3)/np, MPI_DOUBLE, mod(id - 1,np), mod(id-j-1,np), MPI_COMM_WORLD,MPI_STATUS_IGNORE);

    }



    if(id==0){
	free(a);
	free(b);
    }

    free(adist);
    free(bdist);
    free(asub);

    MPI_Finalize();
}