#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>

// ----------------------------------------------------------------------------

/**
 * Matrix instace data.
 */
typedef struct {
	int r, c;
	int *elements;
} Matrix;

int matrix_get_r(Matrix *this) {
	return this->r;
}

int matrix_get_c(Matrix *this) {
	return this->c;
}

void matrix_set_element(Matrix *this, int r, int c, int v) {
	this->elements[r*(this->c) + c] = v;
}

int matrix_get_element(Matrix *this, int r, int c) {
	return this->elements[r*(this->c) + c];
}

Matrix *matrix_new(int r, int c) {
	Matrix *ret;
	
	ret = (Matrix *) malloc(sizeof(Matrix));
	ret->r = r;
	ret->c = c;
	ret->elements = (int *) malloc(r*c*sizeof(int));
	
	return ret;
}

/**
 * Sends this matrix from master to slave.
 */
void matrix_send(Matrix *this, int pid) {
	MPI_Send((void *) &(this->r), 1, MPI_INT, pid, 0, MPI_COMM_WORLD);
	MPI_Send((void *) &(this->c), 1, MPI_INT, pid, 0, MPI_COMM_WORLD);
	MPI_Send((void *) this->elements, (this->r)*(this->c), MPI_INT, pid, 0, MPI_COMM_WORLD);
}

/**
 * Receives a matrix from master.
 */
Matrix *matrix_recv(int pid) {
	Matrix *ret;
	int r, c;
	
	MPI_Recv(&r, 1, MPI_INT, pid, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	MPI_Recv(&c, 1, MPI_INT, pid, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	ret = matrix_new(r, c);
	MPI_Recv(ret->elements, r*c, MPI_INT, pid, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

	return ret;
}

/**
 * Prints this matrix to screen.
 */
void matrix_print(Matrix *this) {
	int r, c;

	for (r = 0; r < this->r; r++) {
		for (c = 0; c < this->c; c++) {
			printf("%d\t", this->elements[r*(this->c) + c]);
		}
		printf("\n");
	}
}

void matrix_delete(Matrix *this) {
	free(this->elements);
	this->elements = NULL;
	free(this);
}

// ----------------------------------------------------------------------------

/**
 * Matrix 1D segment instance data.
 *
 * The output matrix is segented to 1D segments, which will be calculated by
 * the master and slave processes.
 */
typedef struct {
	int size, ibegin;
	int *elements;
} Segment;

/**
 * Prints this segment to screen.
 */
void segment_print(Segment *this) {
	int i;
	
	printf("size = %d, ibegin = %d\n", this->size, this->ibegin);
	for (i = 0; i < this->size; i++) {
		printf("%d\n", this->elements[i]);
	}
}

/**
 * Returns the multiplied segment.
 */
Segment *segment_new(Matrix *m1, Matrix *m2, int num_procs, int pid) {
	Segment *ret;
	int     result_r, result_c, r, c;
	int     mod;
	int     i, j;

	result_r = matrix_get_r(m1);
	result_c = matrix_get_c(m2);

	ret = (Segment *) malloc(sizeof(Segment));
	mod = (result_r*result_c)%num_procs;
	if (mod == 0) {
		ret->size   = result_r*result_c/num_procs;
		ret->ibegin = pid*ret->size;
	} else if (pid == num_procs - 1) {
		ret->size   = mod;
		ret->ibegin = result_r*result_c - mod;		
	} else {
		ret->size   = num_procs;
		ret->ibegin = num_procs*pid;
	}
	ret->elements = (int *) malloc(ret->size*sizeof(int));

	// Multiply the 2 matrices
	for (i = 0; i < ret->size; i++) {
		r = (ret->ibegin + i)/result_c;
		c = (ret->ibegin + i)%result_c;
		ret->elements[i] = 0;
		for (j = 0; j < matrix_get_c(m1); j++) {
			ret->elements[i] += matrix_get_element(m1, r, j)*matrix_get_element(m2, j, c);			
		}
	}

	printf("Segment calculated by process %d:\n", pid);
	segment_print(ret);

	return ret;
}

/**
 * Send this segment from slave to master.
 */
void segment_send(Segment *this, int pid) {
	MPI_Send((void *) &(this->size), 1, MPI_INT, pid, 0, MPI_COMM_WORLD);
	MPI_Send((void *) &(this->ibegin), 1, MPI_INT, pid, 0, MPI_COMM_WORLD);
	MPI_Send((void *) this->elements, this->size, MPI_INT, pid, 0, MPI_COMM_WORLD);
}

/**
 * Returns the segment received from a slave.
 */
Segment *segment_recv(int pid) {
	Segment *ret;

	ret = (Segment *) malloc(sizeof(Segment));	
	MPI_Recv(&(ret->size), 1, MPI_INT, pid, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	MPI_Recv(&(ret->ibegin), 1, MPI_INT, pid, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
	ret->elements = (int *) malloc(ret->size*sizeof(int));
	MPI_Recv(ret->elements, ret->size, MPI_INT, pid, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);

	return ret;
}

/**
 * Used to merge segments calculated by different processes to create a complete matrix.
 */
void segment_merge_to_matrix(Segment *this, Matrix *matrix) {
	int i, r, c, v;
	int result_c;

	result_c = matrix_get_c(matrix);
	for (i = 0; i < this->size; i++) {
		r = (this->ibegin + i)/result_c;
		c = (this->ibegin + i)%result_c;
		v = this->elements[i];
		matrix_set_element(matrix, r, c, v);
	}
}

void segment_delete(Segment *this) {
	free(this->elements);
	this->elements = NULL;
	free(this);
}

// ----------------------------------------------------------------------------

/**
 * Main program data.
 */
typedef struct {
	int num_procs, pid;
	Matrix *m1, *m2;
} Main;

/**
 * Input file format:
 * rows1 columns1
 * rows2 columns2
 * matrix1 (rows1 x columns1)
 * matrix2 (rows2 x columns2)
 */
void _main_load_matrices(Main *this, const char *file_name) {
	FILE *file;
	int  r, c, tmp;

	file = fopen(file_name, "r");

	fscanf(file, "%d", &r);
	fscanf(file, "%d", &c);
	this->m1 = matrix_new(r, c);
	
	fscanf(file, "%d", &r);
	fscanf(file, "%d", &c);
	this->m2 = matrix_new(r, c);

	for (r = 0; r < matrix_get_r(this->m1); r++) {
		for (c = 0; c < matrix_get_c(this->m1); c++) {
			fscanf(file, "%d", &tmp);
			matrix_set_element(this->m1, r, c, tmp);
		}
	}

	for (r = 0; r < matrix_get_r(this->m2); r++) {
		for (c = 0; c < matrix_get_c(this->m2); c++) {
			fscanf(file, "%d", &tmp);
			matrix_set_element(this->m2, r, c, tmp);
		}
	}

	fclose(file);
	file = NULL;
}

Main *main_new(int argc, char **argv) {
	Main *ret;
	
	ret = (Main *) malloc(sizeof(Main));

	MPI_Init(&argc, &argv);
	MPI_Comm_size(MPI_COMM_WORLD, &ret->num_procs);
	MPI_Comm_rank(MPI_COMM_WORLD, &ret->pid);

	if (ret->pid == 0) {
		_main_load_matrices(ret, "matrix.dat");

		// Display input conditions
		printf("Matrix 1:\n");
		matrix_print(ret->m1);
		printf("Matrix 2:\n");
		matrix_print(ret->m2);
		printf("Number of processes: %d\n", ret->num_procs);

		// Simple input validation
		if (matrix_get_c(ret->m1) != matrix_get_r(ret->m2)) {
			printf("Cannot multiply: Invalid dimensions\n");
			exit(-1);
		}
	}

	return ret;
}

/**
 * Perform the master job.
 */
void _main_run_master(Main *this) {
	int     i;
	Segment *segment;
	Matrix  *result;
	
	// Send r, c and the 2 matrices to all slaves
	for (i = 1; i < this->num_procs; i++) {
		matrix_send(this->m1, i);
		matrix_send(this->m2, i);
	}

	result = matrix_new(matrix_get_r(this->m1), matrix_get_c(this->m2));

	// The master also performs calculation
	segment = segment_new(this->m1, this->m2, this->num_procs, 0);
	segment_merge_to_matrix(segment, result);
	segment_delete(segment);
	segment = NULL;

	// Receive results from all slaves
	for (i = 1; i < this->num_procs; i++) {
		segment = segment_recv(i);
		segment_merge_to_matrix(segment, result);
		segment_delete(segment);
		segment = NULL;
	}

	printf("Result:\n");
	matrix_print(result);
	matrix_delete(result);
	result = NULL;
}

/**
 * Perform the slave job.
 */
void _main_run_slave(Main *this) {
	Segment *segment;

	this->m1 = matrix_recv(0);
	this->m2 = matrix_recv(0);

	segment = segment_new(this->m1, this->m2, this->num_procs, this->pid);
	segment_send(segment, 0);
	segment_delete(segment);
	segment = NULL;
}

void main_run(Main *this) {
	if (this->pid == 0) {
		_main_run_master(this);
	} else {
		_main_run_slave(this);
	}
}

void main_delete(Main *this) {
	matrix_delete(this->m1);
	this->m1 = NULL;
	matrix_delete(this->m2);
	this->m2 = NULL;
	free(this);
	
	MPI_Finalize();
}

// ----------------------------------------------------------------------------

int main(int argc, char **argv) {
	Main *main;

 	main = main_new(argc, argv);
 	main_run(main);
 	main_delete(main);
	main = NULL;
 	
	return 0;
}

