C++ - Making an MPI BMP image comparison more efficient
I made a simple program in which I compare two images pixel by pixel and determine whether the pictures are the same. I'm trying to adapt it to MPI, but I'm afraid the communications are taking so long that they make it far less efficient than its sequential counterpart. I have tried it with images of very high resolution and the result is the same: the sequential code is more efficient than the parallel code. Is there a way of making this more efficient?
Sequential code:
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/* Layout constants for the 54-byte BMP header this program understands. */
enum {
    BMP_HEADER_SIZE = 54,   /* file header (14 bytes) + info header (40 bytes) */
    BMP_WIDTH_OFFSET = 18,  /* 32-bit little-endian width in pixels */
    BMP_HEIGHT_OFFSET = 22, /* 32-bit little-endian height in pixels */
    BYTES_PER_PIXEL = 3     /* 24-bit colour: one byte each for B, G, R */
};

/*
 * Decodes a 32-bit little-endian signed integer from four raw bytes.
 * Assembling the value byte by byte avoids the misaligned, aliasing-unsafe
 * `*(int*)&info[18]` cast and does not depend on host endianness.
 */
static int32_t read_le32(const unsigned char* p)
{
    const uint32_t v = (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
                       ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
    return (int32_t)v;
}

/*
 * Loads the pixel bytes of a 24-bit BMP file and converts them from the
 * on-disk BGR order to RGB.
 *
 * filename  path of the BMP file to read
 * sizes     out: number of bytes in the returned buffer (3 * width * height);
 *           left untouched on failure
 *
 * Returns a buffer allocated with new[] that the caller must release with
 * delete[], or NULL (after printing a diagnostic to stderr) if the file
 * cannot be opened, is truncated, or has dimensions that are not positive or
 * whose byte count does not fit in an int.
 *
 * NOTE: like the original, this reads 3 * width * height bytes directly after
 * the 54-byte header. It therefore does not account for row padding or for a
 * pixel-data offset other than 54.
 */
unsigned char* bmp(char* filename, int* sizes)
{
    FILE* f = fopen(filename, "rb");
    if (f == NULL) {
        perror(filename);
        return NULL;
    }

    unsigned char info[BMP_HEADER_SIZE];
    if (fread(info, sizeof(unsigned char), BMP_HEADER_SIZE, f) != (size_t)BMP_HEADER_SIZE) {
        fprintf(stderr, "%s: file is too short to hold a BMP header\n", filename);
        fclose(f);
        return NULL;
    }

    const int32_t ancho = read_le32(&info[BMP_WIDTH_OFFSET]);
    const int32_t alto = read_le32(&info[BMP_HEIGHT_OFFSET]);
    /* Reject nonsense dimensions and guard 3 * ancho * alto against overflow. */
    if (ancho <= 0 || alto <= 0 || ancho > INT_MAX / BYTES_PER_PIXEL / alto) {
        fprintf(stderr, "%s: unsupported image dimensions %ld x %ld\n",
                filename, (long)ancho, (long)alto);
        fclose(f);
        return NULL;
    }

    const int size = BYTES_PER_PIXEL * ancho * alto;
    unsigned char* data = new unsigned char[size];
    if (fread(data, sizeof(unsigned char), (size_t)size, f) != (size_t)size) {
        fprintf(stderr, "%s: pixel data is truncated\n", filename);
        delete[] data;
        fclose(f);
        return NULL;
    }
    fclose(f);

    /* BMP stores each pixel as B, G, R; swap the outer bytes to get R, G, B. */
    for (int i = 0; i + 2 < size; i += BYTES_PER_PIXEL) {
        const unsigned char tmp = data[i];
        data[i] = data[i + 2];
        data[i + 2] = tmp;
    }

    *sizes = size;
    return data;
}

/*
 * Compares the two BMP files named on the command line and reports whether
 * their pixel data is identical, followed by the CPU time the run took.
 *
 * Every byte of every pixel is compared (the original stepped by 3 and so
 * only looked at one colour channel), and images of different sizes are
 * reported as different instead of being read out of bounds.
 */
int main(int argc, char** argv)
{
    if (argc < 3) {
        fprintf(stderr, "usage: %s <image1.bmp> <image2.bmp>\n",
                argc > 0 ? argv[0] : "compare");
        return EXIT_FAILURE;
    }

    const clock_t t1 = clock();

    int size1 = 0;
    int size2 = 0;
    unsigned char* data1 = bmp(argv[1], &size1);
    unsigned char* data2 = bmp(argv[2], &size2);
    if (data1 == NULL || data2 == NULL) {
        /* bmp() already printed the reason; delete[] on NULL is a no-op. */
        delete[] data1;
        delete[] data2;
        return EXIT_FAILURE;
    }

    const int bol = (size1 == size2) && memcmp(data1, data2, (size_t)size1) == 0;
    if (bol) {
        printf("the images same\n");
    } else {
        printf("the images not same\n");
    }

    delete[] data1;
    delete[] data2;

    const clock_t t2 = clock();
    const double tiemp = ((double)(t2 - t1)) / (CLOCKS_PER_SEC);
    printf("%f\n", tiemp);
    return 0;
}
MPI counterpart:
#include <stdio.h> #include <stdlib.h> #include <mpi.h> #include <time.h> unsigned char* bmp(char* filename,int* sizes) { int i; file* f = fopen(filename, "rb"); unsigned char info[54]; fread(info, sizeof(unsigned char), 54, f); int ancho = *(int*)&info[18]; int alto = *(int*)&info[22]; int size = 3 * ancho * alto; *sizes = size; unsigned char* data = new unsigned char[size]; fread(data, sizeof(unsigned char), size, f); fclose(f); for(i = 0; < size; += 3) { unsigned char tmp = data[i]; data[i] = data[i+2]; data[i+2] = tmp; } return data; } int main(int argc,char **argv){ int sizes,i,world_rank,world_size; clock_t t1,t2; double tiemp; t1 = clock(); mpi_init(&argc, &argv); mpi_comm_rank(mpi_comm_world, &world_rank); mpi_comm_size(mpi_comm_world, &world_size); unsigned char* data1; unsigned char* data2; int root = 0; if(world_rank==0){ data1= bmp(argv[1],&sizes); data2= bmp(argv[2],&sizes); printf("%d",sizes); } mpi_bcast(&sizes,1,mpi_int,root,mpi_comm_world); int num_elements_por_proc = sizes/world_size; unsigned char* subdata2=new unsigned char[num_elements_por_proc]; unsigned char* subdata1=new unsigned char[num_elements_por_proc]; mpi_scatter( data1, num_elements_por_proc, mpi_unsigned_char, subdata1, num_elements_por_proc, mpi_unsigned_char, root, mpi_comm_world ); mpi_scatter( data2, num_elements_por_proc, mpi_unsigned_char, subdata2, num_elements_por_proc, mpi_unsigned_char, root, mpi_comm_world ); int bol = 0; if(world_rank!=0){ for(i=0;i<=num_elements_por_proc;i++){ if(subdata1[i]!=subdata2[i]){ bol = 1; break; } } } int bolls; mpi_reduce(&bol,&bolls,1, mpi_int, mpi_sum, 0,mpi_comm_world); if(world_rank==0){ if(bolls !=0){ printf("the images not samen");} else{ printf("the images same \n" );} t2 = clock(); tiemp = ((double) (t2 - t1)) / (clocks_per_sec); printf("%f\n",tiemp ); } mpi_finalize(); return 0; }
This code is not suitable for parallelization. The bottleneck is reading the file. If the file is already in the memory of the root process, then sending the data away and looking at each data element (actually only 1/3 of them) once cannot be faster than simply doing the comparison on the root process itself.
The only way to exploit parallelism here is to store the files in a distributed fashion and to read them in a distributed fashion. You could then, for instance, compute a hash on each node and compare those.
A few more remarks:
- Consider using `MPI_LOR` (logical or) as the reduction instead of addition.
- Consider using `std::swap` instead of `tmp`.
- Pair each `new` with a `delete`, even in example code.
- Format your code properly, for your own sake and for the sake of the people who have to read it here. If you are lazy, use a tool such as `clang-format`.