c++ - MPI BMP Image comparison more efficient -


I made a simple program to compare 2 images pixel by pixel and determine whether the pictures are the same. I'm trying to adapt it to MPI, but I'm afraid the communications are taking so long that they make it far less efficient than its sequential counterpart. I have tried images of very high resolution and the result is the same: the sequential code is more efficient than the parallel code. Is there a way of making it more efficient?

Sequential code:

    #include <limits.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <time.h>

    /* Number of header bytes skipped before the pixel data. */
    static const size_t BMP_HEADER_BYTES = 54;

    /*
     * Loads the pixel data of a BMP file and converts it from BGR to RGB order.
     *
     * Assumes an uncompressed 24-bit BMP with a 54-byte header whose rows carry
     * no padding (width a multiple of 4), read on a little-endian host with a
     * 32-bit int. Other layouts are not detected.
     *
     * filename: path of the image to read.
     * sizes:    receives the number of pixel bytes (3 * width * height),
     *           or 0 on failure.
     *
     * Returns a buffer allocated with new[] (the caller must delete[] it), or
     * NULL when the file cannot be opened, is truncated, or has non-positive
     * or overflowing dimensions.
     */
    unsigned char* bmp(char* filename, int* sizes)
    {
        *sizes = 0;

        FILE* f = fopen(filename, "rb");
        if (f == NULL) {
            return NULL;
        }

        unsigned char info[BMP_HEADER_BYTES];
        if (fread(info, sizeof(unsigned char), BMP_HEADER_BYTES, f) != BMP_HEADER_BYTES) {
            fclose(f);
            return NULL;
        }

        /* memcpy instead of *(int*)&info[18]: the cast is a misaligned,
           aliasing-violating read. */
        int ancho;
        int alto;
        memcpy(&ancho, &info[18], sizeof ancho);
        memcpy(&alto, &info[22], sizeof alto);

        /* Reject sizes that are not positive or that would overflow int. */
        if (ancho <= 0 || alto <= 0 || ancho > INT_MAX / 3 / alto) {
            fclose(f);
            return NULL;
        }

        const int size = 3 * ancho * alto;
        unsigned char* data = new unsigned char[size];
        if (fread(data, sizeof(unsigned char), (size_t)size, f) != (size_t)size) {
            delete[] data;
            fclose(f);
            return NULL;
        }
        fclose(f);

        /* Swap the first and third byte of every pixel (BGR -> RGB). */
        for (int i = 0; i < size; i += 3) {
            unsigned char tmp = data[i];
            data[i] = data[i + 2];
            data[i + 2] = tmp;
        }

        *sizes = size;
        return data;
    }

    /*
     * Compares the two BMP files named on the command line and prints whether
     * their pixel data is identical, followed by the elapsed CPU time in
     * seconds. Returns EXIT_FAILURE if the arguments are missing or an image
     * cannot be read, 0 otherwise.
     */
    int main(int argc, char** argv)
    {
        if (argc < 3) {
            fprintf(stderr, "usage: compare <image1.bmp> <image2.bmp>\n");
            return EXIT_FAILURE;
        }

        clock_t t1 = clock();

        /* Each image keeps its own size: sharing one variable let the loop run
           past the end of the smaller buffer. */
        int size1 = 0;
        int size2 = 0;
        unsigned char* data1 = bmp(argv[1], &size1);
        unsigned char* data2 = bmp(argv[2], &size2);
        if (data1 == NULL || data2 == NULL) {
            fprintf(stderr, "could not read the input images\n");
            delete[] data1;
            delete[] data2;
            return EXIT_FAILURE;
        }

        /* Compare every byte, not only every third one, so a difference in any
           colour channel is detected. */
        const int bol = (size1 == size2) && memcmp(data1, data2, (size_t)size1) == 0;
        if (bol) {
            printf("the images same\n");
        } else {
            printf("the images not same\n");
        }

        clock_t t2 = clock();
        double tiemp = ((double)(t2 - t1)) / CLOCKS_PER_SEC;
        printf("%f\n", tiemp);

        delete[] data1;
        delete[] data2;
        return 0;
    }

MPI counterpart:

    #include <stdio.h>     #include <stdlib.h>     #include <mpi.h>     #include <time.h>      unsigned char* bmp(char* filename,int* sizes)     {         int i;          file* f = fopen(filename, "rb");         unsigned char info[54];         fread(info, sizeof(unsigned char), 54, f);          int ancho = *(int*)&info[18];         int alto = *(int*)&info[22];          int size = 3 * ancho * alto;         *sizes = size;         unsigned char* data = new unsigned char[size];         fread(data, sizeof(unsigned char), size, f);         fclose(f);         for(i = 0; < size; += 3)         {                 unsigned char tmp = data[i];                 data[i] = data[i+2];                 data[i+2] = tmp;         }          return data;     }      int main(int argc,char **argv){       int sizes,i,world_rank,world_size;       clock_t t1,t2;       double tiemp;       t1 = clock();        mpi_init(&argc, &argv);       mpi_comm_rank(mpi_comm_world, &world_rank);       mpi_comm_size(mpi_comm_world, &world_size);       unsigned char* data1;       unsigned char* data2;       int root = 0;       if(world_rank==0){       data1= bmp(argv[1],&sizes);       data2= bmp(argv[2],&sizes);       printf("%d",sizes);       }       mpi_bcast(&sizes,1,mpi_int,root,mpi_comm_world);       int num_elements_por_proc = sizes/world_size;       unsigned char* subdata2=new unsigned char[num_elements_por_proc];       unsigned char* subdata1=new unsigned char[num_elements_por_proc];       mpi_scatter( data1, num_elements_por_proc, mpi_unsigned_char, subdata1, num_elements_por_proc, mpi_unsigned_char, root, mpi_comm_world );       mpi_scatter( data2, num_elements_por_proc, mpi_unsigned_char, subdata2, num_elements_por_proc, mpi_unsigned_char, root, mpi_comm_world );       int bol = 0;       if(world_rank!=0){           for(i=0;i<=num_elements_por_proc;i++){           if(subdata1[i]!=subdata2[i]){             bol = 1;             break;           }          }      }      int bolls;      
mpi_reduce(&bol,&bolls,1, mpi_int, mpi_sum, 0,mpi_comm_world);       if(world_rank==0){       if(bolls !=0){         printf("the images not samen");}       else{         printf("the images same \n" );}       t2 = clock();      tiemp = ((double) (t2 - t1)) / (clocks_per_sec);      printf("%f\n",tiemp );      }      mpi_finalize();      return 0;      } 

This code is not suitable for parallelization. The bottleneck is reading the file. If the file is already in the memory of the root process, sending the data out and then looking at each data element (actually only 1/3 of them) once cannot be faster than doing the comparison on the root process itself.

The only way to exploit parallelism here is to store the files distributed and read them distributed. For instance, you could then compute a hash on each node and compare those.

A few more remarks:

  • Consider using the MPI_LOR (logical or) reduction instead of addition.
  • Use std::swap instead of the tmp variable.
  • Pair each new with a delete, even in example code.
  • Format your code properly, for your own sake and for the sake of the people who have to read it here. If you are lazy, use a tool such as clang-format.

Popular posts from this blog

php - How should I create my API for mobile applications (Needs Authentication) -

5 Reasons to Blog Anonymously (and 5 Reasons Not To)

Google AdWords and AdSense - A Dynamic Small Business Marketing Duo