/*
** shm.c:
** a quick solution for multi-process(or) shared memory
** programming using UNIX fork(), shmop(), semop(), and
** mixed Fortran and C coding. Perhaps the quickest way
** to convert a brute force f77 code to work in one box
** in parallel and retain its efficiency.
**
** Li Ju. May 27, 1997.
*/

/*
** NOTE(review): the header names of the original #include lines were lost
** from this copy of the file. The list below is reconstructed from the
** identifiers the visible code uses (printf, exit, strerror/memcpy, errno,
** signal, key_t, IPC_*, shm*, sem*) plus <unistd.h> for the fork() the
** header comment mentions -- confirm against the pristine source.
*/
#include <sys/types.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/sem.h>
#include <errno.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define _MAX_SHM_SEG 6 /* on Xolas, otherwise EMFILE error */

/* Bookkeeping record for one shared memory segment obtained by
   apply_for_shm(). */
struct shm_list
{
    key_t KEY;    /* shared memory key */
    int ID;       /* shared memory ID */
    void *addr;   /* attached logical address */
    int size;     /* size in bytes */
} shm[_MAX_SHM_SEG];
int num_shm_seg;  /* allocated number of shared memory segments */

#define _MAX_PROCESSORS 8 /* on Xolas, otherwise EMFILE error */

double *shm_base; /* base addr pointer for shm operations */
double *u, *u2, *u4, *u6, *u8, *u10, *uold, *copy[_MAX_PROCESSORS];
volatile char *mission_flags;
volatile int *domain;
/* Must be volatile because after optimization a process might
   choose to read these variables only once, as there is no
   "explicit" data dependence. */

#define U_TO_U2    1
#define U2_TO_U4   2
#define U4_TO_U6   3
#define U6_TO_U8   4
#define U8_TO_U10  5
#define COLLECT_MY_DOMAIN_TO_U2  11
#define COLLECT_MY_DOMAIN_TO_U4  12
#define COLLECT_MY_DOMAIN_TO_U6  13
#define COLLECT_MY_DOMAIN_TO_U8  14
#define COLLECT_MY_DOMAIN_TO_U10 15
#define ZERO_U2_U10 21
#define SLAVE_DIE   99

int size_of_vector_in_bytes;
int size_of_vector_in_double;
double *vv2, *vv4, *vv6, *vv8, *vv10;

/* link with fortran common block "offset" to
   provide handles for fortran use */
extern struct
{
    int SU, SU2, SU4, SU6, SU8, SU10, SCOPY;
} offset_;

/* link with fortran common block "layout" */
extern struct
{
    int num_processors;
    int domain;
} layout_;

key_t sem_KEY;  /* semaphore key */
int sem_ID;     /* semaphore ID */
struct sembuf semop_add, semop_minus, semop_zero;

/* The original relied on the system headers to define union semun (its
   definition was commented out). C libraries that leave the definition to
   the application announce that with _SEM_SEMUN_UNDEFINED, so supply it
   only in that case. */
#ifdef _SEM_SEMUN_UNDEFINED
union semun
{
    int val;
    struct semid_ds *buf;
    unsigned short *array;
};
#endif
union semun sem_set;

/*
** Print "Error:<fmt>(<strerror(errno)>)" on stdout.
** fmt is printed verbatim as a string; any further arguments are accepted
** but ignored.
*/
void Error(const char *fmt, ...)
{
    printf("Error:%s(%s)\n", fmt, strerror(errno));
}

/*
** NOTE(review): DAMAGED REGION -- restore from the pristine source.
** In this copy of the file the text between "for (i=1;i" and
** "1) { master_set_command ..." is missing. The gap swallowed the rest of
** generate_and_init_semaphore_(), the definitions of master_set_command()
** and master_wait(), and the head of the function that owns the trailing
** fragment (presumably a test like "layout_.num_processors>1" -- confirm).
** The surviving tokens are kept unchanged below, disabled so that the rest
** of the file still parses; nothing has been invented for the gap.
** NOTE(review): sem_flg is set to 0600 below, which looks like permission
** bits; sem_flg normally takes IPC_NOWAIT / SEM_UNDO -- confirm intent.
*/
#if 0
int generate_and_init_semaphore_ (int *num_processors)
{
    int i;
    for (sem_KEY=1;
         ((sem_ID=semget(sem_KEY,(*num_processors),IPC_CREAT|0600))==-1)
         &&(sem_KEY<65535);
         sem_KEY++);
    printf ("semaphore KEY = %d ID = %d\n", sem_KEY, sem_ID);
    if(sem_ID!=-1)
    {
        /* define semaphore operations */
        semop_add.sem_op = +1;
        semop_add.sem_flg = 0600;
        semop_minus.sem_op = -1;
        semop_minus.sem_flg = 0600;
        semop_zero.sem_op = 0;
        semop_zero.sem_flg = 0600;
        /* hypnotize the slave processes first */
        for (i=1;i
/* ...... source text missing here ...... */
1) { master_set_command (SLAVE_DIE); master_wait(); } }
#endif

/* Remove the semaphore set sem_ID; report on stdout if that fails. */
void free_semaphore_ ()
{
    if (semctl(sem_ID, 0, IPC_RMID, NULL) == -1)
        printf("Unable to remove semaphore ID=%d.\n", sem_ID);
}

/*
** Mark every segment recorded in shm[] for removal, last one first, and
** report each failure on stdout. The list is left empty (num_shm_seg == 0)
** so that a later apply_for_shm() never indexes shm[-1] and a second call
** is a harmless no-op.
*/
void free_shm_ ()
{
    while (num_shm_seg > 0)
    {
        num_shm_seg--;
        if (shmctl(shm[num_shm_seg].ID, IPC_RMID, NULL) == -1)
            printf("Unable to remove shared memory ID=%d.\n",
                   shm[num_shm_seg].ID);
    }
    num_shm_seg = 0;
}

/*
** manage a shared memory allocation list
**
** Obtain and attach a shared memory segment of `size` bytes and record it
** in the next free slot of shm[].
** Returns the attached address, or NULL when the list is full, when no key
** in (last_KEY, 65535] yields a segment, or when attaching fails.
*/
void *apply_for_shm(int size)
{
    static int last_KEY = 0;
    struct shm_list *seg;

    if (num_shm_seg >= _MAX_SHM_SEG)
    {
        /* the limit was missing from the argument list in the original */
        printf ("Error: maximum number (%d) of shared memory segments exceeded.\n",
                _MAX_SHM_SEG);
        return (NULL);
    }

    seg = &shm[num_shm_seg];
    seg->size = size;

    if (size < 0)
    {
        /* A negative size converts to a huge size_t that no key can
           satisfy; fail at once instead of probing every key. */
        seg->ID = -1;
    }
    else
    {
        /* walk the keys upward until shmget() accepts one */
        for (seg->KEY = last_KEY+1;
             ((seg->ID = shmget(seg->KEY, size, IPC_CREAT|0600)) == -1)
             && (seg->KEY < 65535);
             seg->KEY++)
            ;
    }

    if (seg->ID == -1)
    {
        printf("Failed to get %d bytes of shared memory for application %d.\n",
               size, num_shm_seg+1);
        return (NULL);
    }

    printf ("#%d: KEY = %d ID = %d\n",
            num_shm_seg+1, (int)seg->KEY, seg->ID);

    /* system will find a good logical address to attach the
       shared memory segment to. should be rounded to 16 bytes. */
    seg->addr = shmat(seg->ID, (char *)0, 0600|SHM_RND);

    /* shmat() signals failure with (void *)-1; compare as a pointer so the
       test is not truncated where int is narrower than a pointer. */
    if (seg->addr == (void *)-1)
    {
        /* NOTE(review): the segment just obtained is not recorded in shm[]
           on this path, so free_shm_() will not remove it. */
        Error("Failed to attach shared memory segment");
        return (NULL);
    }

    printf("Success in getting %d bytes of shared memory for application %d.\n",
           size, num_shm_seg+1);
    last_KEY = seg->KEY;
    num_shm_seg++;
    return (seg->addr);
}

/*
** Allocate the shared work space and publish it to Fortran.
**
**   n3             - each vector holds 2*(*n3) doubles
**   num_processors - number of per-process copy[] vectors, domain entries
**                    (two ints each) and mission flags (one char each)
**
** First tries one segment holding u,u2,u4,u6,u8,u10, all copy[] vectors,
** the domain table and the mission flags; if that fails, falls back to
** several smaller segments. On success the offsets of u..u10 relative to
** shm_base (in doubles) are stored in the "offset" common block and the
** domain directive is copied from the "layout" common block.
** Returns 1 on success, 0 on failure.
*/
int generate_shm_ (int *n3, int *num_processors)
{
    int i, not_null = 1;
    int nproc = *num_processors;
    int veclen = 2*(*n3);   /* doubles per vector */
    int totalsize =
        2*sizeof(int)*(*num_processors)   /* memory for domain */
        + sizeof(char)*(*num_processors)  /* memory for mission_flags */
        + sizeof(double)*2*(*n3)*(6+(*num_processors)); /* for u,..,copy */

    num_shm_seg = 0;
    size_of_vector_in_double = 2*(*n3);
    size_of_vector_in_bytes = 2*(*n3)*sizeof(double);

    /* copy[] has only _MAX_PROCESSORS slots */
    if (nproc > _MAX_PROCESSORS)
    {
        printf ("Error: number of processors (%d) exceeds the maximum (%d).\n",
                nproc, _MAX_PROCESSORS);
        return (0);
    }

    /* first see if we can allocate everything in one piece */
    if ((shm_base = (double *)apply_for_shm(totalsize)) != NULL)
    {
        u   = shm_base;
        u2  = u  + veclen;
        u4  = u2 + veclen;
        u6  = u4 + veclen;
        u8  = u6 + veclen;
        u10 = u8 + veclen;
        for (i = 0; i < nproc; i++)
            copy[i] = u10 + veclen*(i+1);
        domain = (int *) (copy[0] + veclen*nproc);
        mission_flags = (char *) (domain + 2*nproc);
    }
    else
    {
        printf ("Unable to allocate shared memory in one block.\n");
        printf ("Let us try segments...\n");

        /* people should work out plans to maximally cram their
           allocations into blocks which enhances cache hit */

        /* segment 1: u, u2, u4 and the domain table */
        shm_base = (double *)
            apply_for_shm(size_of_vector_in_bytes*3
                          + 2*sizeof(int)*(*num_processors));
        u = shm_base;
        if (u != NULL)
        {
            u2 = u + veclen;
            u4 = u2 + veclen;
            domain = (int *) (u4 + veclen);
        }
        else
        {
            /* never derive addresses from a failed allocation */
            u2 = u4 = NULL;
            domain = NULL;
            not_null = 0;
        }

        /* segment 2: u6, u8, u10 and the mission flags */
        u6 = (double *)
            apply_for_shm(size_of_vector_in_bytes*3
                          + sizeof(char)*(*num_processors));
        if (u6 != NULL)
        {
            u8 = u6 + veclen;
            u10 = u8 + veclen;
            mission_flags = (char *) (u10 + veclen);
        }
        else
        {
            u8 = u10 = NULL;
            mission_flags = NULL;
            not_null = 0;
        }

        /* remaining segments: two copy[] vectors per segment */
        for (i = 0; i < nproc; i++)
        {
            if (i%2 == 0)
            {
                copy[i] = apply_for_shm(size_of_vector_in_bytes*2);
                not_null &= (copy[i] != NULL);
            }
            else
                copy[i] = (copy[i-1] != NULL) ? copy[i-1] + veclen : NULL;
        }

        if (!not_null)
        {
            printf ("\n Allocation failed.\n");
            /* segments outlive the process unless removed: give back the
               ones that did succeed */
            free_shm_();
            return (0);
        }
        printf ("\n Allocation success !!\n");
    }

    /* offset list, in terms of double */
    /* NOTE(review): offset_.SCOPY is never assigned in the code visible
       here -- confirm whether the Fortran side expects it. */
    offset_.SU   = (int) (u   - shm_base);
    offset_.SU2  = (int) (u2  - shm_base);
    offset_.SU4  = (int) (u4  - shm_base);
    offset_.SU6  = (int) (u6  - shm_base);
    offset_.SU8  = (int) (u8  - shm_base);
    offset_.SU10 = (int) (u10 - shm_base);

    /* copy the domain directive from Fortran into shared memory */
    /* NOTE(review): this reads 2*nproc ints starting at layout_.domain,
       which is declared here as a single int; presumably the Fortran
       common block is an array of that length -- confirm. */
    memcpy ((int *)domain, &layout_.domain,
            2*sizeof(int)*(*num_processors));
    return (1);
}

/*
** Signal handler installed by set_error_handler_(): report the signal,
** release the shared memory and the semaphore set when more than one
** processor is in use, then exit with status 1.
** NOTE(review): printf() and exit() are not async-signal-safe; kept as in
** the original.
*/
void my_error_handler (int signal_number)
{
    printf ("\n Exceptions %d occured: use my own error handler.\n",
            signal_number);
    if (layout_.num_processors > 1)
    {
        free_shm_();
        free_semaphore_();
    }
    exit(1);
}

/* Route SIGINT and SIGTERM to my_error_handler(). */
void set_error_handler_ ()
{
    signal(SIGINT, &my_error_handler);
    signal(SIGTERM, &my_error_handler);
}

/* Restore the default actions for SIGINT and SIGTERM. */
void reset_error_handler_ ()
{
    signal(SIGINT, SIG_DFL);
    signal(SIGTERM, SIG_DFL);
}