C ------------------------------------------------------------------------ C PROGRAM MultiChannel (double complex, parallel shared memory C version, comes with C program shm.c) C ------------------------------------------------------------------------ C 1. USE TERSOFF POTENTIAL C 2. CALCULATE DYNAMICAL MATRIX FOR ARBITRARY CONFIGURATION C 3. USE MULTICHANNEL PERTURBATION AND SUPERCELL K METHOD TO C GET THE VIBRATIONAL LDOS FOR ANY ATOM IN ANY DIRECTION C 4. HIGH-PRECISION (ORDER-12) INTEGRATION OF EQUATION OF MOTION C 5. "SPIN-WAVE" ENCODED AMPLITUDE SERIES TO MINIMIZE THE ERROR C 6. SHIFT THE SPECTRUM FOR BETTER CONVERGENCE IN THE LOW C FREQUENCY END C C MAY 12, 1997 C DEVELOPED BY JU LI AT MIT C ------------------------------------------------------------------------ PROGRAM MultiChannel # include "mulcp.fh" C ------------------------------------------------------------------------ C MM is the maximum of frequency channels. C MIP is the maximum number of steps in one base cycle. # define _MM 500 # define _MIP 10*_MM PARAMETER (MM=_MM, MIP=_MIP) DOUBLE PRECISION DELTA,D_CC,D_PHASE,SECONDS_SPENT, A HA(NM*3,3),HB(NM*3,3),Q(3,3),HBIJ,VEC,VX,VY,VZ,W,MIN_W, A MAX_W,DEL2,DEL3,DEL4,DEL5,DEL6,DEL7,DEL8,DEL9,DEL10, A DEL11,ALPHA,DDSIN(MIP),DDCOS(MIP), A VA(MM),VB(MM),VC(MM),VL(MM),VM(MM),VN(MM),VALUE(MM), A W1(MM),W2(MM),W4(MM),W6(MM),W8(MM),W10(MM),DOS(3,2000),FPM, A AR(MM),AI(MM),DAMAX,PHI(MM),SHIFT_W,RATIO,UU,VV C Order-12 integration: # define _ORDER 12 DOUBLE COMPLEX U(3*NM),U2(3*NM),U4(3*NM),U6(3*NM),U8(3*NM),U8I,CC, A U10(3*NM),UOLD(3*NM),FUP(MM),UI,U1I,U2I,U3I,U4I,U5I,U6I,U7I, A U9I,U10I,U11I,UNI,UN1I,UN2I,UN3I,UN4I,UN5I,UN6I,UN7I,UN8I, A UN9I,UN10I,UN11I,UOI,UO2I,UO4I,UO6I,UO8I,UO10I,B(MM), A U2_IBB,U4_IBB,U6_IBB,U8_IBB,U10_IBB C MGP is the maximum number of frequency groups. # define _MGP 200 INTEGER RUNMODE, NSAMPLE, NPA, IBASE(_MGP) LOGICAL CONVERGED(MM), ALL_CONVERGED, ALREADY LOGICAL PRINT_IP, PRINT_FREQ, PRINT_FLOPRATE C ------------------------------------------------------------------------ C ------------------------------------------------------------------------ c Definitions for the multi-process code # define _MAX_PROCESSORS 64 PARAMETER (MAX_PROCESSORS=_MAX_PROCESSORS) INTEGER master_pid, my_pid, master_idx, my_idx INTEGER getpid, getuid, getgid, generate_shm, a generate_and_init_semaphore EXTERNAL getpid, getuid, getgid, generate_shm, a generate_and_init_semaphore EXTERNAL fork, kill_all_slaves, slave_wait_for_command, a free_shm, write_shm, read_shm, master_multiply_all, a startwatch, checkwatch, free_semaphore, my_error_handler, a set_error_handler,reset_error_handler COMMON /OFFSET/ SU, SU2, SU4, SU6, SU8, SU10, SCOPY INTEGER SU2, SU4, SU6, SU8, SU10, SCOPY COMMON /LAYOUT/ num_processors, domain(2*MAX_PROCESSORS) INTEGER num_processors, domain C ------------------------------------------------------------------------ CALL INITIALIZE_GENERAL_CONSTANTS CALL INITIALIZE_TERSOFF_CONSTANTS UPDATE = .TRUE. c ====================================================================== c Program input parameters READ (*,'(A70)') buf C "N: Number of atoms in the supercell:" READ *, N READ (*,'(/,A70)') buf C "Name of the structure:" READ (*,'(A70)') NAME_OF_STRUCTURE READ (*,'(/,A70)') buf C "Default geometry of the unit cell (a11,a22,a33,a12,a13,a23), in Angstrom:" READ *, H(1,1),H(2,2),H(3,3),H(1,2),H(1,3),H(2,3) H(2,1) = H(1,2) H(3,1) = H(1,3) H(3,2) = H(2,3) READ (*,'(/,A70)') buf C "Translation of the unit cell in three dimensions, NC(1,2,3):" READ *, NC(1), NC(2), NC(3) DO 549 I=1,3 DO 549 J=1,3 549 H(I,J) = H(I,J)*NC(I) READ (*,'(/,A70)') buf C "Read from configurational file (if 'NULL' then start from perfect Xtal):" READ (*,'(A70)') NAME_READ IF (NAME_READ(1:4).EQ."NULL") THEN C start from scratch READ_FROM_CONFIG = .FALSE. ELSE READ_FROM_CONFIG = .TRUE. ENDIF READ (*,'(/,A70)') buf C "Output local density of states in file:" READ (*,'(A70)') NAME_WRITE READ (*,'(/,A70,/,A70,/,A70,/,A70)') buf,buf,buf,buf C "Mode of operation: C (0 Stop at maximum iteration MAXITER) C (1 Stop when the base frequency channel converges) C (2 Stop when all frequency channels converge) READ *,RUNMODE READ (*,'(/,A70,/,A70,/,A70)') buf,buf,buf C "Number of supercell k-points that you want to sample C using Monte Carlo (1 and 4 would invoke Cohen-Chadi C k-points for simple cubic as default): " READ *,NSAMPLE READ (*,'(/,A70)') buf C "Random number generator seed:" READ *,ISEED c initiate random number generator ran1 DO I=1,MOD(ISEED,63346) XX = RAN1(ISEED) enddo READ (*,'(/,A70)') buf C "LDOS atom index (1-N):" READ *,IATOM READ (*,'(/,A70)') buf C "Cartesian direction on that atom:" READ *,Q(1,1),Q(1,2),Q(1,3) READ (*,'(/,A70)') buf C "Number of frequency groups" READ *,NPA READ (*,'(/,A70)') buf C "Mimimum(base) frequency in THz:" READ *,MIN_W READ (*,'(/,A70)') buf C "Maximum frequency(THz) of the spectrum:" READ *,MAX_W READ (*,'(/,A70)') buf C "Shift the spectrum by (THz):" READ *,SHIFT_W C Make the 2nd (real) freq. ratio times C the 1st (real) freq from zero. W1(1) = (INT(SHIFT_W/MIN_W)+1)*MIN_W W1(2) = W1(1)+MIN_W RATIO = 2.5 SHIFT_W = DSQRT(((RATIO*W1(1))**2-W1(2)**2)/(RATIO**2-1)) MAX_W = DSQRT(MAX_W**2+SHIFT_W**2) READ (*,'(/,A70)') buf C "KTWIST: twist period of the "spin-wave" amplitude series:" READ *, KTWIST READ (*,'(/,A70)') buf C "Number of steps of a period for the maximum frequency:" READ *, MINCYCLE READ (*,'(/,A70,/,A70)') buf,buf C "MINITER: number of iterations after which to start C monitoring convergence:" READ *,MINITER READ (*,'(/,A70)') buf C "MAXITER: maximum number of iterations:" READ *,MAXITER READ (*,'(/,A70)') buf C "Fluctuation ratio in three steps to decide convergence:" READ *,TOLE READ (*,'(/,A70)') buf C "Lower bound below which (after MINITER) the LDOS is considered 0:" READ *,TOLM READ (*,'(/,A70)') buf C "To run on how many processors:" READ *, NUM_PROCESSORS IF (NUM_PROCESSORS.GT.MAX_PROCESSORS) THEN PRINT *, NUM_PROCESSORS, A '> MAX_PROCESSORS, revise source code parameter.' STOP ENDIF READ (*,'(/,A70)') buf C "Default loading ratio of the master process:" READ *, ratio_master READ (*,'(/,A70)') buf C "PRINT_IP PRINT_FREQ PRINT_FLOPRATE:" READ *, PRINT_IP, PRINT_FREQ, PRINT_FLOPRATE c ====================================================================== IF (READ_FROM_CONFIG) THEN C -------------------------------------------------- C READ FROM CONFIGURATIONAL FILE: CALL READ_CONFIG ELSEIF (NAME_OF_STRUCTURE.EQ.'3C-SiC') THEN CALL ASSIGN_3C_SIC ELSEIF (NAME_OF_STRUCTURE.EQ.'HEX3C-SiC') THEN CALL ASSIGN_HEX3C_SIC ELSEIF (NAME_OF_STRUCTURE.EQ.'4H-SiC') THEN CALL ASSIGN_4H_SIC ELSE PRINT *,'THE DESIRED PERIODIC STRUCTURE IS NOT AVAILABLE.' STOP ENDIF C -------------------------------------------------- CALL ASSIGN_DYNAMICS IF (UPDATE) CALL UPDATE_VERLET_LIST CALL TERSOFF_DYNAMICAL c print *,h(1,1),h(1,2),h(1,3),h(2,1),h(2,2),h(2,3),h(3,3) c print *,'n = ',n c print *,'pote =', pote/n c do i = 1,n c print *, '*', i, index(i), index2(i) c enddo c do i = 1,3*n c print *,'**', i, idx(i) c enddo PRINT *, ' Dynamical matrix (',n*3,n*3,') construction complete.' PRINT *, ' ' c stop C --------------------------------------------------------- C Multi-Channel Perturbation Method N3 = 3*N IBB = 3*(IATOM-1) C ================================================================= C Do a unitary transformation on the dynamical matrix such that C our defined degree of freedom is a normal coordinate. VEC = DSQRT(Q(1,1)**2+Q(1,2)**2+Q(1,3)**2) Q(1,1) = Q(1,1)/VEC Q(1,2) = Q(1,2)/VEC Q(1,3) = Q(1,3)/VEC VX = RAN1(ISEED) VY = RAN1(ISEED) VZ = RAN1(ISEED) C take a cross-product V*C1 Q(2,1) = VY*Q(1,3)-Q(1,2)*VZ Q(2,2) = Q(1,1)*VZ-VX*Q(1,3) Q(2,3) = VX*Q(1,2)-Q(1,1)*VY VEC = DSQRT(Q(2,1)**2+Q(2,2)**2+Q(2,3)**2) Q(2,1) = Q(2,1)/VEC Q(2,2) = Q(2,2)/VEC Q(2,3) = Q(2,3)/VEC C take the cross-product C1*C2 Q(3,1) = Q(1,2)*Q(2,3)-Q(2,2)*Q(1,3) Q(3,2) = Q(2,1)*Q(1,3)-Q(1,1)*Q(2,3) Q(3,3) = Q(1,1)*Q(2,2)-Q(2,1)*Q(1,2) DO J=1,3 C scatter the part of sparse matrix HESS concerning C atom IATOM into an N3 by 3 matrix HA DO 730 I=1,N3 730 HA(I,J) = 0. DO 76 K=1,IDX(J+IBB) 76 HA(ID(K,J+IBB),J) = HESS(K,J+IBB) ENDDO C HB = HA*QT DO 8776 I=1,N3 DO 8776 J=1,3 HB(I,J) = 0. DO 8776 K=1,3 8776 HB(I,J) = HB(I,J)+HA(I,K)*Q(J,K) C get the middle part of HA DO 987 I=1,3 DO 987 J=1,3 987 HA(I,J) = HB(I+IBB,J) C multiply by Q, put back into HB DO 988 I=1,3 DO 988 J=1,3 HB(IBB+I,J) = 0. DO 988 K=1,3 988 HB(IBB+I,J) = HB(IBB+I,J)+Q(I,K)*HA(K,J) C re-insert the transformed matrix into HESS DO 876 J=IBB+1,IBB+3 IDX(J) = 0 DO 876 I=1,N3 HBIJ = HB(I,J-IBB) C first insert the columns IF (ABS(HBIJ).LT.TOL) GOTO 259 IDX(J) = IDX(J)+1 ID(IDX(J),J) = I HESS(IDX(J),J) = HBIJ C then symmetrically insert the rows. 259 IF (I.GE.IBB+1.AND.I.LE.IBB+3) GOTO 876 DO K=1,IDX(I) IF (J.EQ.ID(K,I)) THEN HESS(K,I) = HBIJ GOTO 876 ENDIF ENDDO IF (ABS(HBIJ).LT.TOL) GOTO 876 IDX(I) = IDX(I)+1 ID(IDX(I),I)=J HESS(IDX(I),I) = HBIJ 876 CONTINUE C ================================================================= IBB = IBB+1 C Count the number of non-zero elements in the sparse matrix: nonzero = 0 do I=1,N3 nonzero = nonzero+IDX(I) enddo IP = 0 DO 765 I=1,NPA c construct base frequency within (min_w, 2*min_w) IP = IP+1 IF (I.EQ.1) THEN DOS(1,IP) = MIN_W ELSE DOS(1,IP) = MIN_W+RAN1(ISEED)*MIN_W ENDIF DOS(2,IP) = 0.D0 IBASE(I) = IP ALPHA = DOS(1,IP) c construct multiples of this base frequency within max_w W = 2.D0*ALPHA 155 IF (W.GT.MAX_W) GOTO 765 IP = IP+1 DOS(1,IP) = W DOS(2,IP) = 0.D0 W = W+ALPHA GOTO 155 765 CONTINUE IBASE(NPA+1) = IP+1 DO 764 IW=1,NPA PRINT *,'Frequency group',iw,' in THz' DO 763 J=IBASE(IW),IBASE(IW+1)-1 IF (DOS(1,J).LT.SHIFT_W) THEN DOS(3,J) = DOS(1,J)-SHIFT_W ELSE C STORE THE REAL FREQUENCY IN DOS(3,J) DOS(3,J) = DSQRT(DOS(1,J)**2-SHIFT_W**2) ENDIF PRINT *,'Real w = ',DOS(3,J) 763 CONTINUE ALPHA = DOS(1,IBASE(IW))/UFREQ_IN_THZ MMAX = IBASE(IW+1)-IBASE(IW) IF (MMAX.GT.MM) THEN PRINT *, 'Highest frequency multiple =',MMAX,' >',MM, A ', more than mulcp can handle.' PRINT *, 'Increase _MM in mulcp.F and recompile.' STOP ENDIF MINT = MMAX*MINCYCLE PRINT *,'Base Cycle =',MINT PRINT *,' ' IF (MINT.GT.MIP) THEN PRINT *, 'Base cycle >',MIP,', more than mulcp can handle.' PRINT *, 'Increase _MIP in mulcp.F and recompile.' STOP ENDIF 764 CONTINUE IF (num_processors.gt.1) THEN C ----------------------------------------------------------------------- C ---------------------------------------------------- C Set layout directive for processors: here C we use "block" style simple partition. C The actual layout is subject to change by smart C applications with dynamical load balancing. C ---------------------------------------------------- n3avg = n3/num_processors domain(1) = 1 domain(2) = int(n3avg*ratio_master) n3avg = (n3-domain(2))/(num_processors-1) do i=2,num_processors domain(i*2-1) = domain(i*2-2)+1 domain(i*2) = domain(i*2-1)+n3avg-1 enddo domain(num_processors*2) = n3 C ------------------------------------------------------------------- C allocate shared memory for everything: if (generate_shm(n3,num_processors).eq.0) then print*,'Can not generate shared memory block. Check if there is' print*,'any undeleted shared memory allocation from previous run.' stop endif C ------------------------------------------------------------------- C apply for a semaphore set with num_processors components: if (generate_and_init_semaphore(num_processors).eq.0) then print*,'Can not create semaphore.' stop endif C ----------------------------------------------------------------------- ENDIF SUM_WEIGHT = 0.D0 DO 209 IK=1,NSAMPLE IF (NAME_OF_STRUCTURE.EQ.'3C-SiC') THEN CALL SAMPLE_3C_SIC_BZ(NSAMPLE,IK,SKX,SKY,SKZ,WEIGHT) ELSEIF (NAME_OF_STRUCTURE.EQ.'HEX3C-SiC') THEN CALL SAMPLE_HEX3C_SIC_BZ(NSAMPLE,IK,SKX,SKY,SKZ,WEIGHT) ELSEIF (NAME_OF_STRUCTURE.EQ.'4H-SiC') THEN CALL SAMPLE_4H_SIC_BZ(NSAMPLE,IK,SKX,SKY,SKZ,WEIGHT) ELSE CALL SAMPLE_3C_SIC_BZ(NSAMPLE,IK,SKX,SKY,SKZ,WEIGHT) ENDIF PRINT *, ' ' PRINT *, 'Wavevector',IK print *,'(',skx/2/PI, a sky/2/PI, a skz/2/PI, a '): weight=', weight PRINT *, ' ' SUM_WEIGHT = SUM_WEIGHT+WEIGHT c ------------------------------------------------------- c Assemble A(i,j) matrix here with the selected k-point: DO 83 I=1,N3 IPT = (I-1)/3+1 SXI = SX(IPT) SYI = SY(IPT) SZI = SZ(IPT) ALREADY = .FALSE. DO 82 K=1,IDX(I) J = ID(K,I) JPT = (J-1)/3+1 SXIJ = SXI - SX(JPT) SYIJ = SYI - SY(JPT) SZIJ = SZI - SZ(JPT) IF(SXIJ.GT.HALF) SXIJ=SXIJ-ONE IF(SXIJ.LT.-HALF) SXIJ=SXIJ+ONE IF(SYIJ.GT.HALF) SYIJ=SYIJ-ONE IF(SYIJ.LT.-HALF) SYIJ=SYIJ+ONE IF(SZIJ.GT.HALF) SZIJ=SZIJ-ONE IF(SZIJ.LT.-HALF) SZIJ=SZIJ+ONE D_PHASE = SKX*SXIJ+SKY*SYIJ+SKZ*SZIJ A(K,I) = HESS(K,I)*DCMPLX(COS(D_PHASE),-SIN(D_PHASE)) c shift the spectrum by (shift_w)**2 to get away from c the methodological singularity at w=0 IF (I.EQ.J.AND.(.NOT.ALREADY)) THEN A(K,I) = A(K,I)+(SHIFT_W/UFREQ_IN_THZ)**2 ALREADY = .TRUE. ENDIF 82 CONTINUE IF (.NOT.ALREADY) THEN IDX(I) = IDX(I)+1 ID(IDX(I),I) = I A(IDX(I),I) = (SHIFT_W/UFREQ_IN_THZ)**2 ENDIF 83 CONTINUE C ------------------------------------------------------- IF (num_processors.gt.1) THEN C ------------------------------------------------------- C Fork the program here: resources such as A(i,j) (read C only) will be automatically "cloned". call reset_error_handler Master_pid = getpid() My_pid = Master_pid Master_idx = 1 My_idx = Master_idx do i=2,num_processors if (My_pid.eq.Master_pid) then call fork My_pid = getpid() if (My_pid.ne.Master_pid) My_idx = i endif enddo C slave processors: wait here if (My_pid.ne.Master_pid) then call slave_wait_for_command (My_idx) call exit() endif ENDIF C Invoke my own exceptions handling mechanism (only the C master process), slave processes use the default. call set_error_handler C ------------------------------------------------------ DO 461 IW = 1,NPA C different frequency groups ALPHA = DOS(1,IBASE(IW))/UFREQ_IN_THZ MMAX = IBASE(IW+1)-IBASE(IW) MINT = MMAX*MINCYCLE DELTA = 2.D0*dacos(-1.D0)/ALPHA/MINT DEL2 = DELTA**2/2.D0 DEL3 = DELTA**3/6.D0 DEL4 = DELTA**4/24.D0 DEL5 = DELTA**5/120.D0 DEL6 = DELTA**6/720.D0 DEL7 = DELTA**7/5040.D0 DEL8 = DELTA**8/40320.D0 DEL9 = DELTA**9/362880.D0 DEL10 = DELTA**10/3628800.D0 DEL11 = DELTA**11/39916800.D0 D_CC = 2*DACOS(-1.D0)/MINT DO IP=1,MINT D_PHASE = D_CC*IP DDSIN(IP) = DSIN(D_PHASE) DDCOS(IP) = DCOS(D_PHASE) ENDDO DO M = 1,MMAX VA(M) = 1.D0 VB(M) = 2.D0 VC(M) = 5.D0 VL(M) = 1.D0 VM(M) = 1.D0 VN(M) = 1.D0 FUP(M) = ZERO W1(M) = M*ALPHA W2(M) = (M*ALPHA)**2 W4(M) = (M*ALPHA)**4 W6(M) = (M*ALPHA)**6 W8(M) = (M*ALPHA)**8 W10(M)= (M*ALPHA)**10 CONVERGED(M) = .FALSE. ENDDO c ---------------------------------------------------------- c To generate an error-cancelling amplitude series. c ++--++--++--++-- ar(1) = 1.d0 ar(2) = 1.d0 if (ran1(iseed).gt.0.5) ar(2) = -ar(2) do m=3,mmax ar(m)= -ar(m-2) enddo c +-+-+-+-+-+-+-+- parity = 1 if (ran1(iseed).gt.0.5) parity = -parity do m=1,mmax ai(m) = parity*ar(m) parity = -parity enddo c clear the empty channels do m=1,mmax if (dos(1,ibase(iw)+m-1).gt.SHIFT_W) goto 974 ar(m) = 0. ai(m) = 0. enddo 974 if (m.gt.2) then ar(m-2) = ar(m+2) ai(m-2) = ai(m+2) endif c randomly generate a "spin-wave" to avoid numerical c problems when integrating in time phi(1) = 0.d0 damax = 2*dacos(-1.d0)/ktwist if (ran1(iseed).gt.0.5) damax = -damax do m=1,mmax b(m) = DCMPLX(ar(m),ai(m))*DCMPLX(dcos(phi(m)),dsin(phi(m))) phi(m+1)=phi(m)+damax*ran1(iseed) enddo C ---------------------------------------------------------- C Pre-condition the integration: C only odd-power derivatives exist at t=0 C to save space, u2 stands for u3, u4 stands for u5 etc. do 612 i=1,n3 u(i) = DC_ZERO u2(i) = DC_ZERO u4(i) = DC_ZERO u6(i) = DC_ZERO u8(i) = DC_ZERO u10(i) = DC_ZERO 612 continue do m=1,mmax u2(ibb) = u2(ibb)+m*alpha*b(m) u4(ibb) = u4(ibb)-(m*alpha)**3*b(m) u6(ibb) = u6(ibb)+(m*alpha)**5*b(m) u8(ibb) = u8(ibb)-(m*alpha)**7*b(m) u10(ibb) = u10(ibb)+(m*alpha)**9*b(m) enddo do i=1,n3 do k=1,idx(i) jd = id(k,i) u4(jd) = u4(jd)-A(k,i)*u2(i) enddo enddo do i=1,n3 do k=1,idx(i) jd = id(k,i) u6(jd) = u6(jd)-A(k,i)*u4(i) enddo enddo do i=1,n3 do k=1,idx(i) jd = id(k,i) u8(jd) = u8(jd)-A(k,i)*u6(i) enddo enddo do i=1,n3 do k=1,idx(i) jd = id(k,i) u10(jd) = u10(jd)-A(k,i)*u8(i) enddo enddo do i=1,n3 u(i) = u2(i)*del3+u4(i)*del5+u6(i)*del7 c +u8(i)*del9+u10(i)*del11 uold(i) = DC_ZERO enddo u1i = DC_ZERO u3i = u2(ibb) u5i = u4(ibb) u7i = u6(ibb) u9i = u8(ibb) u11i = u10(ibb) ui = DC_ZERO u2i = DC_ZERO u4i = DC_ZERO u6i = DC_ZERO u8i = DC_ZERO u10i = DC_ZERO ip = 1 IF (PRINT_FLOPRATE) CALL startwatch() 576 ipp = mod(ip-1,mint)+1 if (PRINT_IP) print *, 'ip = ',ip C Calculate the perturbation on normal coordinate IBB u2_ibb = DC_ZERO u4_ibb = DC_ZERO u6_ibb = DC_ZERO u8_ibb = DC_ZERO u10_ibb = DC_ZERO do m=1,mmax ipm = mod(m*ipp-1,mint)+1 u2_ibb = u2_ibb+ddsin(ipm)*b(m) u4_ibb = u4_ibb-w2(m)*ddsin(ipm)*b(m) u6_ibb = u6_ibb+w4(m)*ddsin(ipm)*b(m) u8_ibb = u8_ibb-w6(m)*ddsin(ipm)*b(m) u10_ibb = u10_ibb+w8(m)*ddsin(ipm)*b(m) enddo IF (num_processors.eq.1) THEN C ---------------------------------------------------------- C Single processor version do i=1,n3 u2(i) = DC_ZERO u4(i) = DC_ZERO u6(i) = DC_ZERO u8(i) = DC_ZERO u10(i) = DC_ZERO enddo C Add perturbation u2(ibb) = u2_ibb u4(ibb) = u4_ibb u6(ibb) = u6_ibb u8(ibb) = u8_ibb u10(ibb)= u10_ibb do i=1,n3 do k=1,idx(i) u2(id(k,i)) = u2(id(k,i))-A(k,i)*u(i) enddo enddo do i=1,n3 do k=1,idx(i) u4(id(k,i)) = u4(id(k,i))-A(k,i)*u2(i) enddo enddo do i=1,n3 do k=1,idx(i) u6(id(k,i)) = u6(id(k,i))-A(k,i)*u4(i) enddo enddo do i=1,n3 do k=1,idx(i) u8(id(k,i)) = u8(id(k,i))-A(k,i)*u6(i) enddo enddo do i=1,n3 do k=1,idx(i) u10(id(k,i)) = u10(id(k,i))-A(k,i)*u8(i) enddo enddo C ---------------------------------------------------------- ELSE C Let multi processor do the work: call master_multiply_all (u,u2,u4,u6,u8,u10, A ibb,u2_ibb,u4_ibb,u6_ibb,u8_ibb,u10_ibb) ENDIF C ---------------------------------------------------------- c if (ip.eq.1) then c do i=1,n3 c print *, 'u', i, u(i) c print *, 'u2', i, u2(i) c print *, 'u4', i, u4(i) c print *, 'u6', i, u6(i) c print *, 'u8', i, u8(i) c print *, 'u10', i, u10(i) c print *, ' ' c enddo c endif if (ip.ne.1) then u11i = (u10(ibb)-uo10i)/2.d0/delta u9i = ((u8(ibb)-uo8i)/2.d0-del3*u11i)/delta u7i = ((u6(ibb)-uo6i)/2.d0-del3*u9i-del5*u11i)/delta u5i = ((u4(ibb)-uo4i)/2.d0-del3*u7i-del5*u9i-del7*u11i)/delta u3i = ((u2(ibb)-uo2i)/2.d0-del3*u5i-del5*u7i-del7*u9i c -del9*u11i)/delta u1i = ((u(ibb)-uoi)/2.d0-del3*u3i-del5*u5i-del7*u7i c -del9*u9i-del11*u11i)/delta endif c -------------------------------------------------------------------- c calculate the integral uni = ui+delta*u1i+del2*u2i+del3*u3i+del4*u4i+del5*u5i c +del6*u6i+del7*u7i+del8*u8i+del9*u9i+del10*u10i+del11*u11i un1i = u1i+delta*u2i+del2*u3i+del3*u4i+del4*u5i+del5*u6i c +del6*u7i+del7*u8i+del8*u9i+del9*u10i+del10*u11i un2i = u2i+delta*u3i+del2*u4i+del3*u5i+del4*u6i+del5*u7i c +del6*u8i+del7*u9i+del8*u10i+del9*u11i un3i = u3i+delta*u4i+del2*u5i+del3*u6i+del4*u7i+del5*u8i c +del6*u9i+del7*u10i+del8*u11i un4i = u4i+delta*u5i+del2*u6i+del3*u7i+del4*u8i+del5*u9i c +del6*u10i+del7*u11i un5i = u5i+delta*u6i+del2*u7i+del3*u8i+del4*u9i+del5*u10i c +del6*u11i un6i = u6i+delta*u7i+del2*u8i+del3*u9i+del4*u10i+del5*u11i un7i = u7i+delta*u8i+del2*u9i+del3*u10i+del4*u11i un8i = u8i+delta*u9i+del2*u10i+del3*u11i un9i = u9i+delta*u10i+del2*u11i un10i = u10i+delta*u11i un11i = u11i do m=1,mmax ipm = mod(m*ipp-1,mint)+1 ibm = mod(m*(ipp+mint-1)-1,mint)+1 fup(m) = fup(m)+(uni-un2i/w2(m)+un4i/w4(m)-un6i/w6(m)+un8i/w8(m) c -un10i/w10(m))/w1(m)*ddsin(ipm) c -(ui-u2i/w2(m)+u4i/w4(m)-u6i/w6(m)+u8i/w8(m) c -u10i/w10(m))/w1(m)*ddsin(ibm) c +(un1i-un3i/w2(m)+un5i/w4(m)-un7i/w6(m)+un9i/w8(m) c -un11i/w10(m))/w2(m)*ddcos(ipm) c -(u1i-u3i/w2(m)+u5i/w4(m)-u7i/w6(m)+u9i/w8(m) c -u11i/w10(m))/w2(m)*ddcos(ibm) enddo C -------------------------------------------------------------------- c total flops = (4+2) * (# of non-zero matrix elements) c * (# of time steps) * (_ORDER-2)/2 #define _A_CHUNK_OF_WORK 500000000 #define _STEP_TO_INFORM _A_CHUNK_OF_WORK/nonzero/(_ORDER-2) IF ((mod(ip,_STEP_TO_INFORM).eq.0).and.PRINT_FLOPRATE) a THEN call checkwatch(seconds_spent) totalflops = 3. * nonzero * ip * (_ORDER-2) print *,'Step = ',ip,100.*ip/mint,'% of a base cycle done:' floprate = (1.e-6)*totalflops/seconds_spent print *,' Average Mflops/s up to now = ',floprate print *,' Mflops/s per processor = ',floprate/num_processors ENDIF IF (IPP.EQ.MINT) THEN PRINT *,' ' DO 347 M=1,MMAX im = ibase(iw)+m-1 c for empty channels if (ar(m).eq.0.and.ai(m).eq.0) then converged(m) = .true. value(m) = 0. endif if (.not.converged(m)) then c decode the spin-wave fup(m) = fup(m)*DCMPLX(dcos(phi(m)),-dsin(phi(m))) uu = real(fup(m)) vv = dimag(fup(m)) c cancel out all odd-numbered noise functions fpm = (uu+dsign(ar(m)/ai(m),ai(m)*ar(m))*vv)/ar(m)/2.d0 c plug into the formulae VALUE(M) = -2.*M*ALPHA*ALPHA*MINT a /IP/PI/PI/UFREQ_IN_THZ*FPM c scale it because of the spectra shift transformation VALUE(M) = VALUE(M)*DOS(3,IM)/DOS(1,IM) if (PRINT_FREQ) print *,'w =',dos(3,im), value(m) else if (PRINT_FREQ) print *,'w =',dos(3,im), dos(2,im)/sum_weight, a ' * converged' endif 347 CONTINUE print *,' ' IF ((IP/MINT).GT.MAXITER) THEN C ------------------------------------------------------------ C stop if after MAXITER base cycles we still did't converge do 705 m=1,mmax IM = IBASE(IW)+M-1 if (.NOT.CONVERGED(M)) a DOS (2,IM)=DOS(2,IM)+ a ((VALUE(M)+VC(M)+VB(M)+VA(M)+VN(M)+VM(M))/6.D0)*WEIGHT 705 continue GOTO 462 ELSEIF (RUNMODE.EQ.0.AND.(IP/MINT).EQ.MAXITER) THEN C ------------------------------------------------------------ C stop after certain iterations of the base cycle do 703 m=1,mmax IM = IBASE(IW)+M-1 if (value(M).gt.tolm) then DOS (2,IM)=DOS(2,IM)+VALUE(M)*WEIGHT endif 703 continue GOTO 462 ELSEIF (RUNMODE.EQ.1) THEN C ------------------------------------------------------------ C converge when the base frequency component converge IF ((ABS(VALUE(1)/VA(1)-1).LT.TOLE.AND. a ABS(VALUE(1)/VB(1)-1).LT.TOLE.AND. a ABS(VALUE(1)/VC(1)-1).LT.TOLE).or. a (value(1).lt.tolm.and.(IP/MINT).gt.MINITER)) THEN do 738 m=1,mmax IM = IBASE(IW)+M-1 if (value(M).gt.tolm) a DOS (2,IM)=DOS(2,IM)+ a (VALUE(M)*0.5+(VC(M)+VB(M))*0.25)*WEIGHT 738 continue GOTO 462 ENDIF ELSEIF (RUNMODE.EQ.2) THEN C ------------------------------------------------------------ C converge when all frequency components converge ALL_CONVERGED = .TRUE. DO 932 M=1,MMAX IF (.NOT.CONVERGED(M)) THEN CONVERGED(M) = (ABS(VALUE(M)/VA(M)-1).LT.TOLE.AND. a ABS(VALUE(M)/VB(M)-1).LT.TOLE.and. a ABS(VALUE(M)/VC(M)-1).LT.TOLE).or. a (value(M).lt.tolm.and.(IP/MINT).gt.MINITER) IF (.NOT.CONVERGED(M)) THEN ALL_CONVERGED = .FALSE. ELSEIF (value(m).gt.tolm) THEN IM = IBASE(IW)+M-1 DOS (2,IM)=DOS(2,IM)+ a (VALUE(M)*0.5+(VC(M)+VB(M))*0.25)*WEIGHT ENDIF ENDIF 932 CONTINUE IF (ALL_CONVERGED) GOTO 462 C ------------------------------------------------------------ ENDIF DO 679 M=1,MMAX VL(M) = VM(M) VM(M) = VN(M) VN(M) = VA(M) VA(M) = VB(M) VB(M) = VC(M) VC(M) = VALUE(M) 679 CONTINUE ENDIF c for a base frequency cycle uo10i = u10i uo8i = u8i uo6i = u6i uo4i = u4i uo2i = u2i uoi = ui u10i = u10(ibb) u8i = u8(ibb) u6i = u6(ibb) u4i = u4(ibb) u2i = u2(ibb) ui = u(ibb) do i=1,n3 cc = uold(i) uold(i) = u(i) u(i) = 2.d0*(u(i)+del2*u2(i)+del4*u4(i)+del6*u6(i) c +del8*u8(i)+del10*u10(i))-cc enddo IP = IP+1 GOTO 576 462 IF (PRINT_FLOPRATE) THEN call checkwatch(seconds_spent) totalflops = 3. * nonzero * ip * (_ORDER-2) print *,'Step = ',ip,100.*ip/mint,'% of a base cycle done:' floprate = (1.e-6)*totalflops/seconds_spent print *,' Average Mflops/s up to now = ',floprate print *,' Mflops/s per processor = ',floprate/num_processors ENDIF 461 CONTINUE c the loop through all base frequency sets ends here OPEN (UNIT=LP_DOS, STATUS='UNKNOWN', FORM='FORMATTED', a FILE=NAME_WRITE) DO I = 1,IBASE(NPA+1)-1 IF (DOS(3,I).GT.0.E0) a WRITE (LP_DOS,501) DOS(3,I), DOS(2,I)/SUM_WEIGHT 501 FORMAT (2(1X,E13.7)) ENDDO CLOSE (LP_DOS) C ------------------------------------------------------- C Terminate the slave processes here because A(i,j) C is going to be changed: IF (num_processors.gt.1) CALL kill_all_slaves() C ------------------------------------------------------- c sample more supercell k's 209 CONTINUE C Free IPC resources IF (num_processors.gt.1) THEN CALL free_shm() CALL free_semaphore() ENDIF STOP END SUBROUTINE f77_sparse_multiply (IN,OUT,START,FINISH) # include "mulcp.fh" DOUBLE COMPLEX IN(1),OUT(1) INTEGER START,FINISH,I,K DO I=START,FINISH DO K=1,IDX(I) OUT(ID(K,I)) = OUT(ID(K,I))-A(K,I)*IN(I) ENDDO ENDDO RETURN END