program filtered_bak C**************************************************************** C PROGRAM FOR CARRYING OUT TOMOGRAPHIC RECONSTRUCTION C FOR 2d PROJECTION DATA FOR PARALLEL BEAM PROJECTIONS C ON THE INTEL PARAGON. C C PROGRAMMER: RAMAN RAO C DATE OF LAST REVISION: 1/2/95 C C VIRGINIA POLYTECHNIC INSTITUTE AND STATE UNIVERSITY C BLACKSBURG, VA 24061 C*************************************************************** C C u = # of rows per processor(=# of angles/nproc ) C nproc = np = number of processors used C np1 = np-1 C nnodes=number ofnodes (generated by the system) C iam = my processor number C ptype = process type C szreal = number of bytes in type real in fortran C power2 = length of the final image in the power of 2 C MAX = actual length of the final image C rays = # of rays used in the forward projection C rows=number of anglesa C cols = number of detectors(rays) C odim = object dimension C rdim = reconstruction dimension (=xdim=ydim in this object only) C xdim, ydim = size of the final image C etime = elapsed time C etime1-6 = elapsed times for intermediate stages of the program C step = step size for generating the checker board C blank = density allocated inside the object C den = same as the above C object = object to be projected and then backprojected C r=xcos(theta)+ysin(theta) normalized to lie in the range (-1,1) C pwr2=power of 2 (real), ipower2=int(pwr2) C mat= initial projection matrix C pmat = projection matrix on each processor. This is the same C as the matrix 'mat' except that the data contained in C 'pmat' is different on each processor (contains data C pertinant to that processor) C nbytes_pmat=number of bytes in the 'pmat' matrix C H = row matrix of the filter/response function C nbytes_h = number of bytes in the H matrix C P = temporary row matrix for storing the rows of 'pmat' C lb = low bin; hb = high bin; mb = mid bin C i,j,k,iall,jall,ii1,l1,ii,ii2,ik: all are temporary variables C for running the 'do loops' C tmp_xy= 2D array for storing the partial summations in the C back-projection stage C sum_xy = 2D array for storing the summation for all (x,y) C msg_xy = 2D array which is the same as tmp_xy but is a C message sent from some processor to the root C or zeroth processor C f = the final matrix containing the reconstructed densities implicit none include 'fnx.h' integer max,angles,rays,xdim,ydim,u,nproc,power2,iix,ij integer odim,step,iproc parameter(MAX=64,angles=128,rays=16,xdim=64,ydim=64) parameter(odim=16,step=4) parameter(u=128,nproc=1) parameter(power2=6) complex xx,H,pmat,mat,P,tmp2 real tau,theta,r,beta,frac,x_max,y_max,mb,mp,tw real delq,tmp_xy,T,sum,f,msg,pwr2,pi,x_cen,y_cen real tot_angle_sum,proc_angle_sum,sum_s,msg_s,const real proc_zero_ang_sum,sum_xy,msg_xy double precision stime,etime,etime1,etime2,etime3,etime4 double precision etime5,etime6 integer rows,cols,n1,k1,ptype,nnodes,iam,x,y,int_val integer iall,jall,i,j,k,nbytes_pmat,ii1,l1,ii,ii2,ik integer ipwr2,szreal,npp,np,nbytes_p,nbytes_h,lb,hb integer blank,den,i1,j1,np1 real x_cen_o,y_cen_o,object dimension xx(16),H(MAX),mat(angles,rays),pmat(u,MAX) dimension P(MAX),T(30),f(xdim,ydim),sum(ydim),msg(ydim) dimension object(odim,odim) dimension proc_zero_ang_sum(MAX),proc_angle_sum(MAX) dimension tot_angle_sum(MAX),tmp_xy(xdim,ydim) dimension sum_xy(xdim,ydim),msg_xy(xdim,ydim) C begin initializations etime=0.0 etime1=0.0 etime2=0.0 etime3=0.0 etime4=0.0 etime5=0.0 etime6=0.0 stime=0.0 PI = acos(-1.0) c stime = dclock() c write(*,*) 'PI=',PI np=nproc np1 = nproc-1 write(*,*) 'np1=',np1 szreal = 4 rows = angles cols = rays tau = 1.0 npp = rows/np nbytes_pmat = MAX*u*szreal*2 nbytes_h = MAX*szreal*2 x_max = real(xdim) y_max = real(ydim) x_cen = x_max/2. y_cen = y_max/2. x_cen_o=real(odim)/2.0 y_cen_o=real(odim)/2.0 mp = real(rays)/2. tw = 1./(1.15*sqrt(2.)) const = PI/real(rows) write(*,*) 'const=',const ipwr2 = power2 C initialize system variables on each node iam = mynode() nnodes = numnodes() ptype = myptype() C find the start time stime = dclock() C init pmat in all processors to zero: this step takes care C of the additional padding with zero's which is required later do iall=1,u do jall = 1,MAX pmat(iall,jall)=(0.,0.) enddo enddo etime1 = dclock()-stime write(*,*) 'etime1=',etime1 do iall=1,odim do jall=1,odim object(iall,jall)=0. enddo enddo if (iam. eq. 0) then write(*,*) 'Number of processors is',np write(*,*) 'angles=',rows,'rays=',cols C generating the checker board (object to be reconstructed) blank =0 do i=1,odim,step if (blank .eq. 0) then blank = 255 else blank = 0 endif do j=1,odim,step if (blank .eq. 0) then blank = 255 else blank = 0 endif do i1=i,i+step do j1=j,j+step object(i1,j1)=real(blank) c write(*,*) object(i1,j1),'i1=',i1,'j1=',j1 enddo enddo enddo enddo etime2 = dclock()-stime write(*,*) 'etime2=',etime2 write(*,*) 'finished generating the object' C uncomment the following lines if you need to write the object to a file c open (unit = 19,file ='object.fil',status='old',form='formatted') c do iix=1,odim c do ii=1,odim c write(19,*) object(iix,ii) c enddo c enddo c close(19) c write(*,*) 'finished writing the object.fil' C Evaluate the response function, find its fft C and send this data to the other nodes (p. 72 of kak) call evalh2(H,1.0,cols) write(*,*) 'H evaluated' C uncomment the following line if you need to write H to a file c open (unit = 15,file ='h.fil',status='old',form='formatted') c do i=1,MAX c write(15,*) 'i=',i,'H=',H(i) c enddo c close(15) C broadcast the object and H to all the processors call csend(20,object,odim*odim*szreal,-1,ptype) call csend(20,H,nbytes_h,-1,ptype) else C Give the corresponding receives to synchronize send's and receive's call crecv(20,object,odim*odim*szreal,0,ptype) call crecv(20,H,nbytes_h,0,ptype) end if etime3 = dclock()-stime write(*,*) 'etime3=',etime3 C forward project the object. C This step directly generates the 'pmat' matrices C for all the processors C finding the forward projections in parallel on all the nodes do i=1,odim do j=1,odim do k=1,u theta = 2.0*real((u*iam)+k)*pi/real(angles) r = cos(theta)*((real(i)-x_cen_o)/x_cen_o) $ +sin(theta)*((real(j)-y_cen_o)/y_cen_o) mb=(r*mp*tw)+mp lb=int(mb) c write(*,*) 'lb=',lb hb=lb+1 frac=mb-lb pmat(k,lb)=pmat(k,lb)+cmplx((1.0-frac)*object(i,j)) pmat(k,hb)=pmat(k,hb)+cmplx(frac*object(i,j)) enddo enddo enddo write(*,*) 'finished the forward projections' etime4 = dclock()-stime write(*,*) 'etime4=',etime4 C Now find the FFT of the input matrix row-wise on all the nodes C this is done in parallel but row-wise serial on all the nodes C The results of the FFT are again stored in the pmat matrix do i=1,u C write a row of PMAT to P do j=1,MAX P(j) = PMAT(i,j) enddo C find the FFT call diffft(P,ipwr2) C write back P into PMAT do l1=1,MAX PMAT(i,l1) = P(l1) enddo enddo C uncomment if you need to verify this step c open (unit = 18,file ='pmat1.fil',status='old' c $,form='formatted') c do i =1,np c if (iam .eq. i-1) then c do ii=1,u c write(18,*) 'iam=',iam,'row=',ii c write(18,*) (pmat(ii,ij), ij=1,MAX) c enddo c endif c enddo c close(18) C multiply the two funs H and and pmat. Multiply here C means scalar multiplication of equivalent elements in the C same position (becomes convolution in the time domain). C This part is executed in parallel on all the nodes C ref( p. 75 kak). Multiplication with a Hamming function C is also done right here do i=1,u do j=1,MAX PMAT(i,j) = PMAT(i,j) * H(j) * $ (0.54-0.46*cos((2.0*(real(j-1))*pi)/real(MAX))) c write(*,*) 'i=',i,'j=',j,'pmat=',PMAT(i,j) enddo enddo C uncomment if you need to verify this step c open (unit = 17,file ='pmat2.fil',status='old',form='formatted') c do iix=1,nproc c call gsync() c if (iam .eq. iix-1) then c do ii=1,u c write(17,*) 'iam=',iam,'row=',ii c write(17,*) (pmat(ii,ij), ij=1,MAX) c enddo c endif c enddo c write(*,*) 'I finished the point to point multiply','iam=',iam c write(*,*) 'iam=',iam c close(17) C find the row-wise IFFT of the new matrix C IFFT is found by: cmplx conj. of the data row by row, finding the C forward transform, again cmplx conj. and then multiply by the C scale factor. This step proceeds in parallel on all the C nodes(ref. any d.s.p. book or d.s.p. by Oppenheim and Schafer. C for all rows on the processor do: do i=1,u C move a row from PMAT to P do j=1,MAX P(j) = PMAT(i,j) enddo C find the complex conj. of each element do ij=1,MAX tmp2 = P(ij) P(ij) = conjg(tmp2) enddo C find the FFT call diffft(P,ipwr2) C find the complex conj. of each element do ij=1,MAX tmp2 = P(ij) P(ij) = conjg(tmp2) enddo C normalize by the scale factor do ik =1,MAX tmp2= P(ik) P(ik) = (tmp2)/real(MAX) enddo C put it bak in the original matrix do j=1,MAX PMAT(i,j) = P(j) enddo enddo write(*,*) 'Finished calculating the filtered vals' C synchronize all processors: This is done to time the filtered C projections call gsync() etime5 = dclock() - stime write(*,*) 'etime5=',etime5 etime = etime1 + (etime5-etime4) if (iam .eq. 0) then write(*,*) 'time for calculating filtered projections is',etime write(*,*) 'iam=',iam endif C uncomment if you need to verify results at this stage c open (unit = 13,file ='pmat3.fil',status='old',form='formatted') c do iix=1,nproc c call gsync() c if (iam .eq. iix-1) then c do ii=1,u c write(13,*) 'iam=',iam,'row=',ii c write(13,*) (PMAT(ii,ij), ij=1,MAX) c enddo c endif c enddo c close(13) c do i=1,u c theta = real((u*iam)+i)*PI/real(angles) c write(*,*) 'theta=',theta,'iam=',iam c enddo C Backprojection begins here C C initialize do x=1,xdim do y=1,ydim tmp_xy(x,y)=0.0 msg_xy(x,y)=0.0 sum_xy(x,y)=0.0 enddo enddo C now carrying out the summation in the backprojection stage C (ref. p.67 kak. The summation on p.67 is split into smaller C summations on each processor. Then these smaller summations C are transferred to the zeroth processor for finding the C total sum. C C finds the tmp_xy's for each x and y C for all x and y do: do x = 1,xdim do y= 1,ydim C fo all rows on each processor do: do i= 1,u C theta evaluated: Note 2.0*PI may not be required C C Just PI will suffice as the multiplier theta = 2.0*PI*real((u*iam)+i)/real(angles) C find the value of 't'(ref. p49 kak) r = cos(theta)*((x-x_cen)/x_cen) $ + sin(theta)*((y-y_cen)/y_cen) C C find the mid bin, low bin and high bin mb= (r*mp*tw) + mp lb = int(mb) hb = int(mb)+1 C C find the fractional value of the mid bin frac = mb-lb tmp_xy(x,y)=tmp_xy(x,y)+ $ real(PMAT(i,lb))*(1.0-frac) + $ frac*real(pmat(i,hb)) c write(*,*) 'mb=',mb,'lb=',lb,'hb=',hb,'frac=',frac enddo enddo enddo C transfer all the tmp_xy's to the zeroth processor C and add them all up. if (iam .ne. 0) then call csend(20,tmp_xy,xdim*ydim*szreal,0,ptype) else C if iam the zeoro'th processor: C initialize the sum to zero do x=1,xdim do y=1,ydim sum_xy(x,y) = 0.0 enddo enddo C for all the processors on the system other than zero do: do ii1 = 1,np1 C C receive the tmp_xy sent earlier call crecv(20,msg_xy,xdim*ydim*szreal,ii1,ptype) C C add them all up as they are received do x=1,xdim do y=1,ydim sum_xy(x,y)=sum_xy(x,y)+msg_xy(x,y) enddo enddo enddo C find the density matrix do x=1,xdim do y=1,ydim f(x,y)=const*(sum_xy(x,y)+tmp_xy(x,y)) enddo enddo C endif for the zero'th processor endif C synchronize all the processors for getting the timing information call gsync() if (iam .eq. 0) then etime6 = dclock() - stime write(*,*) 'etime6=',etime6 etime = etime1+(etime5-etime4)+(etime6-etime5) write(*,*) 'total elapsed time=',etime,'iam=',iam C C write the densities to a file open (unit = 11,file ='f.fil',status='old', $ form='formatted') write(11,*), ((f(i,j),j=1,ydim),i=1,xdim) close(11) endif C end of main program stop end C finds the FFT. The method used is 'decimation in time' SUBROUTINE DIFFFT(X,NU) COMPLEX X(1024),U,W,T N=2**NU PI=3.14159265358979 DO 20 L=1,NU LE=2**(NU+1-L) LE1=LE/2 U=(1.,0.) W=CMPLX(COS(PI/FLOAT(LE1)),-SIN(PI/FLOAT(LE1))) DO 20 J=1,LE1 DO 10 I=J,N,LE IP=I+LE1 T=X(I)+X(IP) X(IP)=(X(I)-X(IP))*U 10 X(I)=T 20 U=U*W NV2=N/2 NM1=N-1 J=1 DO 30 I=1,NM1 IF (I .GE. J) GO TO 25 T=X(J) X(J)=X(I) X(I)=T 25 K=NV2 26 IF (K .GE. J) GO TO 30 J=J-K K=K/2 GO TO 26 30 J=J+K RETURN END C evaluates the filter (or response function) in the C frequency domain. subroutine evalh2(H,tau,N) real tau, PI integer N,j,i,k,ii,MAX,HMAX parameter(MAX=64,HMAX=32) complex H(MAX),a1(HMAX) PI = acos(-1.0) do j=1,MAX H(j) = (0.,0.) enddo do i=1,HMAX a1(i) = real(i) enddo do i =1,MAX if (i .le. HMAX) then H(i) = real(i) else H(i) = real(MAX)+1.0 -real(i) endif enddo C Optional: can multiply with a Hamming window function here. C Not required really, but can be used to get better(?) images. C Note that the filter function C changes from a ramp on using C this Hamming multiply. c do i = 1,MAX c H(i) = H(i)*(.54 + .46*cos(2.0*pi*real(i)/(real(MAX)))) c enddo return end