!> Benchmark the intrinsic matmul on square double-precision matrices.
!!
!! For each matrix order n = minsize, minsize*growth, ... (while n <= maxsize)
!! the program times `reps` repetitions of c = c + matmul(a, b) and prints one
!! row: the order, the integer value of c(1,1), the seconds per repetition and
!! the achieved MFLOPS (counting 2*n**3 floating-point operations per product).
program matmultest
  use, intrinsic :: iso_fortran_env, only: int64
  implicit none

  integer, parameter :: dp = kind(1.0d0)
  integer, parameter :: minsize = 100, maxsize = 10000
  ! Factor by which the matrix order grows between measurements.
  integer, parameter :: growth = 4
  ! reps is chosen so that reps * n**2 is roughly this value (at least 1 rep).
  real(dp), parameter :: work_target = 1.0e7_dp
  ! Header and data rows share the same column widths.
  character(len=*), parameter :: header_fmt = '(a8,a8,a12,a12)'
  character(len=*), parameter :: row_fmt    = '(i8,i8,f12.6,f12.1)'

  real(dp), allocatable :: a(:,:), b(:,:), c(:,:)
  real(dp) :: t, mflops
  integer :: i, n, reps, ierr
  ! 64-bit clock arguments: compilers commonly give a finer tick with these
  ! than with default integers.
  integer(int64) :: t1, t2, rate

  call system_clock(count_rate=rate)
  if (rate <= 0_int64) error stop 'matmultest: no system clock available'

  write(*, header_fmt) 'Size', 'Repeats', 'Seconds', 'MFLOPS'

  n = minsize
  do while (n <= maxsize)
    allocate(a(n,n), b(n,n), c(n,n), stat=ierr)
    if (ierr /= 0) error stop 'matmultest: allocation of work matrices failed'

    ! a = 0.5*I and b = 2*I, so matmul(a,b) is the identity matrix and
    ! c(1,1) ends up equal to reps after the timed loop.
    a = 0.0_dp
    b = 0.0_dp
    c = 0.0_dp
    do i = 1, n
      a(i,i) = 0.5_dp
      b(i,i) = 2.0_dp
    end do

    ! Computed in real arithmetic so n*n cannot overflow a default integer.
    reps = max(1, int(work_target / real(n, dp)**2))

    call system_clock(t1)
    ! in theory a compiler might spot matmul(a,b) as a loop invariant
    ! in practice, none do, and anyway the optimisation would not
    ! help when reps=1, as it is on the last iteration with size=6400
    do i = 1, reps
      c = c + matmul(a, b)
    end do
    call system_clock(t2)

    t = real(t2 - t1, dp) / real(rate, dp)
    if (t > 0.0_dp) then
      mflops = 2.0_dp * real(reps, dp) * real(n, dp)**3 * 1.0e-6_dp / t
    else
      ! Elapsed time was below the clock resolution; report 0 rather than
      ! dividing by zero.
      mflops = 0.0_dp
    end if

    ! Printing int(c(1,1)) shows the repeat count and makes the result of
    ! the timed products observable in the output.
    write(*, row_fmt) n, int(c(1,1)), t / real(reps, dp), mflops

    deallocate(a, b, c)
    n = n * growth
  end do
end program matmultest