c
c     Fx histogram program,  nxn image, 512 grayscale values
c
c     Compares the performance of a FORALL
c     style histogram with a PDO style histogram
c
#define N 1024
#define COLORS 512
#define ITERS 10

      program hist
      real image(N,N)
      integer i
      integer result(COLORS)
      external timer_stop
      integer timer_stop
      integer forall_clocks, pdo_clocks

      template t(N)
      align image(i,j) with t(i)
      distribute t(block(64))

      call make_image(image)
      call timer_start()
      do i = 1,ITERS
         call hist_forall(image, result)
      enddo
      forall_clocks = timer_stop()
      call verify(result)

      call make_image(image)
      call timer_start()
      do i=1,ITERS
         call hist_pdo(image, result)
      enddo
      pdo_clocks = timer_stop()
      call verify(result)

      call prperf(N, COLORS, 
     $     forall_clocks/ITERS,pdo_clocks/ITERS)
      end

      subroutine make_image(image)
      real image(N,N)

      template t(N)
      align image(i,j) with t(i)
      distribute t(block(64))

      pdo i=1,N
      pout image(i,:)
         do j=1,N
c            image(i,j) = 1. + mod(j-1,COLORS)
            image(i,j) = 32.0
         end do
      end pdo

      end

      subroutine hist_forall(image,result)
      real image(N,N)
      integer result(COLORS)
      integer local_result(N,COLORS)
      integer pixel
      
      template t(N)
      align image(i,j) with t(i)
      align local_result(i,j) with t(i)
      distribute t(block(64))
c
c     simulate the forall
c
      local_result = 0
      pdo i=1,N
      pin image(i,:)
      pout local_result(i,:)
         do j = 1,N
            pixel = image(i,j)
            local_result(i,pixel) = local_result(i,pixel) + 1
         enddo
      endpdo
c
c     simulate the call to the user defined reduce intrinsic
c
      pdo i=1,N
      pin local_result(i,:)
      pmvars result
      pinit 
         result = 0
      pbody
         result = result + local_result(i,:)
      pmerge
         result = left(result) + right(result)
      endpdo

      end

      subroutine hist_pdo(image,result)
      real image(N,N)
      integer result(COLORS)
      integer pixel
      external timer_stop
      integer timer_stop
      
      template t(N)
      align image(i,j) with t(i)
      distribute t(block(64))

      pdo i=1,N
      pin image(i,:)
      pmvars result
      pinit
         result = 0
      pbody
         do j = 1,N
            pixel = image(i,j)
            result(pixel) = result(pixel) + 1
         enddo
      pmerge
         result = left(result) + right(result)
      endpdo
      end

      subroutine verify(result)
      integer result(COLORS)
      integer errors

      errors = 0
      do i=1,COLORS
c         if (result(i) .ne. N*N/COLORS) then
         if (i .ne. 32) then
            if (result(i) .ne. 0) then
               errors=errors+1
            endif
         else
            if (result(i) .ne. N*N) then
               errors=errors+1
            endif
         endif
      enddo

      if (errors .gt. 0) print *, 'Errors =', errors

      end

      subroutine prperf(n, colors, forall_clocks, pdo_clocks)
      integer n, colors, forall_clocks, pdo_clocks
      real pdo_msecs, forall_msecs

      pdo_msecs = real(pdo_clocks)/20000.0
      forall_msecs = real(forall_clocks)/20000.0         
      print *, n, colors, 
     $     pdo_clocks, pdo_msecs,
     $     forall_clocks, forall_msecs

      end
      
      

