************************************************************************
*  3-D Traveltimes and Amplitude via Eikonal and Transport solvers     *
*  Maissa A. Abd El-Mageed: 1996                                       *
*  Seongjai KIM: 6.97 (superconvergence)                               *
************************************************************************

      subroutine ttsolve3d(nx,ny,nz,dx,dy,dz,xmin,ymin,
     &     zmin,xs,ys,zs,rho,ap,zd,v,tt,amps,
     &     work,len_work,iverb,ipout,idbg,ipdmp,ier)

c Arguments:

      integer nx,ny,nz,         ! number of samples in x,y and z
     &        len_work          ! length of available workspace

      real tarray(2),time1,time2,etime,       
     &     dx,dy,dz,xmin,ymin,zmin,    ! steps and the upper left corner
     &     xs,ys,zs,                   ! source coordinates
     &     zd,ap,                      ! datum depth and faithful aperture
     &     v(nz,nx,ny),tt(*),amps(*),  ! velocity array and TT and AMP
     &     work(*)                     ! workspace

      real eik_eps    ! eikonal threshhold
      integer id_eno  ! token for 2nd-order ENO ux & uy (=1:ENO, =0:one-sided)

c----------------------------------------------------------------------

      if (ier.ne.0) return

      id_eno=1

      if (iverb.ge.1) then
         write(ipout,'("  ## id_eno =",i2)') id_eno
         write(ipout,'("  TTAMP3D...")')
         time1=etime(tarray)
      end if

c BIG DEBUG DUMP

      if (idbg.ge.1) then
         write(ipdmp,'(" TTAMP3D: debug dump"   /
     &      "  scalar arguments:"               /
     &      "  nx   = ",i12  / "  ny   = ",i12  / "  nz   = ",i12  /
     &      "  dx   = ",f12.4/ "  dy   = ",f12.4/ "  dz   = ",f12.4/
     &      "  xmin = ",f12.4/ "  ymin = ",f12.4/ "  zmin = ",f12.4/
     &      "  xs   = ",f12.4/ "  ys   = ",f12.4/ "  zs   = ",f12.4/
     &      "  zd   = ",f12.4/ "  ap   = ",f12.4)')
     &      nx,ny,nz,dx,dy,dz,xmin,ymin,zmin,xs,ys,zs,zd,ap
         write(ipdmp,'("  internal scalars: len_work =",i12)') len_work
      end if

c COMPUTE...

c initialize arrays (TTINIT) and downsweep (TTAMP)

      call ttinit(nx,ny,nz,dx,dy,dz,xmin,ymin,zmin,xs,ys,zs,
     &     rho,zd,ap,izd,eik_eps,v,tt,amps,iverb,ipout,idbg,ipdmp,ier)

      call ttamp(nz,nx,ny,dz,dx,dy,izd,eik_eps,v,tt,amps,
     &     work,len_work,iverb,ipout,idbg,ipdmp,ier,id_eno)

      if (ier.ne.0) then
         write(ipdmp,*)' Error: TTAMP3D from TTINIT/TTAMP'
         return
      end if

      if (iverb.ge.1) then
         time2=etime(tarray)
         write(ipout,'("  elapsed time =",f8.2)') time2-time1
      end if

      return
      end

c======================== TTINIT ================================

      subroutine ttinit(nx,ny,nz,dx,dy,dz,xmin,ymin,zmin,xs,ys,zs,
     &     rho,zd,ap,izd,eik_eps,v,tt,a,iverb,ipout,idbg,ipdmp,ier)

c initialized traveltime, traveltime gradient, and amplitude
c grids for 3D geometric acoustics,    WWS 5.93

c NOTE: it is ASSUMED that the velocity is constant over the intial grid
c WWS 7.94 revision of RV revision 2.94

c SKIM 6.97

c arguments

      integer
     &     nx,ny,nz,          ! global grid extents
     &     ipdmp,ier          ! dump unit, error flag

      real
     &     dx,dy,dz,          ! x,y and z steps
     &     xmin,ymin,zmin,    ! corner coordinates of global grid 
     &     xs,ys,zs,          ! source coordinates
     &     zd,                ! datum depth
     &     ap,                ! faithful aperture (degree)
     &     eik_eps,           ! eikonal cutoff
     &     v(nz,nx,ny),       ! input velocity array
     &     tt(nz,nx,ny),      ! traveltime array
     &     a(nz,nx,ny)        ! amplitude array
c internal variables

      integer isx,isy,isz,ix,iy,iz,izd
      real time_max,distance,distance1,tol,pi,velsrc,slwsrc
      real height,small,tancut,sincut
      real ttexact,gradient,rrbig,radius,rplain,rx2
c--------------------------------------------------------------------

      data tol /1.0e-2/, pi /3.1415927/, small /1.0e-4/

      if (ier.ne.0) return

c compute some useful scalars

      isx=nint((xs-xmin)/dx)+1
      isy=nint((ys-ymin)/dy)+1
      isz=nint((zs-zmin)/dz)+1
      izd=nint((zd-zmin)/dz)+1
      tancut=tan(pi*abs(ap)/180.0e+00)
      sincut=sin(pi*abs(ap)/180.0e+00)
      eik_eps=cos(pi*abs(ap)/180.0e+00)

      velsrc=v(isz,isx,isy)
      slwsrc=1./velsrc
      r4pi=rho/(4.*3.1415927)

      if ((velsrc.lt.tol).or.(dz.lt.small)) then
         write(ipdmp,*)' Error: TTINIT: velsrc or dz is too small'
         ier=ier+1
         return
      end if

      if (izd.le.isz) then
         izd=int((zs-zmin)/dz+1.0+tol)+1
         tmp=zmin+float(izd-1)*dz
         if (tmp.gt.zd) then
            zd=tmp
            if(iverb.ge.1) write(ipout,'("  THE NEW zd =",f10.4)') zd
            if(idbg.ge.1)  write(ipdmp,'("  THE NEW zd =",f10.4)') zd
         end if
      end if
      
      if (idbg.ge.1) then
         write(ipdmp,'("  TTINIT:"         /
     &      "  nx,ny,nz        = ",3(i12)  /
     &      "  dx,dy,dz        = ",3(f12.4)/
     &      "  xmin,ymin,zmin  = ",3(f12.4)/
     &      "  xs,ys,zs        = ",3(f12.4)/
     &      "  isx,isy,isz,izd = ",4(i12))')
     &   nx,ny,nz,dx,dy,dz,xmin,ymin,zmin,xs,ys,zs, isx,isy,isz,izd
      end if

c--Loop over grid points

c compute point source response
c
c Compute the radius of the circle and then compute the gradient at
c this radius
c 
c Calculate the travel time and the amplitudes.  The travel time is
c exact if it's inside the circle.  Otherwise it will be linearly
c approximated if it doesn't satisfy the angle condition.  ttexact is
c the exact travel time at the edge of the circle.  it is calculated at
c rrbig.

      height  = zmin+float(izd-1)*dz-zs
      rz2     = height**2
      rrbig   = height*tancut
      gradient= sincut*slwsrc
      ttexact = sqrt(rrbig**2+rz2)*slwsrc

CDIR$ NORECURRENCE
      do iy=1,ny
                  ry2=(ymin+float(iy-1)*dy-ys)**2
      do ix=1,nx
                  rx2=(xmin+float(ix-1)*dx-xs)**2
         radius=sqrt(rx2+ry2+rz2)
         a(izd,ix,iy)=r4pi/radius
         rplain=sqrt(rx2+ry2)
         if (rplain.le.rrbig) then
            tt(izd,ix,iy)=radius*slwsrc
         else
            tt(izd,ix,iy)=gradient*(rplain-rrbig)+ttexact
         end if
      end do
      end do

c      time_max=10000.
      time_max=0.0
      izdm1 = izd-1

CDIR$ NORECURRENCE
      do k=1,ny
      do j=1,nx
         temp_amp=a(izd,j,k)
      do i=1,izdm1
         tt(i,j,k)=time_max
         a(i,j,k) =temp_amp
      end do
      end do
      end do

      return
      end

c============================ TTAMP ==============================

      subroutine ttamp(nz,nx,ny,dz,dx,dy,ntop,eik_eps,v,tt,amp, 
     &     work,lenwork,iverb,ipout,idbg,ipdmp,ier,id_eno)
c-------------------------------------------------------------
c    3D traveltime and geometric optics amplitude solver
c
c    Driver to extend traveltime and amplitudes throughout
c    a rectangular array, given values on the ntop row (i.e.
c    first index = ntop) and above.
c
c    Uses 2nd order Runge-Kutta time step for numerical 
c    Hamiltonian operator, which returns both the nu-
c    merical Hamiltonian and a suitable right-hand side for
c    the transport equation. The local step is also returned
c    by this operator - it is chosen to be the minimum of a
c    safe CFL step and the remainder of the current step.
c
c    Current version (07.94): right hand sides of both eikonal
c    and transport equations are modified to limit ray incidence
c    angles to an aperture and to damp amplitudes along rays
c    at the aperture boundary.
c
c    WWW         05/93
c    HT          08/93
c    WWS         12/93
c    RV          02/94
c    WWS         07/94 - NOTE THAT EVOLUTION COORDINATE IS NOW 1ST!!!!
c    SKIM        06/97
c--------------------------------------------------------------
c--Arguments

      real dz,dx,dy,eik_eps,    ! steps and faithful threshhold
     &     v(nz,nx*ny), tt(nz,nx*ny), amp(nz,nx*ny), work(*)

      integer nz,nx,ny,         ! array dimensions
     &        ntop,             ! row index for initial data
     &        lenwork           ! size of available workspace

c--Internal Variables

      real pi, tol,
     &     dz_loc,step_total,   ! substep and partial step taken so far
     &     amp_peak_init,       ! amp peak
     &     amp_max,amp_min      ! max/min relative amplitude

      integer i,i1,k,j,jx,jy,jx1,jy1,
     &        newwork,             ! workspace available for subs
     &        max_partial_steps,   ! guess
     &        n2ny,nxn2            ! (nx+2)*ny and nx*(ny+2)

c--pointer to buffer: start with "id_"

      integer id_css,   id_nss,    ! current/next time level in ss
     &        id_iss,   id_ssz,    ! interpolation/z-derivative of ss
     &        id_eikrhs,id_trarhs, ! rhs of eik eqn / transport eqn
     &        id_tt1,   id_tt2,    ! travel time
     &        id_dxt,   id_dyt,    ! travel time x/y diff
     &        id_dxxt,  id_dyyt,   ! travel time 2nd x/y diff
     &        id_amp1,  id_amp2,   ! amplitude
     &        id_dxyt,  id_cutoff, ! tx 1st-diff in y  and cutoff
     &        id_work              ! workspace

      real dtaper
      integer ntaper

      logical done,adjust,amplitudes

      data tol /1.e-4/, max_partial_steps /50/, amplitudes /.true./,
     &     pi /3.1415927/, ntaper /4/    

c======================================================================

      if (ier.ne.0) return

c--useful numbers

      zero=0.0e+00
      nxny=nx*ny
      n2ny=(nx+2)*ny
      nxn2=nx*(ny+2)
      divdz=1.0/dz

c--set minimum, maximum relative amplitudes

      dtaper=1.0e+00/float(ntaper)
      amp_min=1.e-3
      amp_max=1.e+1

c--Grab memory

      id_work=1

      call getbuf('work', id_css,    nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_nss,    nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_iss,    nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_ssz,    nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_tt1,    nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_tt2,    nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_amp1,   nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_amp2,   nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_dxt,    nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_dyt,    nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_dxxt,   n2ny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_dyyt,   nxn2,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_dxyt,   nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_eikrhs, nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_trarhs, nxny,id_work,lenwork,ipdmp,ier)
      call getbuf('work', id_cutoff, nxny,id_work,lenwork,ipdmp,ier)

      if (ier.ne.0) then
         write(ipdmp,'("  Error: TTAMP: GETBUF for pointers: ",
     &         "ier =",i3)') ier
         return
      end if

      iss1=id_css        ! save the pointers
      iss2=id_nss        ! do not erase them

      newwork=lenwork-id_work
      amp_peak_init=0.0e+00

      do j=1,nxny
         amp_peak_init=max(amp_peak_init,amp(ntop,j))
      end do

      amp_max=max(1.0e-7,amp_peak_init*amp_max)
      amp_min=max(1.0e-7,amp_peak_init*amp_min)

      id_sszm1=   id_ssz-1
      id_tt1m1=   id_tt1-1
      id_tt2m1=   id_tt2-1
      id_amp1m1=  id_amp1-1
      id_amp2m1=  id_amp2-1
      id_eikrhsm1=id_eikrhs-1
      id_trarhsm1=id_trarhs-1

CDIR$ NORECURRENCE
      do j=1,nxny
         work(id_amp1m1+j)=log(max(amp(ntop,j),amp_min))
         work(id_tt1m1+j) =tt(ntop,j)
      end do

c--Debug instructions

      if (idbg.gt.0) then
         write(ipdmp,'("  TTAMP: inputs"/ "  nx,ny,nz,dx,dy,dz =",
     &      3(i4),3(f10.4))') nx,ny,nz,dx,dy,dz
      end if

      amp_min=log(amp_min)
      amp_max=log(amp_max)

      itotal=0
c==================================================================
      do 1000 i= ntop, nz-1
c==================================================================
c NOTE that the amplitude work vector contains the natural logarithm
c of the amplitude

c--Set step total to zero. Object is to reach dz

      step_total=zero
      done=.false.
      ip1=i+1

c--Set up square slowness arrays, this and next depth levels

      if (mod(i,2).eq.0) then
         id_css=iss1
         id_nss=iss2
      else
         id_css=iss2
         id_nss=iss1
      end if

      id_cssm1=id_css-1
      id_nssm1=id_nss-1

      if (i.eq.ntop) then
CDIR$ NORECURRENCE
         do j=1,nxny
            work(id_cssm1+j)=1.0/v(i,j)**2
         end do
      end if

CDIR$ NORECURRENCE
      do j=1,nxny
         work(id_nssm1+j)=1.0/v(ip1,j)**2
      end do

c--compute and save (s**2)_z

CDIR$ NORECURRENCE
      do j=1,nxny
         work(id_sszm1+j)=(work(id_nssm1+j)-work(id_cssm1+j))*divdz
      end do

c==PARTIAL STEP LOOP
c----------------------------------------------------------------
      do 2000 i1= 1,max_partial_steps
c----------------------------------------------------------------

c==interpolate square slowness field

      if (i1.eq.1) then
         id_iss=id_css
      else
         relstep=(dz-step_total)*divdz
         do j=1,nxny
            work(id_iss-1+j)=relstep*work(id_css-1+j)
     &                 +(1.-relstep)*work(id_nss-1+j)
         end do
      end if

c==First call to numerical hamiltonian.
c  Returns a suitable value of dz_loc each time. 

      adjust= .true.

      call eikonal_rhs3d(nx, ny, nz, i,
     &   work(id_iss),    work(id_tt1),  work(id_eikrhs),
     &   work(id_cutoff), work(id_dxt),  work(id_dyt), 
     &   work(id_dxxt),   work(id_dyyt), work(id_dxyt),
     &   dx, dy, dz_loc, dz, step_total, adjust, newwork, 
     &   eik_eps, work(id_work), ipdmp, idbg,ier)
      
      if (ier.ne.0) then
         write(ipdmp,*) ' Error: TTAMP from  EIK_RHS'
         return
      end if

c--First Runge-Kutta half-step for tt

CDIR$ NORECURRENCE
      do j=1,nxny
         work(id_tt2m1+j)=work(id_tt1m1+j)+dz_loc*work(id_eikrhsm1+j)
      end do

c==If amplitudes are to be computed evaluate RHS in transport equation
c  and take first RK half step for amps

      if (amplitudes) then
       
         call transport_rhs3d(nx, ny, nz, i, 
     &      work(id_iss),  work(id_ssz),
     &      work(id_tt1),  work(id_amp1), work(id_trarhs),
     &      work(id_dxt),  work(id_dyt),  work(id_dxxt), 
     &      work(id_dyyt), work(id_dxyt), work(id_eikrhs),
     &      work(id_cutoff),dx, dy, dz, dz_loc, step_total,
     &      newwork, work(id_work),ipdmp, idbg,ier,id_eno)

         if (ier.ne.0) then
            write(ipdmp,*)' Error: TTAMP from TRANS_RHS'
            return
         end if

CDIR$ NORECURRENCE
         do j=1,nxny
         work(id_amp2m1+j)=work(id_amp1m1+j)+dz_loc*work(id_trarhsm1+j)
         end do

c--otherwise just fill the amplitude array with ones
      else

CDIR$ NORECURRENCE
         do j=1,nxny
            amp(ip1,j)=1.0e+00
         end do
      end if

c==Second numerical Hamiltonian evaluation
c-------------------------------------------

      step_total=step_total+dz_loc
      adjust=.false.

      call eikonal_rhs3d(nx, ny, nz, i,
     &   work(id_iss),    work(id_tt2),  work(id_eikrhs),
     &   work(id_cutoff), work(id_dxt),  work(id_dyt), 
     &   work(id_dxxt),   work(id_dyyt), work(id_dxyt),
     &   dx, dy, dz_loc, dz, step_total, adjust, newwork, 
     &   eik_eps, work(id_work),ipdmp, idbg,ier)

c--Second Runge-Kutta half-step for tt

CDIR$ NORECURRENCE
      do j=1,nxny
         ntmp=id_tt1m1+j
         work(ntmp)=0.5*(work(ntmp)+work(id_tt2m1+j)
     &       +dz_loc*work(id_eikrhsm1+j))
      end do

c==Second evaluation of RHS of transport equation
       
      if (.not.amplitudes) goto 520

         call transport_rhs3d(nx, ny, nz, i,
     &      work(id_iss),  work(id_ssz),
     &      work(id_tt2),  work(id_amp2), work(id_trarhs),
     &      work(id_dxt),  work(id_dyt),  work(id_dxxt),
     &      work(id_dyyt), work(id_dxyt), work(id_eikrhs), 
     &      work(id_cutoff), dx, dy, dz, dz_loc, step_total,
     &      newwork, work(id_work),ipdmp, idbg,ier,id_eno)

c--Second RK half step for amps
                  
CDIR$ NORECURRENCE
         do j=1,nxny
            ntmp=id_amp1m1+j
            work(ntmp)=0.5*(work(ntmp)+work(id_amp2m1+j)
     &         +dz_loc*work(id_trarhsm1+j))
         end do

 520  continue

c==If successful record output fields

      if (abs(dz-step_total).ge.(tol*dz)) goto 2000

CDIR$ NORECURRENCE
      do j=1,nxny
         tt(ip1,j)=work(id_tt1m1+j)
      end do

      if (amplitudes) then
CDIR$ NORECURRENCE
         do j=1,nxny
            ntmp=id_amp1m1+j
            work(ntmp)=max(amp_min,min(amp_max,work(ntmp)))
            amp(ip1,j)=exp(work(ntmp))
         end do
      end if

      done=.true.
      goto 3000

c----------------------------------------------------------------
 2000 continue
c----------------------------------------------------------------
         
 3000 continue

c--If we're still not done, then we failed to find an ok step

      if (.not.done) then
         write(ipdmp,*) ' Error:TTAMP'
         write(ipdmp,*) ' after ', max_partial_steps, ' steps'
         write(ipdmp,*) ' failed to take step ', i
         ier=ier+1
         return
      end if

      itotal=itotal+i1
c==================================================================
 1000 continue
c==================================================================
      print*," ## itotal=",itotal

      return
      end
       
c========================== eikonal_rhs3d ========================

      subroutine eikonal_rhs3d(nx,ny,nz,iz,
     &     ss,tt,ham,apcut,tx,ty,txx,tyy,txy,
     &     dx,dy,dz_loc,dz,step_total,adjust,lenwork,
     &     eik_eps,work,ipdmp,idbg,ier)

c---------------------------------------------------------------
c  Computes a numerical approximation to the Hamiltonian by means
c  of a 2nd order upwind finite-difference scheme For details see
c  paper by Osher & Sethian (1988, J. Comput. Phys. 79, pp.12-49).
c---------------------------------------------------------------
c-- Arguments

      integer nx,ny,nz,         ! grid dimensions
     &        iz,               ! current depth step
     &        lenwork           ! length of "work"

      real eik_eps,             ! eikonal threshhold
     &     step_total,          ! total step taken so far
     &     dx,dy,dz,            ! steps
     &     dz_loc               ! vertical step - full step on call

      real ss(nx,ny),           ! square slowness - interpolated
     &     tt(nx,ny),           ! travel time at current depth
     &     ham(nx,ny),          ! numerical hamiltonian - rhs for eik. scheme
     &     apcut(nx,ny),        ! Hermite tapered cutoff
     &     tx(nx,ny),           ! ENO first difference of tt in x
     &     ty(nx,ny),           ! ENO first difference of tt in y
     &     txx(nx+2,ny),        ! centered 2nd difference of tt in x
     &     tyy(nx,ny+2),        ! centered 2nd difference of tt in y
     &     txy(nx,ny),
     &     work(nx,ny,1)        ! workspace
      
c--Internal Variables

      real cfl_rhs,tol,zero,compa,divdx,divdy,div2dy,
     &     sqeps, sqrham,cutham,d1eno
      integer nxm1, nym1, j, k, jx, jy,
     &        id_dpx,id_dmx,id_dpy,id_dmy,workptr
      logical adjust

      data tol /1.0e-08/

c------------------------------------------------------------------

      if (ier.ne.0) return

c--Useful numbers
      zero=0.0e+00
      one=1.0e+00
      nxm1=nx-1
      nym1=ny-1
      divdx=1./dx
      divdy=1./dy
      div2dy=1./(2.*dy)

c--Allocate memory pointers
      id_dpx=1
      id_dmx=2
      id_dpy=3
      id_dmy=4

      if (lenwork.lt.(4*nx*ny)) then
         write(ipdmp,*) ' Error: eikonal_rhs3d: lenwork is small'
         ier=ier+1
         return
      end if

c--Compute tx=D^+_x\tau and ty=D^+_y\tau

      do jy=1,ny
      do jx=1,nxm1
         tx(jx,jy)=(tt(jx+1,jy)-tt(jx,jy))*divdx
      end do
      end do

      do jy=1,ny
         tx(nx,jy)=tx(nxm1,jy)
c         tx(nx,jy)=zero
      end do

      do jy=1,nym1
      do jx=1,nx
         ty(jx,jy)=(tt(jx,jy+1)-tt(jx,jy))*divdy
      end do
      end do

      do jx=1,nx
         ty(jx,ny)= ty(jx,nym1)
c         ty(jx,ny)=zero
      end do

c--Check for inflow at four faces of the cube, return if detected

      do jx=1,nx
         if (ty(jx,1).gt.zero) ier=ier+1
      end do
      do jx=1,nx
         if (ty(jx,nym1).lt.zero) ier=ier+1
      end do

      do jy=1,ny
         if (tx(1,jy).gt.zero) ier=ier+1
         if (tx(nxm1,jy).lt.zero) ier=ier+1
      end do

      if(ier.ne.0) then
         write(ipdmp,*) ' Error: TTAMP: wrong inflow'
         return
      end if

c--Form second difference of tt .

      do jy=1,ny
         do j=3,nx
            txx(j,jy)=(tx(j-1,jy)-tx(j-2,jy))*divdx
         end do
         txx(1,jy)   =zero
         txx(2,jy)   =0.5*txx(3,jy)
         txx(nx+1,jy)=0.5*txx(nx,jy)
         txx(nx+2,jy)=zero
      end do

      do jy=3,ny
      do jx=1,nx
         tyy(jx,jy)=(ty(jx,jy-1)-ty(jx,jy-2))*divdy
      end do
      end do
      do jx=1,nx
         tyy(jx,1)   =zero
         tyy(jx,2)   =0.5*tyy(jx,3)
         tyy(jx,ny+1)=0.5*tyy(jx,ny)
         tyy(jx,ny+2)=zero
      end do

c--Compute Dxx^\pm and Dyy^\pm

      do jy=1,ny
      do jx=1,nx
         call mso3(txx(jx,jy),txx(jx+1,jy),work(jx,jy,id_dmx))
         call mso3(txx(jx+1,jy),txx(jx+2,jy),work(jx,jy,id_dpx))
      end do
      end do

      do jy=1,ny
      do jx=1,nx
         call mso3(tyy(jx,jy),tyy(jx,jy+1),work(jx,jy,id_dmy))
         call mso3(tyy(jx,jy+1),tyy(jx,jy+2),work(jx,jy,id_dpy))
      end do
      end do

c--The computations of "dma" and "dpa" have been gathered in
c  one single loop for vector "dttx" to stay in the register.

      halfdx=0.5*dx
      do jy=1,ny
         jx=1
            work(jx,jy,id_dmx)=tx(jx,jy)+halfdx*work(jx,jy,id_dmx)
            work(jx,jy,id_dpx)=tx(jx,jy)-halfdx*work(jx,jy,id_dpx)
         do jx=2,nx
            work(jx,jy,id_dmx)=tx(jx-1,jy)+halfdx*work(jx,jy,id_dmx)
            work(jx,jy,id_dpx)=tx(jx,jy)  -halfdx*work(jx,jy,id_dpx)
         end do
      end do

      halfdy=0.5*dy
      jy=1
      do jx=1,nx
         work(jx,jy,id_dmy)=ty(jx,jy)+halfdy*work(jx,jy,id_dmy)
         work(jx,jy,id_dpy)=ty(jx,jy)-halfdy*work(jx,jy,id_dpy)
      end do

      do jy=2,ny
      do jx=1,nx
         work(jx,jy,id_dmy)=ty(jx,jy-1)+halfdy*work(jx,jy,id_dmy)
         work(jx,jy,id_dpy)=ty(jx,jy)  -halfdy*work(jx,jy,id_dpy)
      end do
      end do

c--Compute the Hamiltonian; interleave computation of CFL step, but
c  only change dz_loc if desired THRESHHOLD at sqeps*ss, quadratic taper to
c  0.5*sqeps*ss; also return Hermite cutoff

c      cfl_rhs=0.5*min(dx,dy)
      cfl_rhs=dx*dy/(sqrt(dx**2+dy**2)*1.1)
      compa= dz-step_total
      sqeps= eik_eps**2

      do jy=1,ny
      do jx=1,nx

c         d1eno=(max(work(jx,jy,id_dmx),zero))**2
c     &        +(min(work(jx,jy,id_dpx),zero))**2
c     &        +(max(work(jx,jy,id_dmy),zero))**2
c     &        +(min(work(jx,jy,id_dpy),zero))**2

         d1eno=(max(max(work(jx,jy,id_dmx),zero),
     &            -(min(work(jx,jy,id_dpx),zero))))**2
     &        +(max(max(work(jx,jy,id_dmy),zero),
     &            -(min(work(jx,jy,id_dpy),zero))))**2

         sqslow=ss(jx,jy)

         sqrham=sqslow-d1eno     ! =x for "bmax(x,a)"
         cutham=sqeps*sqslow     ! =a

         if (sqrham.ge.(2.*cutham)) then
            ham(jx,jy)=sqrt(sqrham)
            apcut(jx,jy)=one       ! the function "g"
	 else if(sqrham.le.zero) then
            ham(jx,jy)=sqrt(cutham)
	    apcut(jx,jy)=zero
	 else
            ham(jx,jy)= sqrt(cutham+sqrham**2/(4.*cutham))
            if (sqrham.le.cutham) then
	       apcut(jx,jy)=zero
            else
               tmp=(sqrham-cutham)/cutham
	       apcut(jx,jy)=(-2.*tmp+3.)*tmp**2
            end if
	 end if

         if (adjust) then
            compa=min(compa,cfl_rhs*ham(jx,jy)/(max(sqrt(d1eno),tol)))
         end if

      end do
      end do

c--Safeguarded CFL step selection

      if (adjust) then
         dz_loc= min(dz-step_total,compa)
         remain=dz-(step_total+dz_loc)     ! avoid very small steps
         if (remain.lt.(0.05*dz_loc)) then
            dz_loc=dz-step_total
         else if (remain.lt.(0.25*dz_loc)) then
            dz_loc=0.6*(dz_loc+remain)
         end if
      end if

c==The following corresponds to amplitude computation
c-----------------------------------------------------

c--Compute txy

      div4dy=1./(4.*dy)
      do jy=2,nym1
         jx=1
         txy(jx,jy)=(tx(jx,jy+1)-tx(jx,jy-1))*div2dy
         do jx=2,nx
            txy(jx,jy)=( (tx(jx-1,jy+1)+tx(jx,jy+1))
     &                  -(tx(jx-1,jy-1)+tx(jx,jy-1)) )*div4dy
         end do
      end do

      do jx=1,nx
         txy(jx,1)=txy(jx,2)
      end do

      do jx=1,nx
         txy(jx,ny)=txy(jx,nym1)
      end do

c--Now save ENO_tx and ENO_ty

      do jy=1,ny
      do jx=1,nx
         tx(jx,jy)=max(work(jx,jy,id_dmx),zero)
     &            +min(work(jx,jy,id_dpx),zero)
      end do
      end do

      do jy=1,ny
      do jx=1,nx
         ty(jx,jy)=max(work(jx,jy,id_dmy),zero)
     &            +min(work(jx,jy,id_dpy),zero)
      end do
      end do

      return
      end

c========================= transport_rhs3d =======================

      subroutine transport_rhs3d(nx, ny, nz, iz,  
     &     ssi, ssz, tt, amp, rhs,
     &     tx, ty, txx, tyy, txy, ham, apcut,
     &     dx, dy, dz, dz_loc, step_total,
     &     lenwork,work,ipdmp,idbg,ier,id_eno)
c---------------------------------------------------------------------
c    UPWIND TRANSPORT SOLVER
c
c    Produces 2nd order Beam-Warming rhs for 
c    transport equation. Written for log amp version of
c    transport equation:
c
c    2 \nabla \tau \cdot \nabla (\log a) + \nabla^2 \tau = 0
c
c    Uses upwind first differences, centered second differences 
c    of travel time, (these are passed from EIKONAL_RHS) and upwind
c    second order differences of amplitudes. Modifies rhs to reduce
c    out-of-aperture amplitudes
c
c    WWS    12.93
c    RV     02.94
c    RV     05.94 (loops unrolled for IBM)
c    WWS    07.94
c    SKIM   06.97
c---------------------------------------------------------------------
c--Arguments
      integer
     &     nx, ny, nz,          ! grid dimensions
     &     iz,                  ! current depth step
     &     lenwork,             ! length of "work"
     &     idbg,ipdmp,ier,      ! debug and error
     &     id_eno               ! 2nd-order ENO ux & uy (=1:ENO, =0:one-sided)
      real 
     &     step_total,          ! total step taken so far
     &     dx,dy,               ! horizontal step
     &     dz,dz_loc,           ! vertical step
     &     ssi(nx,ny),          ! square slowness array - interpolated
     &     ssz(nx,ny),          ! z-derivative of square slowness
     &     tt(nx,ny),           ! travel time at current depth
     &     amp(nx,ny),          ! amplitude at current depth
     &     rhs(nx,ny),          ! rhs for transp. scheme
     &     tx(nx,ny),           ! ENO first divided x-diff of tt
     &     ty(nx,ny),           ! ENO first divided y-diff of tt
     &     txx(nx+2,ny),        ! second divided x-diff of tt (centered)
     &     tyy(nx,ny+2),        ! second divided y-diff of tt (centered)
     &     apcut(nx,ny),        ! aperture cut
     &     ham(nx,ny),          ! hamiltonian = tz
     &     txy(nx,ny),          ! buffer for tx 1st difference in y
     &     work(nx,ny,1)        ! workspace

      real zero,divdz,divdx,divdy,div2dx,div2dy,slap_max

      integer id_ssx,id_ssy,id_slap,
     &        id_uxm,id_uxp,id_uym,id_uyp,workptr 

      data decay /1.0e+00/
c-----------------------------------------------------------------------

      if (ier.ne.0) return

c==Allocate memory

      id_ssx=1
      id_ssy=2
      id_slap=3
      id_uxm=4
      id_uym=5
      id_uxp=6
      id_uyp=7

      if (lenwork.lt.(7*nx*ny)) then
         write(ipdmp,*) ' Error: TRANS_RHS: lenwork too small'
         ier=ier+1
         return
      end if

c==Useful numbers

      zero=0.0e+00
      nxm1=nx-1
      nxm2=nx-2
      nym1=ny-1
      nym2=ny-2
      divdx=1./dx
      divdy=1./dy	
      divdz=1./dz
      div2dx=1./(2.*dx)
      div2dy=1./(2.*dy)

c==square slowness second order first difference in x,y-directions,
c  linearly extrapolated at ends

      do jy=1,ny
      do jx=2,nxm1
         work(jx,jy,id_ssx)=(ssi(jx+1,jy)-ssi(jx-1,jy))*div2dx
      end do
      end do

      do jy=1,ny
         work(1,jy,id_ssx)=2.*work(2,jy,id_ssx)-work(3,jy,id_ssx)
         work(nx,jy,id_ssx)=2.*work(nxm1,jy,id_ssx)-work(nxm2,jy,id_ssx)
      end do

      do jy=2,nym1
      do jx=1,nx
         work(jx,jy,id_ssy)=(ssi(jx,jy+1)-ssi(jx,jy-1))*div2dy
      end do
      end do

      do jx=1,nx
         work(jx,1,id_ssy)=2.*work(jx,2,id_ssy)-work(jx,3,id_ssy)
      end do

      do jx=1,nx
         work(jx,ny,id_ssy)=2.*work(jx,nym1,id_ssy)-work(jx,nym2,id_ssy)
      end do

c==now we can get "Laplacian\tau"

      do 1000 jy=1,ny
      do 1200 jx=1,nx

         taux=tx(jx,jy)
         tauy=ty(jx,jy)
         tauz=ham(jx,jy)
         tauxx=txx(jx+1,jy)
         tauyy=tyy(jx,jy+1)
         tauxy=txy(jx,jy)
         rtauz=1.0/tauz

         tauzz=( 0.5*( tauz*ssz(jx,jy)-taux*work(jx,jy,id_ssx)
     &                -tauy*work(jx,jy,id_ssy) )
     &            +taux**2*tauxx +tauy**2*tauyy +2.*taux*tauy*tauxy )
     &         * (rtauz**2)

         work(jx,jy,id_slap)=0.5*rtauz*(tauxx+tauyy+tauzz)

 1200 continue
 1000 continue

c==determine max of slap, apply aperture cutoff - outside of aperture
c  make inhomogeneous term equal to decay * max in aperture

      slap_max=zero
      do jy=1,ny
      do jx=1,nx
         slap_max=max(slap_max,work(jx,jy,id_slap))
      end do
      end do

      do jy=1,ny
      do jx=1,nx
         tmpcut=apcut(jx,jy)
         work(jx,jy,id_slap)=tmpcut*work(jx,jy,id_slap)
     &          +decay*(1.-tmpcut)*slap_max
      end do
      end do

c--choose one of finite difference schemes for ux and uy.
c-----------------------------------------------------------
      goto(10,20) (id_eno+1)
c-----------------------------------------------------------

 10   continue
c==one-sided second order difference of amplitude

c--id_uxm

      do jy=1,ny
      do jx=3,nx
         work(jx,jy,id_uxm)=(1.5*amp(jx,jy)-2.0*amp(jx-1,jy)
     &      +0.5*amp(jx-2,jy))*divdx
      end do
      end do

      do jy=1,ny
         work(2,jy,id_uxm)=(amp(3,jy)-amp(1,jy))*div2dx
         work(1,jy,id_uxm)=work(2,jy,id_uxm)
      end do

c--id_uxp

      do jy=1,ny
      do jx=1,nxm2
         work(jx,jy,id_uxp)=(-1.5*amp(jx,jy)+2.0*amp(jx+1,jy)
     &      -0.5*amp(jx+2,jy))*divdx
      end do
      end do

      do jy=1,ny
         work(nxm1,jy,id_uxp)=(amp(nx,jy)-amp(nxm2,jy))*div2dx
         work(nx,  jy,id_uxp)=work(nxm1,jy,id_uxp)
      end do

c--id_uym

      do jy=3,ny
      do jx=1,nx
         work(jx,jy,id_uym)=(1.5*amp(jx,jy)-2.0*amp(jx,jy-1)
     &      +0.5*amp(jx,jy-2))*divdy
      end do
      end do

      do jx=1,nx
         work(jx,2,id_uym)=(amp(jx,3)-amp(jx,1))*div2dy
         work(jx,1,id_uym)=work(jx,2,id_uym)
      end do
 
c--id_uyp

      do jy=1,nym2
      do jx=1,nx
         work(jx,jy,id_uyp)=(-1.5*amp(jx,jy)+2.0*amp(jx,jy+1)
     &      -0.5*amp(jx,jy+2))*divdy
      end do
      end do

      do jx=1,nx
         work(jx,nym1,id_uyp)=(amp(jx,ny)-amp(jx,nym2))*div2dy
         work(jx,ny,  id_uyp)=work(jx,nym1,id_uyp)
      end do

      goto 100
 
 20   continue

c==2nd-order ENO schemes for ux and uy
c  e.g.  dd1 = [(D^+)(D^-)u]*dx/2
c-----------------------------------------------------------
c--id_uxm & id_uxp

      do jy=1,ny
         jx=2
            jxm1=jx-1
            d0  =(amp(jx,jy)-amp(jxm1,jy))*divdx
            dd1 =(amp(jxm1,jy)-2.0*amp(jx,jy)+amp(jx+1,jy))*div2dx
            work(jx,  jy,id_uxm)=d0+dd1
            work(jxm1,jy,id_uxp)=d0-dd1
            work(jxm1,jy,id_uxm)=work(jxm1,jy,id_uxp)
         do jx=3,nxm1
            jxm1=jx-1
            d0  =(amp(jx,jy)-amp(jxm1,jy))*divdx
            dd0 =dd1
            dd1 =(amp(jxm1,jy)-2.0*amp(jx,jy)+amp(jx+1,jy))*div2dx
            if (dd0.lt.zero .and. dd1.lt.zero) then
               work(jx,  jy,id_uxm)=d0+max(dd0,dd1)
               work(jxm1,jy,id_uxp)=d0-max(dd0,dd1)
            else if (dd0.gt.zero .and. dd1.gt.zero) then
               work(jx,  jy,id_uxm)=d0+min(dd0,dd1)
               work(jxm1,jy,id_uxp)=d0-min(dd0,dd1)
            else
               work(jx,  jy,id_uxm)=d0
               work(jxm1,jy,id_uxp)=d0
            end if
         end do
            jx=nx
            jxm1=jx-1
            d0  =(amp(jx,jy)-amp(jxm1,jy))*divdx
            work(jx,  jy,id_uxm)=d0+dd1
            work(jxm1,jy,id_uxp)=d0-dd1
            work(jx,  jy,id_uxp)=work(jx,jy,id_uxm)
      end do

c--id_uym & id_uyp

      jy=2
         jym1=jy-1
         jyp1=jy+1
         do jx=1,nx
            d0  =(amp(jx,jy)-amp(jx,jym1))*divdy
            dd1 =(amp(jx,jym1)-2.0*amp(jx,jy)+amp(jx,jyp1))*div2dy
            work(jx,jy,  id_uym)=d0+dd1
            work(jx,jym1,id_uyp)=d0-dd1
            work(jx,jym1,id_uym)=work(jx,jym1,id_uyp)
         end do
      do jy=3,nym1
         jym1=jy-1
         jym2=jy-2
         jyp1=jy+1
         do jx=1,nx
            d0  =(amp(jx,jy)-amp(jx,jym1))*divdy
            dd0 =(amp(jx,jym2)-2.0*amp(jx,jym1)+amp(jx,jy))*div2dy
            dd1 =(amp(jx,jym1)-2.0*amp(jx,jy)+amp(jx,jyp1))*div2dy
            if (dd0.lt.zero .and. dd1.lt.zero) then
               work(jx,jy,  id_uym)=d0+max(dd0,dd1)
               work(jx,jym1,id_uyp)=d0-max(dd0,dd1)
            else if (dd0.gt.zero .and. dd1.gt.zero) then
               work(jx,jy,  id_uym)=d0+min(dd0,dd1)
               work(jx,jym1,id_uyp)=d0-min(dd0,dd1)
            else
               work(jx,jy,  id_uym)=d0
               work(jx,jym1,id_uyp)=d0
            end if
         end do
      end do
      jy=ny
         jym1=jy-1
         jym2=jy-2
         do jx=1,nx
            d0  =(amp(jx,jy)-amp(jx,jym1))*divdy
            dd1 =(amp(jx,jym2)-2.0*amp(jx,jym1)+amp(jx,jy))*div2dy
            work(jx,jy,  id_uym)=d0+dd1
            work(jx,jym1,id_uyp)=d0-dd1
            work(jx,jy,  id_uyp)=work(jx,jy,id_uym)
         end do

 100  continue
c==Now, combine the RHS of the transport eqn
c-----------------------------------------------------------

      do 2000 jy=1,ny
      do 2200 jx=1,nx

         tmp=tx(jx,jy)
         if (tmp.gt.zero) then
            rhs1=tmp*work(jx,jy,id_uxm)
         else
            rhs1=tmp*work(jx,jy,id_uxp)
         end if

         tmp=ty(jx,jy)
         if (tmp.gt.zero) then
            rhs2=tmp*work(jx,jy,id_uym)
         else
            rhs2=tmp*work(jx,jy,id_uyp)
         end if

         rhs(jx,jy)=-(work(jx,jy,id_slap) +(rhs1+rhs2)/ham(jx,jy) )

 2200 continue
 2000 continue

      return
      end

c========================= mso3 ==================================

      subroutine mso3 ( x, y, p)         
c------------------------------------------
c      This subroutine implements 
c        p = x if  |x| <= |y| and xy >= 0
c          = y if  |y| <  |x| and xy >= 0
c          = 0 if  xy  <   0
c      via the formula
c        p = min(xp,yp) + max(xm,ym)
c      where
c       xp = max(x,0);
c       xm = min(x,0);
c       yp = max(y,0);
c       ym = min(y,0);
c
c      Bill SYMES, 05/93
c      Quang Huy TRAN, 07/93
c      RV 3/94
c------------------------------------------
      real x, y, p
      real zero
c========================= 
      zero= 0.0e+00
      p= min(max(x,zero),max(y,zero)) +max(min(x,zero),min(y,zero))
      return
      end

