c------------------------------------------------------------
c     KIRCHHOFF FORWARD MODELING
c     
c     This code a combination of work by
c     the TRIP group
c     
c     w  symes
c     r  versteeg
c     h  tran
c     a  sei
c     m  kern
c     k  araya
c     
c     This version: 02.96, WWS
c
c NOTES: this is an attempt to use a design worked out in
c 93 - 94 by the folks listed above, together with an evolved
c (and hopefully more maintainable) driver structure. The 
c original setup is largely the work of RV, and is archived
c in dso3.1. 
c
c The current memory management principle is as follows: nblock
c simulations will be carried out (ie. nblock shots). nblock
c is initialized in the job table.
c
c To accomodate the new driver, which loads records one at a 
c time, I have added some code at the top which determines
c how much of the work array must be reserved for the input
c data - the output space has already been reserved by the 
c driver, and does not appear as a part of work here.

c VERSION of 11.11.95 (WWS)
c
c     Logic of Kirchhoff summation: simulation by blocked gathers
c
c     for each block
c         for each trace
c             for each gather
c                 compute source position
c                 compute receiver position
c                 read source traveltime table
c                 read receiver traveltime table
c                 compute trace
c             next gather
c         next trace
c     next block
c
c     This subroutine performs the block computation (inner two loops);
c     LINE is set up to perform the outer loop over blocks.
c
c     Blocking strategy: in order to minimize i/o, rely on READ_TT
c     to cache a number of COARSE GRID traveltime tables in core. Arrange 
c     the order of trace computation so that only one or two new tables
c     need to be read from disk or computed for each trace index. Each 
c     table is interpolated as it is read. Thus only two interpolated
c     FINE GRID tables are stored for each trace. This design trades 
c     some computation for memory usage - execution profiles show that
c     interpolation takes up to 10% of CPU time, for FINE/COARSE = 4,
c     but 16 times as many tables can be cached in core as if fine grids
c     were stored as in RV's old design. As shown below, this design 
c     trivially works as well for common offset sim/mig/inv (COG) as for
c     common source (CSG).
c
c     The cache is treated as a disk cache, and only activated if the
c     traveltimes are also stored to disk. 
c
c     04.01.96: Current implementation stores traveltimes to disk as
c     they are computed. This may be changed to storing them only when
c     they are bumped from cache, which may further reduce the i/o under
c     some circumstances.
c
c     The size of the cache is set in the job deck. Setting the cache size
c     to larger than the available memory will result in a run-time error. 
c     Eventually someone may figure out a nice way to adjust the cache size
c     at run time, by returning memory usage from the ttamp solver. Since 
c     the cache size is set at runtime, this information could be used to 
c     modify it.
c
c     The following analysis assumes that shot and receiver spacings
c     are same. If this assumption is not true then the efficiency
c     of the scheme is in general less. The cache mgmt. remains the
c     same, regardless of src and recvr spacing.
c     
c     Notation: g = gather index
c               t = trace index
c             src = src location index
c             rec = rec location index
c
c     For common source gather: src = src(g)
c                               rec = rec(t+g)
c     As g ranges over nblock values, both src and rec range over
c     nblock values. When t --> t+1, source locations remain same
c     but receiver locations shift by 1. Therefore if 2*nblock tables
c     are cached then for each trace only one table must be read or 
c     computed (as opposed to being simply passed back from the cache). 
c     The total number of tables read per block 
c     is nblock+ntraces = nblock+n(recvrs/shot). The number of blocks
c     is n(shots)/nblock. Therefore the total number of tables 
c     read or computed is n(shots)+n(recvrs/shot)*n(shots)/nblock.
c     If the shot and receiver spacings are the same (or commensurable)
c     then most of these will be disk reads rather than computations
c     (calls to the eikonal/transport solvers).
c
c     If someone is clever they may be able to arrange to do the read
c     asynchronously, overlapping the trace computations using tables
c     already cached. This will be especially important in the parallel
c     design.
c
c     MUST ARRANGE that READ_TT KEEPS the source tables and only discards
c     the receiver tables. This cannot work if it only keeps one list and
c     discards the oldest.
c
c     For common offset gather: src = src(t)
c                               rec = rec(t+g)
c     As g ranges over nblock values rec ranges over nblock values.
c     When t --> t+1 (in general) src and rec both shift by 1. Therefore
c     if at least nblock tables are cached then only two tables must be
c     read or computed for each trace index. The total number of tables
c     read per block is at most 2*ntraces = 2*n(shots), and the
c     total for the entire simulation is 2*n(shots)*n(recvrs/shot)/nblock.
c
c     Note that this means: with the same amt. of storage FOR TTAMP TABLES
c     can achieve the same amt. of read/computes for either CSG or COG - 
c     nblock(COG) = 2*nblock(CSG). Of course this does impose a heavier
c     memory burden on COG, since twice as much input data must be loaded.
c
c     If the cache is sufficiently large, and/or if source and receiver
c     positions overlap, actually i/o and computation can be even less.
c
      subroutine kirch_fwd(inv_flag,spd_flag,vel,refl,src,
     &     dvel,dvel_there,drefl,drefl_there,seism,
     &     seismnt,seismnx,seismnxs,seismdt,seismdx,seismdxs,
     &     seismtorig,seismxorig,seismxsorig,srcdep,recdep,
     &     velnz,velnx,veldz,veldx,velzorig,velxorig,
     &     srcnt,srcdt,reflnz,reflnx,reflnxs,refldz,refldx,
     &     reflzorig,reflxorig,
     &     amp_op,ap,zd,t0,tf,mv,wmz,ntaper,
     &     save_ttamp,tt_sam,tt_size,nblock,gather_flag,
     &     work_ptr,len_work,work,
     &     firstrec,lastrec,irecord,
     &     done,update,ipout,ipdmp,ier)
      
c=============================================================
c ANNOTATIONS FOR AUTOMATIC INTERFACE BUILD (WWS, 29.9.94)    
c=============================================================
c$dso mode forward
c$dso subroutine kirch_fwd
c$dso begin datasets
c$dso   begin reference
c$dso     reference vel
c$dso     reference refl
c$dso     reference src
c$dso     reference refdata *
c$dso   end reference
c$dso   begin perturbation
c$dso     perturbation dvel
c$dso     perturbation drefl
c$dso   end perturbation
c$dso   begin output
c$dso     output seism refdata
c$dso   end output
c$dso end datasets
c$dso begin parameters
c       X           X                 X            X          X
c$dso job    int    inv_flag                       HF inversion (1) or migration (0)
c$dso job    int    spd_flag                       2.5D amplitude (1) 2D amplitude (0)
c$dso array         vel               input
c$dso array         refl              input
c$dso array         src               input
c$dso array         dvel              input       flag
c$dso array         drefl             input       flag
c$dso array         seism             output
c$dso header int    seismnt           refdata      number of samples per trace
c$dso header int    seismnx           refdata      number of traces per shot
c$dso header int    seismnxs          refdata      number of shot records
c$dso header float  seismdt           refdata      sample interval 
c$dso header float  seismdx           refdata      trace interval
c$dso header float  seismdxs          refdata      shot interval
c$dso header float  seismtorig        refdata      start time
c$dso header float  seismxorig        refdata      offset of first trace
c$dso header float  seismxsorig       refdata      location of first shot in line
c$dso header float  srcdep            refdata      shot depth
c$dso header float  recdep            refdata      trace depth
c$dso header int    velnz             vel          number of samples per trace
c$dso header int    velnx             vel          number of traces per shot
c$dso header float  veldz             vel          sample interval
c$dso header float  veldx             vel          trace interval
c$dso header float  velzorig          vel          start time
c$dso header float  velxorig          vel          offset of first trace
c$dso header int    srcnt             src       number of samples per trace
c$dso header float  srcdt             src       sample interval
c$dso header int    reflnz            refl         number of samples per trace
c$dso header int    reflnx            refl         number of traces per shot
c$dso header int    reflnxs           refl         number of shot records
c$dso header float  refldz            refl         sample interval
c$dso header float  refldx            refl         trace interval
c$dso header float  reflzorig         refl         start time
c$dso header float  reflxorig         refl         offset of first trace
c$dso job    int    amp_op                         variable (0) constant (1) velocity option
c$dso job    float  ap                             Kirchhoff aperture cutoff (degrees)
c$dso job    float  zd                             depth for reflectivity cutoff
c$dso job    float  t0                             mute zero offset intercept
c$dso job    float  tf                             mute final time
c$dso job    float  mv                             mute velocity
c$dso job    float  wmz                            width of mute zone
c$dso job    int    ntaper                         number of traces to taper
c$dso job    int    save_ttamp                     save tt/amp tables
c$dso job    int    tt_sam                         traveltime grid sampling factor
c$dso job    int    tt_size                        traveltime buffer size (floats)
c$dso job    int    nblock                         blocking factor
c$dso job    int    gather_flag                    common src (0) or offset (1)
c$dso end parameters

c     ----------------------------------------------------------
      
      integer 
     &     tt_sam,              ! resampling rate for traveltime map
     &     tt_size,             ! traveltime and amplitude buffer size in words
     &     amp_op,              ! flag for forward amplitude option
     &     velnz,               ! dimension velocity model first dimension
     &     velnx,               ! dimension velocity model in second dimension
     &     reflnz,              ! first dimension reflectivity model 
     &     reflnx,              ! second dimension reflectivity model 
     &     reflnxs,             ! third dimension reflectivity model 
     &     seismnt,             ! dimension seismogram in first (sample) 
c                                 dimension
     &     seismnx,             ! dimension seismogram in second (trace) 
c                                 dimension
     &     seismnxs             ! dimension seismogram in third (record) 
c                                 dimension
      integer 
     &     firstrec,            ! first record to be traced in this run
     &     lastrec,             ! last record to be traced in this run
     &     irecord,             ! index of current input record
     &     nblock,              ! number of records to be created
     &     update,              ! update flag - new bg (1) or old (0)
     &     save_ttamp           ! flag whether tt/amp tables should be saved

      logical done              ! done flag
      integer inv_flag          ! integer flag: = 1 for inversion, = 0 for adjoint
      integer spd_flag          ! integer flag: = 1 for 2.5D, = 0 for 2D

      integer 
     &     srcnt,               ! number of source samples
     &     gather_flag,         ! gather flag (0: CSG, 1: COG)
     &     ntaper,              ! number of traces to be tapered
     &     ipdmp,               ! dump file unit
     &     ipout,               ! output file unit (terminal)
     &     ier                  ! error flag

      real 
     &     seismdt,             ! sample distance in traces seismogram
     &     seismdx,             ! sample distance between traces seismogram
     &     seismdxs,            ! sample distance between records seismogram 
     &     seismtorig,          ! initial offset trace
     &     seismxorig,          ! initial trace offset first trace
     &     seismxsorig,         ! location of first shot in line
     &     srcdep,              ! shot depth
     &     recdep,              ! trace depth
     &     srcdt                ! source sample distance
      real
     &     t0,                  ! mute zero offset intercept
     &     tf,                  ! mute final time
     &     mv,                  ! muting velocity
     &     wmz,                 ! width of mute zone
     &     veldz,               ! sample distance in first direction velocity model
     &     veldx,               ! sample distance in second direction velocity model
     &     velzorig,            ! initial offset velocity model first direction
     &     velxorig,            ! initial offset velocity model second direction
     &     refldz,              ! sample distance in first direction reflectivity model
     &     refldx,              ! sample distance in second direction reflectivity model
     &     reflzorig,           ! initial offset reflectivity model first direction
     &     reflxorig,           ! initial offset reflectivity model second direction
     &     rho                  ! density
      real 
     &     ap,                  ! aperture of amplitude calculation kirchhoff
     &     zd                   ! datum depth

c declarations of arrays

      real 
     &     vel(*),              ! velocity model
     &     refl(*),             ! reference reflectivity
     &     dvel(*),             ! velocity model perturbation
     &     drefl(*),            ! reflectivity perturbation
     &     src(*),              ! source
     &     seism(*)             ! seismogram (output)

      integer 
     &     dvel_there,          ! there flag for dvel
     &     drefl_there          ! there flag for drefl

c workspace

      integer 
     &     len_work,            ! length of available workspace
     &     work_ptr             ! first word of available workspace
      real 
     &     work(*)              ! work array

c local work pointers

      integer
     &     tt_ptr,              ! pointer to coarse grid ttamp buffer
     &     drefl_ptr,           ! pointer to output refl
     &     trace_ptr,           ! pointer into seism for current trace
     &     tmp_ptr,             ! temp storage for a trace
     &     ptrace_ptr,          ! storage for preprocessed trace
     &     aux_ptr,             ! workspace for convolution 
     &     factor_ptr,          ! workspace for scaled squared velocity
     &     refl_ptr             ! only one needed

      integer
     &     cache_flag,          ! set by READ_TT if it caches ttamp tables -
c                                 initialized to 0 here to cause READ_TT to
c                                 initialize cache tables
     &     src_token,           ! identifier for src tt tables
     &     rec_token            ! identifier for rec tt tables

      integer lenwork_loc       ! length of work segment passed to subs, after
c                                 allocating local space

c getbuf variables

      integer 
     &     next,                ! index of next free word in work
     &     size                 ! size of bite to grab out of buffer

c local variables

c nfls is the number of traveltime tables already computed with
c the current model and saved to disk.
c nfls = 0 at the beginning of a new simulation with a new 
c model. It should be saved between calls to KIRCH_ADJ
c It needs to be initialized when irecord=1 (beginning of simulation). 
c Since READ_TT does not have access to irecord, it must be initialized
c and saved here to avoid excessive disk i/o. When update=0 (use old
c ttamp tables if possible) it should also be saved between simulations,
c i.e. between calls to the subroutine which calls this one. The only 
c obvious way to save it between simulations without adding a new special 
c common block and initialization code in the main DSO module (against 
c the DSO design principles) is to write it to a file on exit, and read 
c the file (if it exists) to initialize when update=0 and irecord=1. This
c strategy runs the risk of an unintentional initialization, i.e. when
c update=0 and the contents of the file relate to a different execution,
c hence the initialized nfls will not correctly count the number of ttamp
c files. I don't know what to do about this apart from keeping update=1
c as the default and managing this file, eg. by running from a script
c in a newly created directory.

      integer nfls

      save nfls

      logical found

      character*80 
     &     posnfile             ! filename for nfls

      integer
     &     posnunit             ! unit number for nfls file

      logical inversion         ! flag true for inversion false for adjoint 

      logical three_d           ! flag true for 2.5D false for 2D amplitudes

      integer ntables           ! number of tables in a traveltime record:
c                                 = 2 for 2D modelling (tt, amp)
c                                 = 5 for 2.5D modelling (tt, amp, tx, tz, tyy)
      integer
     &     recs_loaded,         ! guess
     &     idbg,                ! debug flag
     &     i,j,k                  ! counting indices

c grid compatibility tolerance

      real tol

c local block arrays

      integer
     &     recnzbox,  ! number of z samples in recvr tt box   
     &     recnxbox,  ! number of x samples in recvr tt box
     &     recoff,    ! pointers to work segments holding recvr tt/amp tables
     &     srcnzbox,  ! number of z samples in source tt box   
     &     srcnxbox,  ! number of x samples in source tt box
     &     srcoff     ! pointers to work segments holding src tt/amp tables

      real
     &     recboxorig,! x offset of receiver tt box
     &     recpos,    ! x offset of receiver
     &     srcboxorig,! x offset of source tt box
     &     srcpos     ! x offset of source

      character*3 
     &     gather_type,         ! CSG or COG
     &     mode                 ! mode flag for difference, convolution ops

c useful numbers

      real fzero
      integer ione

c counters for data loads

      save recs_loaded

c======================= BEGIN EXECUTABLE INSTRUCTIONS =================

      if (ier.ne.0) return
      
      call get_debug_level('kirch_fwd','idbg',idbg,ier)

      if (dvel_there.ne.0) then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' dvel_there = ',dvel_there
         write(ipdmp,*)' velocity perturbation not yet implemented!'
         write(ipdmp,*)' ipdmp = ',ipdmp
         write(ipdmp,*)' ipout = ',ipout
         ier=1
         return
      end if

c The following code cycles back to the driver to fill
c up the segment of work beginning at refl_ptr with records
c from refl. record the number of records loaded this cycle
c in recs_loaded. Keep going until recs_loaded=nblock. Don't
c need to move other input arrays into work since they are 
c all single record.

      if (irecord.eq.1) then
         recs_loaded=0
         if (update.ne.0) then
            nfls=0
         else

c if update = 0, read nfls from file if file exists, else = 0.

            call getblanks(posnfile)
            posnfile(1:8)='posn.job'
            found=.false.
            inquire(file=posnfile,exist=found)
            if (found) then
               if (idbg.gt.0) then
                  write(ipdmp,*)' KIRCH_FWD: found nfls file ',
     &              posnfile
                  write(ipdmp,*)' read nfls from this file'
               end if
               call fileopen(posnfile,posnunit,1,ier)
               if (ier.ne.0) then
                  write(ipdmp,*)' Error: KIRCH_FWD from FILEOPEN'
                  write(ipdmp,*)' on open of position file ',posnfile
                  return
               end if
               read(posnunit,2000)nfls
               if (idbg.gt.0) then
                  write(ipdmp,*)' number of saved tables = ',nfls
               end if
               call filecls(posnunit,ier)
               if (ier.ne.0) then
                  write(ipdmp,*)' Error: READ_TT from FILECLS'
                  write(ipdmp,*)' on close of position file ',posnfile
                  return
               end if
            else
               if (idbg.ne.0) then
                  write(ipdmp,*)' KIRCH_FWD: no nfls file, set nfls=0'
               end if
               nfls=0
            end if
         end if
      end if

c detect failure to find enough workspace for output - in this 
c design it is fatal. 

      if (irecord-firstrec+1.le.recs_loaded) then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' not enough output space - somehow'
         write(ipdmp,*)' either there is not enough workspace'
         write(ipdmp,*)' allocated to this job, or nblock is'
         write(ipdmp,*)' initially too large, or calc_space_needed is'
         write(ipdmp,*)' messed up'
         write(ipdmp,*)' nblock       = ',nblock
         write(ipdmp,*)' irecord      = ',irecord
         write(ipdmp,*)' firstrec     = ',firstrec
         write(ipdmp,*)' lastrec      = ',lastrec
         write(ipdmp,*)' work_ptr     = ',work_ptr
         write(ipdmp,*)' len_work     = ',len_work
         write(ipdmp,*)' refl_ptr     = ',refl_ptr
         write(ipdmp,*)' recs_loaded  = ',recs_loaded
         ier=1
         return
      end if

c reset nblock if necessary to avoid running off end of simulation

      nblock = min(nblock, seismnxs+1-firstrec)

c if we are the beginning of another cycle reinitialize refl_ptr

      if (recs_loaded.eq.0) then
         if (idbg.gt.0) then
            write(ipout,*)
            write(ipout,*)' NEW INPUT CYCLE'
         end if
         refl_ptr=work_ptr
      end if

      if (idbg.gt.0) then
         write(ipdmp,*)' KIRCH_FWD:'
         write(ipdmp,*)' nblock       = ',nblock
         write(ipdmp,*)' irecord      = ',irecord
         write(ipdmp,*)' firstrec     = ',firstrec
         write(ipdmp,*)' lastrec      = ',lastrec
         write(ipdmp,*)' work_ptr     = ',work_ptr
         write(ipdmp,*)' len_work     = ',len_work
         write(ipdmp,*)' refl_ptr     = ',refl_ptr
         write(ipdmp,*)' recs_loaded  = ',recs_loaded
      end if

      i=1
      j=reflnz*reflnx

c note that in this design it is an error to run out of
c workspace here

      if (j.gt.len_work) then
         write(ipdmp,*)' Error: INV'
         write(ipdmp,*)' ran out of workspace'
         write(ipdmp,*)' irecord = ',irecord
         write(ipdmp,*)' recs_loaded = ',recs_loaded
         write(ipdmp,*)' work_ptr = ',work_ptr
         write(ipdmp,*)' len_work = ',len_work
         write(ipdmp,*)' words needed = ',j
         ier=1
         return
      end if

      if (idbg.gt.0) then
         write(ipout,*)' LOADING DATA FOR SIMULATION ',irecord
      end if
      if (drefl_there.eq.0) then
         call scopy(j,refl,i,work(work_ptr),i)
      else
         call scopy(j,drefl,i,work(work_ptr),i)
      end if

      work_ptr=work_ptr+j
      len_work=len_work-j
      recs_loaded=recs_loaded+1

      if (idbg.ne.0) then
         write(ipdmp,*)' FWD: loaded ',j,' refl words'
         write(ipdmp,*)' record ',irecord
         write(ipdmp,*)' beginning of refl ',refl_ptr
         write(ipdmp,*)' work location ',work_ptr
         write(ipdmp,*)' rmng workspace ',len_work
      end if

c either go back for more, or reset recs_loaded and go on
c to carry out the calculation.
      
      if (recs_loaded.lt.nblock) then
         if (idbg.gt.0) then
            write(ipdmp,*)' KIRCH_FWD: returning for more !'
            write(ipdmp,*)' recs_loaded   = ',recs_loaded
            write(ipdmp,*)' nblock        = ',nblock
            write(ipdmp,*)' irecord       = ',irecord
         end if
         return
      else
         recs_loaded=0
         lastrec=firstrec+nblock-1
         if (idbg.gt.0) then
            write(ipdmp,*)' KIRCH_FWD: reset lastrec to ',lastrec
            write(ipdmp,*)' recs_loaded = ',recs_loaded
            write(ipdmp,*)' nblock        = ',nblock
         end if
         if (lastrec.eq.seismnxs) then
            if (idbg.gt.0) then
               write(ipdmp,*)' KIRCH_FWD: begin last block'
            end if
            done=.true.
         end if
      end if

c UPSHOT: at this point nblock records of reflectivity, each of
c size reflnz*reflnx, are loaded starting at refl_ptr in the work
c array.
c ------------ beginning of RV code ---------------------

c compute some numbers

      tol  = 1.0e-4 
      fzero= 0.0e+00
      ione = 1
      rho  = 1.0e+00
      src_token = 0
      rec_token = 1

c     assign logical inversion flag

      inv_flag=0
      if (inv_flag.eq.1) then
	 inversion=.true.
      else if (inv_flag.eq.0) then
	 inversion=.false.
      else
	 write(ipdmp,*)' Error: KIRCH_FWD'
	 write(ipdmp,*)' inv_flag has impossible value = ',inv_flag
	 ier=1
	 return
      end if

c     assign logical three_d flag

      if (spd_flag.eq.1) then
	 three_d=.true.
      else if (spd_flag.eq.0) then
	 three_d=.false.
      else
	 write(ipdmp,*)' Error: KIRCH_FWD'
	 write(ipdmp,*)' spd_flag has impossible value = ',spd_flag
	 ier=1
	 return
      end if

c     assign number of tables per traveltime record

      if (inversion.and.three_d) then
         ntables=6
      else if(inversion.and.(.not.three_d))then
         ntables=5
      else if((.not.inversion).and.three_d)then      
         ntables=3
      else
         ntables=2
      endif

c     evaluate flag for gather type

      if (gather_flag.eq.0) then
         gather_type='CSG'
      else if (gather_flag.eq.1) then
         gather_type='COG'
      else
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' gather flag value = ',gather_flag
         write(ipdmp,*)' only legit values are '
         write(ipdmp,*)'      0  - common source gather'
         write(ipdmp,*)'      1  - common offset gather'
         write(ipdmp,*)' check job table'
         ier=1
         return
      end if

      if (idbg.ne.0) then
         if (three_d) then
           if (gather_type.eq.'CSG') then
              write(ipdmp,*)
     &          ' 2.5D KIRCHHOFF COMMON SOURCE SIMULATION'
              write(*,*)
     &          ' 2.5D KIRCHHOFF COMMON SOURCE SIMULATION'
           else if (gather_type.eq.'COG') then
              write(ipdmp,*)
     &          ' 2.5D KIRCHHOFF COMMON OFFSET SIMULATION'
              write(*,*)
     &          ' 2.5D KIRCHHOFF COMMON OFFSET SIMULATION'
           end if
	 else if (.not.three_d) then
           if (gather_type.eq.'CSG') then
              write(ipdmp,*)
     &          ' 2D KIRCHHOFF COMMON SOURCE SIMULATION'
              write(*,*)
     &          ' 2D KIRCHHOFF COMMON SOURCE SIMULATION'
           else if (gather_type.eq.'COG') then
              write(ipdmp,*)
     &          ' 2D KIRCHHOFF COMMON OFFSET SIMULATION'
              write(*,*)
     &          ' 2D KIRCHHOFF COMMON OFFSET SIMULATION'
           end if
         end if
         write(ipdmp,*)' INPUT PARAMETER DUMP:'
         write(ipdmp,*)' seismogram parameters '
         write(ipdmp,*)' number of samples per trace     ', seismnt
         write(ipdmp,*)' sample interval                 ', seismdt
         write(ipdmp,*)' start time                      ', seismtorig
         if (gather_type.eq.'CSG') then
            write(ipdmp,*)' traces per shot gather       ', seismnx
            write(ipdmp,*)' offset of first trace           ', 
     &           seismxorig
            write(ipdmp,*)' trace interval                  ', seismdx
            write(ipdmp,*)' number of shot gather           ', seismnxs
            write(ipdmp,*)' x coordinate of first shot      ', 
     &           seismxsorig
            write(ipdmp,*)' shot interval                   ', seismdxs
         else if (gather_type.eq.'COG') then
            write(ipdmp,*)' shots per offset gather         ', seismnx
            write(ipdmp,*)' x coordinate of first shot      ', 
     &           seismxorig
            write(ipdmp,*)' shot interval                   ', seismdx
            write(ipdmp,*)' number of offset gathers        ', seismnxs
            write(ipdmp,*)' offset of first gather          ', 
     &           seismxsorig
            write(ipdmp,*)' offset interval                 ', seismdxs
         end if
         write(ipdmp,*)' shot depth                      ', srcdep
         write(ipdmp,*)' receiver depth                  ', recdep
         write(ipdmp,*)' source  parameters'
         write(ipdmp,*)' srcnt                           ',srcnt
         write(ipdmp,*)' srcdt                           ',srcdt
         write(ipdmp,*)' model parameters'
         write(ipdmp,*)' velnz                           ',velnz
         write(ipdmp,*)' velnx                           ',velnx
         write(ipdmp,*)' reflnz                          ',reflnz
         write(ipdmp,*)' reflnx                          ',reflnx
         write(ipdmp,*)' reflnxs                         ',reflnxs
         write(ipdmp,*)' refldz                          ',refldz
         write(ipdmp,*)' refldx                          ',refldx
         write(ipdmp,*)' reflzorig                       ',reflzorig
         write(ipdmp,*)' reflxorig                       ',reflxorig
         write(ipdmp,*)' veldz                           ',veldz
         write(ipdmp,*)' veldx                           ',veldx
         write(ipdmp,*)' velzorig                        ',velzorig
         write(ipdmp,*)' velxorig                        ',velxorig
         write(ipdmp,*)' miscellaneous parameters '
         write(ipdmp,*)' firstrec                        ',firstrec
         write(ipdmp,*)' lastrec                         ',lastrec
         write(ipdmp,*)' nblock                          ',nblock
         write(ipdmp,*)' len_work                        ',len_work
         write(ipdmp,*)' amp_op                          ',amp_op
         write(ipdmp,*)' ap                              ',ap
         write(ipdmp,*)' zd                              ',zd
         write(ipdmp,*)' rho                             ',rho
         write(ipdmp,*)' t0                              ',t0
         write(ipdmp,*)' tf                              ',tf
         write(ipdmp,*)' mv                              ',mv
         write(ipdmp,*)' wmz                             ',wmz
         write(ipdmp,*)' ntaper                          ',ntaper
         write(ipdmp,*)' save_ttamp                      ',save_ttamp
         write(ipdmp,*)' update                          ',update
      end if
      if (idbg.ge.3) then
         j=velnz*velnx
         write(ipdmp,*)' VEL:'
         write(ipdmp,1000)(vel(i),i=1,j)
         j=reflnz*reflnx
         write(ipdmp,*)' REFL:'
         write(ipdmp,1000)(work(refl_ptr-1+i),i=1,j)
         ier=999
         return
      end if
         
c parameter check to verify that the source distance (seismdxs)
c and the distance between receivers are integer multiples 
c of each other.

      if (abs(seismdx).le.tol*abs(seismdxs)) then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' seismdx   = ',seismdx
         write(ipdmp,*)' seismdxs   = ',seismdxs
         write(ipdmp,*)' not safely positive'
         ier=209
         return
      end if

      if ((abs(float(nint(seismdxs/seismdx))-
     &     (seismdxs/seismdx)).ge.tol).and.(idbg.gt.0)) then
         write(ipdmp,*)' Warning: KIRCH_FWD'
         write(ipdmp,*)' receiver interval : ',seismdx
         write(ipdmp,*)' source interval   : ',seismdxs
         write(ipdmp,*)' source interval must be an int mult of '
         write(ipdmp,*)' receiver interval, else this computation '
         write(ipdmp,*)' will be grossly inefficient'
      end if

      if ((abs((seismxorig/seismdx)-nint(seismxorig/seismdx))
     &     .gt.tol).and.(idbg.gt.0)) then
         write(ipdmp,*)' Warning: KIRCH_FWD'
         write(ipdmp,*)' offset of first receiver: ',seismxorig
         write(ipdmp,*)' must be an integral multiple of '
         write(ipdmp,*)' receiver interval: ',seismdx
         write(ipdmp,*)' else this computation will be grossly ',
     &        'inefficient'
      end if

c  parameter check to see if the source and
c  seismogram have same sampling interval

      if(abs(seismdt-srcdt).gt.tol*seismdt)then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' seismdt : ',seismdt
         write(ipdmp,*)' srcdt   : ',srcdt
         write(ipdmp,*)' they must be identical '
         write(ipdmp,*)' seismdt is sample int in seism '
         write(ipdmp,*)' srcdt is sample int in source '
         ier = 209
         return
      end if

c  parameter check to see if velocity and reflectivity have
c  same sampling intervals

      if(abs(veldz-refldz).gt.tol*veldz)then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' veldz : ',veldz
         write(ipdmp,*)' refldz : ',refldz
         write(ipdmp,*)' velocity and reflectivity grids incompatible'
         ier = 209
         return
      end if

      if(abs(veldx-refldx).gt.tol*veldx)then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' veldx : ',veldx
         write(ipdmp,*)' refldx : ',refldx
         write(ipdmp,*)' velocity and reflectivity grids incompatible'
         ier = 209
         return
      end if

c checks to ensure that the reflectivity grid is a
c subset of the velocity grid. This assumption will be changed soon, but
c for now:
c   1) the upper left hand corner of the velocity grid is (0,0)
c   2) velocity and reflectivity have the same sample rates

      if ((reflzorig.lt.0).or.(reflxorig.lt.0).or.
     &     ((reflzorig+veldz*(reflnz-1)).gt.(veldz*(velnz-1))).or.
     &     ((reflxorig+veldx*(reflnx-1)).gt.(veldx*(velnx-1)))) then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' reflectivity grid not subgrid of vel grid'
         write(ipdmp,*)' velnz     = ',velnz
         write(ipdmp,*)' velnx     = ',velnx
         write(ipdmp,*)' veldz     = ',veldz
         write(ipdmp,*)' veldx     = ',veldx
         write(ipdmp,*)' reflnz    = ',reflnz
         write(ipdmp,*)' reflnx    = ',reflnx
         write(ipdmp,*)' reflzorig = ',reflzorig
         write(ipdmp,*)' reflxorig = ',reflxorig
         ier=100
         return
      endif
        
      if ((reflnxs.ne.1).and.(reflnxs.ne.seismnxs)) then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' number of reflectivity records ',reflnxs
         write(ipdmp,*)' equal neither to 1 nor to number of shot'
         write(ipdmp,*)' records - inconsistent with simulator design'
         ier=209
         return
      end if

c check that veldz, veldx are safely nonzero

      if (abs(velnz*veldz).le.abs(veldz)) then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' veldz too small, = ',veldz
         ier=99
         return
      end if

      if (abs(velnx*veldx).le.abs(veldx)) then
         write(ipdmp,*)' Error: KIRCH_FWD'
         write(ipdmp,*)' veldx too small, = ',veldx
         ier=99
         return
      end if

      if (idbg.gt.0) then
         write(ipout,*)' INITIALIZE...'
      end if

c INITIALIZE OFFSET INTO WORK BUFFER

      next = work_ptr

c STORAGE FOR TRAVELTIME TABLES - CACHE FOR READ_TT

      size=tt_size
      
      call getbuf('work',tt_ptr,size,next,len_work,ipdmp,ier)
      if (ier.ne.0) then
         write(ipdmp,*)' Error: KIRCH_FWD from GETBUF
     &       	 or factor_ptr'
         return
      end if

      if (idbg.gt.0) then
         write(ipdmp,*)' traveltimes stored @',
     &        tt_ptr
      end if

c NOTE: from here until the return from KIRCH_FWD, the segment of
c work just reserved will be used solely for cache of ttamp tables,
c because work_ptr is reset after the following calls. Therefore it
c is "safe" to return to READ_TT multiple times, using this segment
c of work as cache. However the internal tables of indices in READ_TT
c must be reset before its first execution in the loop below:

      cache_flag=0

c STORAGE FOR GATHER AND TRACE INDEPENDENT QUANTITIES:

      size = seismnt
            
      call getbuf('work',tmp_ptr,size,next,len_work,ipdmp,ier)
      if (ier.ne.0) then
         write(ipdmp,*)' Error: KIRCH_FWD from GETBUF for',
     &        ' tmp_ptr'
         return
      end if

      call getbuf('work',ptrace_ptr,size,next,len_work,ipdmp,ier)
      if (ier.ne.0) then
         write(ipdmp,*)' Error: KIRCH_FWD from GETBUF for',
     &        ' tmp_ptr'
         return
      end if

      if (idbg.gt.0) then
         write(ipdmp,*)' preprocessed trace storage at ',ptrace_ptr
      end if

      size = seismnt+srcnt+10
            
      call getbuf('work',aux_ptr,size,next,len_work,ipdmp,ier)
      if (ier.ne.0) then
         write(ipdmp,*)' Error: KIRCH_FWD from GETBUF for',
     &        ' aux_ptr'
         return
      end if

      size=velnx*velnz
      
      call getbuf('work',factor_ptr,size,next,len_work,ipdmp,ier)
      if (ier.ne.0) then
         write(ipdmp,*)' Error: KIRCH_FWD from GETBUF
     &       	 or factor_ptr'
         return
      end if

      if (idbg.gt.0) then
         write(ipdmp,*)' gather independent sum. factor stored @',
     &        factor_ptr
      end if

      call factor_fwd(three_d,
     &     velnz,velnx,work(factor_ptr),vel,
     &     seismdt,rho,veldz,veldx,
     &     idbg,ipdmp, ier)
      if (ier.ne.0) then
         write(ipdmp,*)' Error: KIRCH_FWD from FACTOR_FWD'
         return
      end if

c     apply cutoff to input reflectivity

      call cutoff(reflnz,reflnx,nblock,work(refl_ptr),zd-reflzorig,
     &     veldz,ier)
      if (ier.ne.0) then
         write(ipdmp,*)' Error: KIRCH_ADJ from CUTOFF'
         return
      end if

      lenwork_loc = len_work - next
      work_ptr = next

c The source offset is always the same as the beginning of the workspace,
c since read_tt stores the fine grid table there and the source table is
c the first returned.

      srcoff=work_ptr

c LOOP OVER TRACE INDEX

c     for common source gathers the gather and source indicies are the same

c     for common offset gathers the headers mean:
c         seismnx             = number of traces = number of sources
c         seismdx             = source step
c         seismxorig          = coordinate of first source
c         seismnxs            = number of offsets
c         seismdxs            = offset step = trace step = source step
c         seismxsorig         = first offset

      do j=1,seismnx
         
c LOOP OVER GATHER INDEX

         do i=firstrec,lastrec

            if (idbg.ne.0) then
               write(ipout,*)
     &              ' SIMULATION: gather = ',i,' trace = ',j
            end if

c COMPUTE SOURCE AND RECEIVER POSITIONS, OFFSET INTO SEISM,
c REFL ARRAYS - for forward mode, drefl_ptr indexes into work
c so add offset to refl_ptr (where refl section of work starts);
c seism_ptr indexes into seism, so no need. For adjoint modes
c the roles are reversed - seism_ptr indexes into work, drefl_ptr
c into drefl (output).

            if (reflnxs.eq.1) then
               drefl_ptr=refl_ptr
            else
               drefl_ptr=(i-firstrec)*reflnz*reflnx+refl_ptr
            endif
            trace_ptr=(i-firstrec)*seismnt*seismnx
     &           + (j-1)*seismnt + 1

            if (gather_type.eq.'CSG') then

               srcpos = seismxsorig + (i-1)*seismdxs
               recpos = srcpos + seismxorig  + (j-1)*seismdx

            else if (gather_type.eq.'COG') then

c     here j=source index (=trace index) and i=offset index=recvr index

               srcpos = seismxorig  + (j-1)*seismdx
               recpos = seismxsorig + (i-1)*seismdxs + srcpos

            else

               write(ipdmp,*)' Error: KIRCH_FWD'
               write(ipdmp,*)' unrecognized gather type: ',gather_type

            end if

            if (idbg.ne.0) then
               write(ipout,*)
     &              ' COMPUTING SOURCE TRAVELTIME TABLE '
               write(ipout,*)' position = ',srcpos
               write(ipdmp,*)
     &              ' COMPUTING SOURCE TRAVELTIME TABLE '
               write(ipdmp,*)' position = ',srcpos
            end if
            call read_ttdata(cache_flag,inversion,three_d,
     &           vel,work(tt_ptr),work(srcoff),src_token,
     &           velnz,velzorig,veldz,
     &           velnx,velxorig,veldx,seismnt,seismdt,
     &           srcnzbox,srcnxbox,srcboxorig,
     &           srcdep,srcpos,save_ttamp,tt_sam,tt_size,
     &           amp_op,update,lenwork_loc,
     &           ap,zd,nfls,ipdmp,ier)      
            if (ier.ne.0) then
               write(ipdmp,*)' Error: KIRCH_FWD from READ_TT'
               return
            end if

c receiver table returned in next block of work - now know what size it is!

            recoff = srcoff + ntables*srcnzbox*srcnxbox

            if (idbg.gt.0) then
               write(ipout,*)
     &              ' COMPUTING RECEIVER TRAVELTIME TABLE '
               write(ipout,*)' position = ',recpos
               write(ipdmp,*)
     &              ' COMPUTING RECEIVER TRAVELTIME TABLE '
               write(ipdmp,*)' position = ',recpos
            end if
            call read_ttdata(cache_flag,inversion,three_d,
     &           vel,work(tt_ptr),work(recoff),rec_token,
     &           velnz,velzorig,veldz,
     &           velnx,velxorig,veldx,seismnt,seismdt,
     &           recnzbox,recnxbox,recboxorig,
     &           recdep,recpos,save_ttamp,tt_sam,tt_size,
     &           amp_op,update,lenwork_loc,
     &           ap,zd,nfls,ipdmp,ier)      
            if (ier.ne.0) then
               write(ipdmp,*)
     &              ' Error: KIRCH_FWD from READ_TT'
               return
            end if
            
            if (idbg.gt.0) then
               write(ipdmp,*)' KIRCH_FWD ---> KSUM:'
               write(ipdmp,*)' reflectivity loaded at     ',
     &              drefl_ptr
               write(ipdmp,*)' src tt/amp table loaded at ',
     &              srcoff
               write(ipdmp,*)' rec tt/amp table loaded at ',
     &              recoff
               write(ipdmp,*)' trace output at            ',
     &              trace_ptr
            end if

            call kirch_sum_fwd(three_d,
     &           velnz,velnx,veldz,veldx,zd,
     &           reflnz,reflnx,reflzorig,reflxorig,
     &           srcnzbox,srcnxbox,srcboxorig,
     &           recnzbox,recnxbox,recboxorig,
     &           work(drefl_ptr),work(factor_ptr),
     &           work(srcoff),
     &           work(recoff),
     &           seism(trace_ptr),
     &           seismnt,seismdt,
     &           ipdmp,ier)
            if (ier.ne.0) then
               write(ipdmp,*)' Error: KIRCH_FWD from KSUM_FWD'
               return
            end if
            
            mode='fwd'
            
            call cendif(mode,
     &           seism(trace_ptr), work(tmp_ptr), 
     &           seismdt, seismnt, idbg, ipdmp, ier)
            if (ier.ne.0) then
               write(ipdmp,*)' Error: KIRCH_FWD from CENDIF'
               return
            end if
                     
            call convolve(mode,
     &           seismnt, seismdt, work(tmp_ptr), 
     &           seism(trace_ptr), srcnt, src, work(aux_ptr), 
     &           idbg, ipdmp, ier)
            if (ier.ne.0) then
               write(ipdmp,*)' Error: KIRCH_FWD from CONVOLVE'
               return
            end if

         end do
         
      end do

c     mute and taper last since this is the forward calculation

      call mute_and_taper(seism,seismnt,seismnx,
     &     firstrec,nblock,
     &     t0, tf, mv, wmz, ntaper,
     &     seismxorig,seismdx,seismdt,seismxsorig,seismdxs,
     &     gather_type,ipdmp,ier)
      if(ier.ne.0)then
         write(ipdmp,*)' Error: KIRCH_FWD from MUTE_AND_TAPER '
         return
      end if

c     save nfls data - easiest way is just to overwrite existing 
c     file, even though this is slightly inefficient.

      if (save_ttamp.ne.0) then

         call getblanks(posnfile)
         posnfile(1:8)='posn.job'

         call fileopen(posnfile,posnunit,2,ier)
         if (ier.ne.0) then
            write(ipdmp,*)' Error: KIRCH_FWD from FILEOPEN'
            write(ipdmp,*)' on open of nfls file ',posnfile
            return
         end if
         write(posnunit,2000)nfls
         if (idbg.gt.0) then
            write(ipdmp,*)' KIRCH_FWD: number of saved tables = ',nfls
         end if
         call filecls(posnunit,ier)
         if (ier.ne.0) then
            write(ipdmp,*)' Error: KIRCH_FWD from FILECLS'
            write(ipdmp,*)' on close of nfls file ',posnfile
            return
         end if

      end if
         
 1000 format(6(e12.4,2x))
 2000 format(i10)

      return
      end
c====================================================================
