
      subroutine cfftb (n,c,wsave)
      dimension       c(1)       ,wsave(1)
      if (n .eq. 1) return
      iw1 = n+n+1
      iw2 = iw1+n+n
      call cfftb1 (n,c,wsave,wsave(iw1),wsave(iw2))
      return
      end
      subroutine cfftb1 (n,c,ch,wa,ifac)
      dimension       ch(1)      ,c(1)       ,wa(1)      ,ifac(1)
      nf = ifac(2)
      na = 0
      l1 = 1
      iw = 1
      do 116 k1=1,nf
         ip = ifac(k1+2)
         l2 = ip*l1
         ido = n/l2
         idot = ido+ido
         idl1 = idot*l1
         if (ip .ne. 4) go to 103
         ix2 = iw+idot
         ix3 = ix2+idot
         if (na .ne. 0) go to 101
         call passb4 (idot,l1,c,ch,wa(iw),wa(ix2),wa(ix3))
         go to 102
  101    call passb4 (idot,l1,ch,c,wa(iw),wa(ix2),wa(ix3))
  102    na = 1-na
         go to 115
  103    if (ip .ne. 2) go to 106
         if (na .ne. 0) go to 104
         call passb2 (idot,l1,c,ch,wa(iw))
         go to 105
  104    call passb2 (idot,l1,ch,c,wa(iw))
  105    na = 1-na
         go to 115
  106    if (ip .ne. 3) go to 109
         ix2 = iw+idot
         if (na .ne. 0) go to 107
         call passb3 (idot,l1,c,ch,wa(iw),wa(ix2))
         go to 108
  107    call passb3 (idot,l1,ch,c,wa(iw),wa(ix2))
  108    na = 1-na
         go to 115
  109    if (ip .ne. 5) go to 112
         ix2 = iw+idot
         ix3 = ix2+idot
         ix4 = ix3+idot
         if (na .ne. 0) go to 110
         call passb5 (idot,l1,c,ch,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 111
  110    call passb5 (idot,l1,ch,c,wa(iw),wa(ix2),wa(ix3),wa(ix4))
  111    na = 1-na
         go to 115
  112    if (na .ne. 0) go to 113
         call passb (nac,idot,ip,l1,idl1,c,c,c,ch,ch,wa(iw))
         go to 114
  113    call passb (nac,idot,ip,l1,idl1,ch,ch,ch,c,c,wa(iw))
  114    if (nac .ne. 0) na = 1-na
  115    l1 = l2
         iw = iw+(ip-1)*idot
  116 continue
      if (na .eq. 0) return
      n2 = n+n
      do 117 i=1,n2
         c(i) = ch(i)
  117 continue
      return
      end
      subroutine passb (nac,ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa)
      dimension       ch(ido,l1,ip)          ,cc(ido,ip,l1)          ,
     1                c1(ido,l1,ip)          ,wa(1)      ,c2(idl1,ip),
     2                ch2(idl1,ip)
      idot = ido/2
      nt = ip*idl1
      ipp2 = ip+2
      ipph = (ip+1)/2
      idp = ip*ido
c
      if (ido .lt. l1) go to 106
      do 103 j=2,ipph
         jc = ipp2-j
         do 102 k=1,l1
            do 101 i=1,ido
               ch(i,k,j) = cc(i,j,k)+cc(i,jc,k)
               ch(i,k,jc) = cc(i,j,k)-cc(i,jc,k)
  101       continue
  102    continue
  103 continue
      do 105 k=1,l1
         do 104 i=1,ido
            ch(i,k,1) = cc(i,1,k)
  104    continue
  105 continue
      go to 112
  106 do 109 j=2,ipph
         jc = ipp2-j
         do 108 i=1,ido
            do 107 k=1,l1
               ch(i,k,j) = cc(i,j,k)+cc(i,jc,k)
               ch(i,k,jc) = cc(i,j,k)-cc(i,jc,k)
  107       continue
  108    continue
  109 continue
      do 111 i=1,ido
         do 110 k=1,l1
            ch(i,k,1) = cc(i,1,k)
  110    continue
  111 continue
  112 idl = 2-ido
      inc = 0
      do 116 l=2,ipph
         lc = ipp2-l
         idl = idl+ido
         do 113 ik=1,idl1
            c2(ik,l) = ch2(ik,1)+wa(idl-1)*ch2(ik,2)
            c2(ik,lc) = wa(idl)*ch2(ik,ip)
  113    continue
         idlj = idl
         inc = inc+ido
         do 115 j=3,ipph
            jc = ipp2-j
            idlj = idlj+inc
            if (idlj .gt. idp) idlj = idlj-idp
            war = wa(idlj-1)
            wai = wa(idlj)
            do 114 ik=1,idl1
               c2(ik,l) = c2(ik,l)+war*ch2(ik,j)
               c2(ik,lc) = c2(ik,lc)+wai*ch2(ik,jc)
  114       continue
  115    continue
  116 continue
      do 118 j=2,ipph
         do 117 ik=1,idl1
            ch2(ik,1) = ch2(ik,1)+ch2(ik,j)
  117    continue
  118 continue
      do 120 j=2,ipph
         jc = ipp2-j
         do 119 ik=2,idl1,2
            ch2(ik-1,j) = c2(ik-1,j)-c2(ik,jc)
            ch2(ik-1,jc) = c2(ik-1,j)+c2(ik,jc)
            ch2(ik,j) = c2(ik,j)+c2(ik-1,jc)
            ch2(ik,jc) = c2(ik,j)-c2(ik-1,jc)
  119    continue
  120 continue
      nac = 1
      if (ido .eq. 2) return
      nac = 0
      do 121 ik=1,idl1
         c2(ik,1) = ch2(ik,1)
  121 continue
      do 123 j=2,ip
         do 122 k=1,l1
            c1(1,k,j) = ch(1,k,j)
            c1(2,k,j) = ch(2,k,j)
  122    continue
  123 continue
      if (idot .gt. l1) go to 127
      idij = 0
      do 126 j=2,ip
         idij = idij+2
         do 125 i=4,ido,2
            idij = idij+2
            do 124 k=1,l1
               c1(i-1,k,j) = wa(idij-1)*ch(i-1,k,j)-wa(idij)*ch(i,k,j)
               c1(i,k,j) = wa(idij-1)*ch(i,k,j)+wa(idij)*ch(i-1,k,j)
  124       continue
  125    continue
  126 continue
      return
  127 idj = 2-ido
      do 130 j=2,ip
         idj = idj+ido
         do 129 k=1,l1
            idij = idj
            do 128 i=4,ido,2
               idij = idij+2
               c1(i-1,k,j) = wa(idij-1)*ch(i-1,k,j)-wa(idij)*ch(i,k,j)
               c1(i,k,j) = wa(idij-1)*ch(i,k,j)+wa(idij)*ch(i-1,k,j)
  128       continue
  129    continue
  130 continue
      return
      end
      subroutine passb2 (ido,l1,cc,ch,wa1)
      dimension       cc(ido,2,l1)           ,ch(ido,l1,2)           ,
     1                wa1(1)
      if (ido .gt. 2) go to 102
      do 101 k=1,l1
         ch(1,k,1) = cc(1,1,k)+cc(1,2,k)
         ch(1,k,2) = cc(1,1,k)-cc(1,2,k)
         ch(2,k,1) = cc(2,1,k)+cc(2,2,k)
         ch(2,k,2) = cc(2,1,k)-cc(2,2,k)
  101 continue
      return
  102 do 104 k=1,l1
         do 103 i=2,ido,2
            ch(i-1,k,1) = cc(i-1,1,k)+cc(i-1,2,k)
            tr2 = cc(i-1,1,k)-cc(i-1,2,k)
            ch(i,k,1) = cc(i,1,k)+cc(i,2,k)
            ti2 = cc(i,1,k)-cc(i,2,k)
            ch(i,k,2) = wa1(i-1)*ti2+wa1(i)*tr2
            ch(i-1,k,2) = wa1(i-1)*tr2-wa1(i)*ti2
  103    continue
  104 continue
      return
      end
      subroutine passb3 (ido,l1,cc,ch,wa1,wa2)
      dimension       cc(ido,3,l1)           ,ch(ido,l1,3)           ,
     1                wa1(1)     ,wa2(1)
      data taur,taui /-.5,.866025403784439/
      if (ido .ne. 2) go to 102
      do 101 k=1,l1
         tr2 = cc(1,2,k)+cc(1,3,k)
         cr2 = cc(1,1,k)+taur*tr2
         ch(1,k,1) = cc(1,1,k)+tr2
         ti2 = cc(2,2,k)+cc(2,3,k)
         ci2 = cc(2,1,k)+taur*ti2
         ch(2,k,1) = cc(2,1,k)+ti2
         cr3 = taui*(cc(1,2,k)-cc(1,3,k))
         ci3 = taui*(cc(2,2,k)-cc(2,3,k))
         ch(1,k,2) = cr2-ci3
         ch(1,k,3) = cr2+ci3
         ch(2,k,2) = ci2+cr3
         ch(2,k,3) = ci2-cr3
  101 continue
      return
  102 do 104 k=1,l1
         do 103 i=2,ido,2
            tr2 = cc(i-1,2,k)+cc(i-1,3,k)
            cr2 = cc(i-1,1,k)+taur*tr2
            ch(i-1,k,1) = cc(i-1,1,k)+tr2
            ti2 = cc(i,2,k)+cc(i,3,k)
            ci2 = cc(i,1,k)+taur*ti2
            ch(i,k,1) = cc(i,1,k)+ti2
            cr3 = taui*(cc(i-1,2,k)-cc(i-1,3,k))
            ci3 = taui*(cc(i,2,k)-cc(i,3,k))
            dr2 = cr2-ci3
            dr3 = cr2+ci3
            di2 = ci2+cr3
            di3 = ci2-cr3
            ch(i,k,2) = wa1(i-1)*di2+wa1(i)*dr2
            ch(i-1,k,2) = wa1(i-1)*dr2-wa1(i)*di2
            ch(i,k,3) = wa2(i-1)*di3+wa2(i)*dr3
            ch(i-1,k,3) = wa2(i-1)*dr3-wa2(i)*di3
  103    continue
  104 continue
      return
      end
      subroutine passb4 (ido,l1,cc,ch,wa1,wa2,wa3)
      dimension       cc(ido,4,l1)           ,ch(ido,l1,4)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)
      if (ido .ne. 2) go to 102
      do 101 k=1,l1
         ti1 = cc(2,1,k)-cc(2,3,k)
         ti2 = cc(2,1,k)+cc(2,3,k)
         tr4 = cc(2,4,k)-cc(2,2,k)
         ti3 = cc(2,2,k)+cc(2,4,k)
         tr1 = cc(1,1,k)-cc(1,3,k)
         tr2 = cc(1,1,k)+cc(1,3,k)
         ti4 = cc(1,2,k)-cc(1,4,k)
         tr3 = cc(1,2,k)+cc(1,4,k)
         ch(1,k,1) = tr2+tr3
         ch(1,k,3) = tr2-tr3
         ch(2,k,1) = ti2+ti3
         ch(2,k,3) = ti2-ti3
         ch(1,k,2) = tr1+tr4
         ch(1,k,4) = tr1-tr4
         ch(2,k,2) = ti1+ti4
         ch(2,k,4) = ti1-ti4
  101 continue
      return
  102 do 104 k=1,l1
         do 103 i=2,ido,2
            ti1 = cc(i,1,k)-cc(i,3,k)
            ti2 = cc(i,1,k)+cc(i,3,k)
            ti3 = cc(i,2,k)+cc(i,4,k)
            tr4 = cc(i,4,k)-cc(i,2,k)
            tr1 = cc(i-1,1,k)-cc(i-1,3,k)
            tr2 = cc(i-1,1,k)+cc(i-1,3,k)
            ti4 = cc(i-1,2,k)-cc(i-1,4,k)
            tr3 = cc(i-1,2,k)+cc(i-1,4,k)
            ch(i-1,k,1) = tr2+tr3
            cr3 = tr2-tr3
            ch(i,k,1) = ti2+ti3
            ci3 = ti2-ti3
            cr2 = tr1+tr4
            cr4 = tr1-tr4
            ci2 = ti1+ti4
            ci4 = ti1-ti4
            ch(i-1,k,2) = wa1(i-1)*cr2-wa1(i)*ci2
            ch(i,k,2) = wa1(i-1)*ci2+wa1(i)*cr2
            ch(i-1,k,3) = wa2(i-1)*cr3-wa2(i)*ci3
            ch(i,k,3) = wa2(i-1)*ci3+wa2(i)*cr3
            ch(i-1,k,4) = wa3(i-1)*cr4-wa3(i)*ci4
            ch(i,k,4) = wa3(i-1)*ci4+wa3(i)*cr4
  103    continue
  104 continue
      return
      end
      subroutine passb5 (ido,l1,cc,ch,wa1,wa2,wa3,wa4)
      dimension       cc(ido,5,l1)           ,ch(ido,l1,5)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)     ,wa4(1)
      data tr11,ti11,tr12,ti12 /.309016994374947,.951056516295154,
     1-.809016994374947,.587785252292473/
      if (ido .ne. 2) go to 102
      do 101 k=1,l1
         ti5 = cc(2,2,k)-cc(2,5,k)
         ti2 = cc(2,2,k)+cc(2,5,k)
         ti4 = cc(2,3,k)-cc(2,4,k)
         ti3 = cc(2,3,k)+cc(2,4,k)
         tr5 = cc(1,2,k)-cc(1,5,k)
         tr2 = cc(1,2,k)+cc(1,5,k)
         tr4 = cc(1,3,k)-cc(1,4,k)
         tr3 = cc(1,3,k)+cc(1,4,k)
         ch(1,k,1) = cc(1,1,k)+tr2+tr3
         ch(2,k,1) = cc(2,1,k)+ti2+ti3
         cr2 = cc(1,1,k)+tr11*tr2+tr12*tr3
         ci2 = cc(2,1,k)+tr11*ti2+tr12*ti3
         cr3 = cc(1,1,k)+tr12*tr2+tr11*tr3
         ci3 = cc(2,1,k)+tr12*ti2+tr11*ti3
         cr5 = ti11*tr5+ti12*tr4
         ci5 = ti11*ti5+ti12*ti4
         cr4 = ti12*tr5-ti11*tr4
         ci4 = ti12*ti5-ti11*ti4
         ch(1,k,2) = cr2-ci5
         ch(1,k,5) = cr2+ci5
         ch(2,k,2) = ci2+cr5
         ch(2,k,3) = ci3+cr4
         ch(1,k,3) = cr3-ci4
         ch(1,k,4) = cr3+ci4
         ch(2,k,4) = ci3-cr4
         ch(2,k,5) = ci2-cr5
  101 continue
      return
  102 do 104 k=1,l1
         do 103 i=2,ido,2
            ti5 = cc(i,2,k)-cc(i,5,k)
            ti2 = cc(i,2,k)+cc(i,5,k)
            ti4 = cc(i,3,k)-cc(i,4,k)
            ti3 = cc(i,3,k)+cc(i,4,k)
            tr5 = cc(i-1,2,k)-cc(i-1,5,k)
            tr2 = cc(i-1,2,k)+cc(i-1,5,k)
            tr4 = cc(i-1,3,k)-cc(i-1,4,k)
            tr3 = cc(i-1,3,k)+cc(i-1,4,k)
            ch(i-1,k,1) = cc(i-1,1,k)+tr2+tr3
            ch(i,k,1) = cc(i,1,k)+ti2+ti3
            cr2 = cc(i-1,1,k)+tr11*tr2+tr12*tr3
            ci2 = cc(i,1,k)+tr11*ti2+tr12*ti3
            cr3 = cc(i-1,1,k)+tr12*tr2+tr11*tr3
            ci3 = cc(i,1,k)+tr12*ti2+tr11*ti3
            cr5 = ti11*tr5+ti12*tr4
            ci5 = ti11*ti5+ti12*ti4
            cr4 = ti12*tr5-ti11*tr4
            ci4 = ti12*ti5-ti11*ti4
            dr3 = cr3-ci4
            dr4 = cr3+ci4
            di3 = ci3+cr4
            di4 = ci3-cr4
            dr5 = cr2+ci5
            dr2 = cr2-ci5
            di5 = ci2-cr5
            di2 = ci2+cr5
            ch(i-1,k,2) = wa1(i-1)*dr2-wa1(i)*di2
            ch(i,k,2) = wa1(i-1)*di2+wa1(i)*dr2
            ch(i-1,k,3) = wa2(i-1)*dr3-wa2(i)*di3
            ch(i,k,3) = wa2(i-1)*di3+wa2(i)*dr3
            ch(i-1,k,4) = wa3(i-1)*dr4-wa3(i)*di4
            ch(i,k,4) = wa3(i-1)*di4+wa3(i)*dr4
            ch(i-1,k,5) = wa4(i-1)*dr5-wa4(i)*di5
            ch(i,k,5) = wa4(i-1)*di5+wa4(i)*dr5
  103    continue
  104 continue
      return
      end


      subroutine cfftf (n,c,wsave)
      dimension       c(1)       ,wsave(1)
      if (n .eq. 1) return
      iw1 = n+n+1
      iw2 = iw1+n+n
      call cfftf1 (n,c,wsave,wsave(iw1),wsave(iw2))
      return
      end
      subroutine cfftf1 (n,c,ch,wa,ifac)
      dimension       ch(1)      ,c(1)       ,wa(1)      ,ifac(1)
      nf = ifac(2)
      na = 0
      l1 = 1
      iw = 1
      do 116 k1=1,nf
         ip = ifac(k1+2)
         l2 = ip*l1
         ido = n/l2
         idot = ido+ido
         idl1 = idot*l1
         if (ip .ne. 4) go to 103
         ix2 = iw+idot
         ix3 = ix2+idot
         if (na .ne. 0) go to 101
         call passf4 (idot,l1,c,ch,wa(iw),wa(ix2),wa(ix3))
         go to 102
  101    call passf4 (idot,l1,ch,c,wa(iw),wa(ix2),wa(ix3))
  102    na = 1-na
         go to 115
  103    if (ip .ne. 2) go to 106
         if (na .ne. 0) go to 104
         call passf2 (idot,l1,c,ch,wa(iw))
         go to 105
  104    call passf2 (idot,l1,ch,c,wa(iw))
  105    na = 1-na
         go to 115
  106    if (ip .ne. 3) go to 109
         ix2 = iw+idot
         if (na .ne. 0) go to 107
         call passf3 (idot,l1,c,ch,wa(iw),wa(ix2))
         go to 108
  107    call passf3 (idot,l1,ch,c,wa(iw),wa(ix2))
  108    na = 1-na
         go to 115
  109    if (ip .ne. 5) go to 112
         ix2 = iw+idot
         ix3 = ix2+idot
         ix4 = ix3+idot
         if (na .ne. 0) go to 110
         call passf5 (idot,l1,c,ch,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 111
  110    call passf5 (idot,l1,ch,c,wa(iw),wa(ix2),wa(ix3),wa(ix4))
  111    na = 1-na
         go to 115
  112    if (na .ne. 0) go to 113
         call passf (nac,idot,ip,l1,idl1,c,c,c,ch,ch,wa(iw))
         go to 114
  113    call passf (nac,idot,ip,l1,idl1,ch,ch,ch,c,c,wa(iw))
  114    if (nac .ne. 0) na = 1-na
  115    l1 = l2
         iw = iw+(ip-1)*idot
  116 continue
      if (na .eq. 0) return
      n2 = n+n
      do 117 i=1,n2
         c(i) = ch(i)
  117 continue
      return
      end
      subroutine passf (nac,ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa)
      dimension       ch(ido,l1,ip)          ,cc(ido,ip,l1)          ,
     1                c1(ido,l1,ip)          ,wa(1)      ,c2(idl1,ip),
     2                ch2(idl1,ip)
      idot = ido/2
      nt = ip*idl1
      ipp2 = ip+2
      ipph = (ip+1)/2
      idp = ip*ido
c
      if (ido .lt. l1) go to 106
      do 103 j=2,ipph
         jc = ipp2-j
         do 102 k=1,l1
            do 101 i=1,ido
               ch(i,k,j) = cc(i,j,k)+cc(i,jc,k)
               ch(i,k,jc) = cc(i,j,k)-cc(i,jc,k)
  101       continue
  102    continue
  103 continue
      do 105 k=1,l1
         do 104 i=1,ido
            ch(i,k,1) = cc(i,1,k)
  104    continue
  105 continue
      go to 112
  106 do 109 j=2,ipph
         jc = ipp2-j
         do 108 i=1,ido
            do 107 k=1,l1
               ch(i,k,j) = cc(i,j,k)+cc(i,jc,k)
               ch(i,k,jc) = cc(i,j,k)-cc(i,jc,k)
  107       continue
  108    continue
  109 continue
      do 111 i=1,ido
         do 110 k=1,l1
            ch(i,k,1) = cc(i,1,k)
  110    continue
  111 continue
  112 idl = 2-ido
      inc = 0
      do 116 l=2,ipph
         lc = ipp2-l
         idl = idl+ido
         do 113 ik=1,idl1
            c2(ik,l) = ch2(ik,1)+wa(idl-1)*ch2(ik,2)
            c2(ik,lc) = -wa(idl)*ch2(ik,ip)
  113    continue
         idlj = idl
         inc = inc+ido
         do 115 j=3,ipph
            jc = ipp2-j
            idlj = idlj+inc
            if (idlj .gt. idp) idlj = idlj-idp
            war = wa(idlj-1)
            wai = wa(idlj)
            do 114 ik=1,idl1
               c2(ik,l) = c2(ik,l)+war*ch2(ik,j)
               c2(ik,lc) = c2(ik,lc)-wai*ch2(ik,jc)
  114       continue
  115    continue
  116 continue
      do 118 j=2,ipph
         do 117 ik=1,idl1
            ch2(ik,1) = ch2(ik,1)+ch2(ik,j)
  117    continue
  118 continue
      do 120 j=2,ipph
         jc = ipp2-j
         do 119 ik=2,idl1,2
            ch2(ik-1,j) = c2(ik-1,j)-c2(ik,jc)
            ch2(ik-1,jc) = c2(ik-1,j)+c2(ik,jc)
            ch2(ik,j) = c2(ik,j)+c2(ik-1,jc)
            ch2(ik,jc) = c2(ik,j)-c2(ik-1,jc)
  119    continue
  120 continue
      nac = 1
      if (ido .eq. 2) return
      nac = 0
      do 121 ik=1,idl1
         c2(ik,1) = ch2(ik,1)
  121 continue
      do 123 j=2,ip
         do 122 k=1,l1
            c1(1,k,j) = ch(1,k,j)
            c1(2,k,j) = ch(2,k,j)
  122    continue
  123 continue
      if (idot .gt. l1) go to 127
      idij = 0
      do 126 j=2,ip
         idij = idij+2
         do 125 i=4,ido,2
            idij = idij+2
            do 124 k=1,l1
               c1(i-1,k,j) = wa(idij-1)*ch(i-1,k,j)+wa(idij)*ch(i,k,j)
               c1(i,k,j) = wa(idij-1)*ch(i,k,j)-wa(idij)*ch(i-1,k,j)
  124       continue
  125    continue
  126 continue
      return
  127 idj = 2-ido
      do 130 j=2,ip
         idj = idj+ido
         do 129 k=1,l1
            idij = idj
            do 128 i=4,ido,2
               idij = idij+2
               c1(i-1,k,j) = wa(idij-1)*ch(i-1,k,j)+wa(idij)*ch(i,k,j)
               c1(i,k,j) = wa(idij-1)*ch(i,k,j)-wa(idij)*ch(i-1,k,j)
  128       continue
  129    continue
  130 continue
      return
      end
      subroutine passf2 (ido,l1,cc,ch,wa1)
      dimension       cc(ido,2,l1)           ,ch(ido,l1,2)           ,
     1                wa1(1)
      if (ido .gt. 2) go to 102
      do 101 k=1,l1
         ch(1,k,1) = cc(1,1,k)+cc(1,2,k)
         ch(1,k,2) = cc(1,1,k)-cc(1,2,k)
         ch(2,k,1) = cc(2,1,k)+cc(2,2,k)
         ch(2,k,2) = cc(2,1,k)-cc(2,2,k)
  101 continue
      return
  102 do 104 k=1,l1
         do 103 i=2,ido,2
            ch(i-1,k,1) = cc(i-1,1,k)+cc(i-1,2,k)
            tr2 = cc(i-1,1,k)-cc(i-1,2,k)
            ch(i,k,1) = cc(i,1,k)+cc(i,2,k)
            ti2 = cc(i,1,k)-cc(i,2,k)
            ch(i,k,2) = wa1(i-1)*ti2-wa1(i)*tr2
            ch(i-1,k,2) = wa1(i-1)*tr2+wa1(i)*ti2
  103    continue
  104 continue
      return
      end
      subroutine passf3 (ido,l1,cc,ch,wa1,wa2)
      dimension       cc(ido,3,l1)           ,ch(ido,l1,3)           ,
     1                wa1(1)     ,wa2(1)
      data taur,taui /-.5,-.866025403784439/
      if (ido .ne. 2) go to 102
      do 101 k=1,l1
         tr2 = cc(1,2,k)+cc(1,3,k)
         cr2 = cc(1,1,k)+taur*tr2
         ch(1,k,1) = cc(1,1,k)+tr2
         ti2 = cc(2,2,k)+cc(2,3,k)
         ci2 = cc(2,1,k)+taur*ti2
         ch(2,k,1) = cc(2,1,k)+ti2
         cr3 = taui*(cc(1,2,k)-cc(1,3,k))
         ci3 = taui*(cc(2,2,k)-cc(2,3,k))
         ch(1,k,2) = cr2-ci3
         ch(1,k,3) = cr2+ci3
         ch(2,k,2) = ci2+cr3
         ch(2,k,3) = ci2-cr3
  101 continue
      return
  102 do 104 k=1,l1
         do 103 i=2,ido,2
            tr2 = cc(i-1,2,k)+cc(i-1,3,k)
            cr2 = cc(i-1,1,k)+taur*tr2
            ch(i-1,k,1) = cc(i-1,1,k)+tr2
            ti2 = cc(i,2,k)+cc(i,3,k)
            ci2 = cc(i,1,k)+taur*ti2
            ch(i,k,1) = cc(i,1,k)+ti2
            cr3 = taui*(cc(i-1,2,k)-cc(i-1,3,k))
            ci3 = taui*(cc(i,2,k)-cc(i,3,k))
            dr2 = cr2-ci3
            dr3 = cr2+ci3
            di2 = ci2+cr3
            di3 = ci2-cr3
            ch(i,k,2) = wa1(i-1)*di2-wa1(i)*dr2
            ch(i-1,k,2) = wa1(i-1)*dr2+wa1(i)*di2
            ch(i,k,3) = wa2(i-1)*di3-wa2(i)*dr3
            ch(i-1,k,3) = wa2(i-1)*dr3+wa2(i)*di3
  103    continue
  104 continue
      return
      end
      subroutine passf4 (ido,l1,cc,ch,wa1,wa2,wa3)
      dimension       cc(ido,4,l1)           ,ch(ido,l1,4)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)
      if (ido .ne. 2) go to 102
      do 101 k=1,l1
         ti1 = cc(2,1,k)-cc(2,3,k)
         ti2 = cc(2,1,k)+cc(2,3,k)
         tr4 = cc(2,2,k)-cc(2,4,k)
         ti3 = cc(2,2,k)+cc(2,4,k)
         tr1 = cc(1,1,k)-cc(1,3,k)
         tr2 = cc(1,1,k)+cc(1,3,k)
         ti4 = cc(1,4,k)-cc(1,2,k)
         tr3 = cc(1,2,k)+cc(1,4,k)
         ch(1,k,1) = tr2+tr3
         ch(1,k,3) = tr2-tr3
         ch(2,k,1) = ti2+ti3
         ch(2,k,3) = ti2-ti3
         ch(1,k,2) = tr1+tr4
         ch(1,k,4) = tr1-tr4
         ch(2,k,2) = ti1+ti4
         ch(2,k,4) = ti1-ti4
  101 continue
      return
  102 do 104 k=1,l1
         do 103 i=2,ido,2
            ti1 = cc(i,1,k)-cc(i,3,k)
            ti2 = cc(i,1,k)+cc(i,3,k)
            ti3 = cc(i,2,k)+cc(i,4,k)
            tr4 = cc(i,2,k)-cc(i,4,k)
            tr1 = cc(i-1,1,k)-cc(i-1,3,k)
            tr2 = cc(i-1,1,k)+cc(i-1,3,k)
            ti4 = cc(i-1,4,k)-cc(i-1,2,k)
            tr3 = cc(i-1,2,k)+cc(i-1,4,k)
            ch(i-1,k,1) = tr2+tr3
            cr3 = tr2-tr3
            ch(i,k,1) = ti2+ti3
            ci3 = ti2-ti3
            cr2 = tr1+tr4
            cr4 = tr1-tr4
            ci2 = ti1+ti4
            ci4 = ti1-ti4
            ch(i-1,k,2) = wa1(i-1)*cr2+wa1(i)*ci2
            ch(i,k,2) = wa1(i-1)*ci2-wa1(i)*cr2
            ch(i-1,k,3) = wa2(i-1)*cr3+wa2(i)*ci3
            ch(i,k,3) = wa2(i-1)*ci3-wa2(i)*cr3
            ch(i-1,k,4) = wa3(i-1)*cr4+wa3(i)*ci4
            ch(i,k,4) = wa3(i-1)*ci4-wa3(i)*cr4
  103    continue
  104 continue
      return
      end
      subroutine passf5 (ido,l1,cc,ch,wa1,wa2,wa3,wa4)
      dimension       cc(ido,5,l1)           ,ch(ido,l1,5)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)     ,wa4(1)
      data tr11,ti11,tr12,ti12 /.309016994374947,-.951056516295154,
     1-.809016994374947,-.587785252292473/
      if (ido .ne. 2) go to 102
      do 101 k=1,l1
         ti5 = cc(2,2,k)-cc(2,5,k)
         ti2 = cc(2,2,k)+cc(2,5,k)
         ti4 = cc(2,3,k)-cc(2,4,k)
         ti3 = cc(2,3,k)+cc(2,4,k)
         tr5 = cc(1,2,k)-cc(1,5,k)
         tr2 = cc(1,2,k)+cc(1,5,k)
         tr4 = cc(1,3,k)-cc(1,4,k)
         tr3 = cc(1,3,k)+cc(1,4,k)
         ch(1,k,1) = cc(1,1,k)+tr2+tr3
         ch(2,k,1) = cc(2,1,k)+ti2+ti3
         cr2 = cc(1,1,k)+tr11*tr2+tr12*tr3
         ci2 = cc(2,1,k)+tr11*ti2+tr12*ti3
         cr3 = cc(1,1,k)+tr12*tr2+tr11*tr3
         ci3 = cc(2,1,k)+tr12*ti2+tr11*ti3
         cr5 = ti11*tr5+ti12*tr4
         ci5 = ti11*ti5+ti12*ti4
         cr4 = ti12*tr5-ti11*tr4
         ci4 = ti12*ti5-ti11*ti4
         ch(1,k,2) = cr2-ci5
         ch(1,k,5) = cr2+ci5
         ch(2,k,2) = ci2+cr5
         ch(2,k,3) = ci3+cr4
         ch(1,k,3) = cr3-ci4
         ch(1,k,4) = cr3+ci4
         ch(2,k,4) = ci3-cr4
         ch(2,k,5) = ci2-cr5
  101 continue
      return
  102 do 104 k=1,l1
         do 103 i=2,ido,2
            ti5 = cc(i,2,k)-cc(i,5,k)
            ti2 = cc(i,2,k)+cc(i,5,k)
            ti4 = cc(i,3,k)-cc(i,4,k)
            ti3 = cc(i,3,k)+cc(i,4,k)
            tr5 = cc(i-1,2,k)-cc(i-1,5,k)
            tr2 = cc(i-1,2,k)+cc(i-1,5,k)
            tr4 = cc(i-1,3,k)-cc(i-1,4,k)
            tr3 = cc(i-1,3,k)+cc(i-1,4,k)
            ch(i-1,k,1) = cc(i-1,1,k)+tr2+tr3
            ch(i,k,1) = cc(i,1,k)+ti2+ti3
            cr2 = cc(i-1,1,k)+tr11*tr2+tr12*tr3
            ci2 = cc(i,1,k)+tr11*ti2+tr12*ti3
            cr3 = cc(i-1,1,k)+tr12*tr2+tr11*tr3
            ci3 = cc(i,1,k)+tr12*ti2+tr11*ti3
            cr5 = ti11*tr5+ti12*tr4
            ci5 = ti11*ti5+ti12*ti4
            cr4 = ti12*tr5-ti11*tr4
            ci4 = ti12*ti5-ti11*ti4
            dr3 = cr3-ci4
            dr4 = cr3+ci4
            di3 = ci3+cr4
            di4 = ci3-cr4
            dr5 = cr2+ci5
            dr2 = cr2-ci5
            di5 = ci2-cr5
            di2 = ci2+cr5
            ch(i-1,k,2) = wa1(i-1)*dr2+wa1(i)*di2
            ch(i,k,2) = wa1(i-1)*di2-wa1(i)*dr2
            ch(i-1,k,3) = wa2(i-1)*dr3+wa2(i)*di3
            ch(i,k,3) = wa2(i-1)*di3-wa2(i)*dr3
            ch(i-1,k,4) = wa3(i-1)*dr4+wa3(i)*di4
            ch(i,k,4) = wa3(i-1)*di4-wa3(i)*dr4
            ch(i-1,k,5) = wa4(i-1)*dr5+wa4(i)*di5
            ch(i,k,5) = wa4(i-1)*di5-wa4(i)*dr5
  103    continue
  104 continue
      return
      end


      subroutine cffti (n,wsave)
      dimension       wsave(1)
      if (n .eq. 1) return
      iw1 = n+n+1
      iw2 = iw1+n+n
      call cffti1 (n,wsave(iw1),wsave(iw2))
      return
      end
      subroutine cffti1 (n,wa,ifac)
      dimension       wa(1)      ,ifac(1)    ,ntryh(4)
      data ntryh(1),ntryh(2),ntryh(3),ntryh(4)/3,4,2,5/
      nl = n
      nf = 0
      j = 0
  101 j = j+1
      if (j-4) 102,102,103
  102 ntry = ntryh(j)
      go to 104
  103 ntry = ntry+2
  104 nq = nl/ntry
      nr = nl-ntry*nq
      if (nr) 101,105,101
  105 nf = nf+1
      ifac(nf+2) = ntry
      nl = nq
      if (ntry .ne. 2) go to 107
      if (nf .eq. 1) go to 107
      do 106 i=2,nf
         ib = nf-i+2
         ifac(ib+2) = ifac(ib+1)
  106 continue
      ifac(3) = 2
  107 if (nl .ne. 1) go to 104
      ifac(1) = n
      ifac(2) = nf
      tpi = 6.28318530717959
      argh = tpi/float(n)
      i = 2
      l1 = 1
      do 110 k1=1,nf
         ip = ifac(k1+2)
         ld = 0
         l2 = l1*ip
         ido = n/l2
         idot = ido+ido+2
         ipm = ip-1
         do 109 j=1,ipm
            ict1 = i
            wa(i-1) = 1.
            wa(i) = 0.
            ld = ld+l1
            fi = 0.
            argld = float(ld)*argh
            do 108 ii=4,idot,2
               i = i+2
               fi = fi+1.
               arg = fi*argld
               wa(i-1) = cos(arg)
               wa(i) = sin(arg)
  108       continue
            if (ip .le. 5) go to 109
            wa(ict1-1) = wa(i-1)
            wa(ict1) = wa(i)
  109    continue
         l1 = l2
  110 continue
      return
      end

      subroutine cost (n,x,wsave)
      dimension       x(1)       ,wsave(1)
      nm1 = n-1
      np1 = n+1
      ns2 = n/2
      if (n-2) 106,101,102
  101 x1h = x(1)+x(2)
      x(2) = x(1)-x(2)
      x(1) = x1h
      return
  102 if (n .gt. 3) go to 103
      x1p3 = x(1)+x(3)
      tx2 = x(2)+x(2)
      x(2) = x(1)-x(3)
      x(1) = x1p3+tx2
      x(3) = x1p3-tx2
      return
  103 c1 = x(1)-x(n)
      x(1) = x(1)+x(n)
      do 104 k=2,ns2
         kc = np1-k
         t1 = x(k)+x(kc)
         t2 = x(k)-x(kc)
         c1 = c1+wsave(kc)*t2
         t2 = wsave(k)*t2
         x(k) = t1-t2
         x(kc) = t1+t2
  104 continue
      modn = mod(n,2)
      if (modn .ne. 0) x(ns2+1) = x(ns2+1)+x(ns2+1)
      call rfftf (nm1,x,wsave(n+1))
      xim2 = x(2)
      x(2) = c1
      do 105 i=4,n,2
         xi = x(i)
         x(i) = x(i-2)-x(i-1)
         x(i-1) = xim2
         xim2 = xi
  105 continue
      if (modn .ne. 0) x(n) = xim2
  106 return
      end
      subroutine radf2 (ido,l1,cc,ch,wa1)
      dimension       ch(ido,2,l1)           ,cc(ido,l1,2)           ,
     1                wa1(1)
      do 101 k=1,l1
         ch(1,1,k) = cc(1,k,1)+cc(1,k,2)
         ch(ido,2,k) = cc(1,k,1)-cc(1,k,2)
  101 continue
      if (ido-2) 107,105,102
  102 idp2 = ido+2
      do 104 k=1,l1
         do 103 i=3,ido,2
            ic = idp2-i
            tr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            ti2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            ch(i,1,k) = cc(i,k,1)+ti2
            ch(ic,2,k) = ti2-cc(i,k,1)
            ch(i-1,1,k) = cc(i-1,k,1)+tr2
            ch(ic-1,2,k) = cc(i-1,k,1)-tr2
  103    continue
  104 continue
      if (mod(ido,2) .eq. 1) return
  105 do 106 k=1,l1
         ch(1,2,k) = -cc(ido,k,2)
         ch(ido,1,k) = cc(ido,k,1)
  106 continue
  107 return
      end
      subroutine radf3 (ido,l1,cc,ch,wa1,wa2)
      dimension       ch(ido,3,l1)           ,cc(ido,l1,3)           ,
     1                wa1(1)     ,wa2(1)
      data taur,taui /-.5,.866025403784439/
      do 101 k=1,l1
         cr2 = cc(1,k,2)+cc(1,k,3)
         ch(1,1,k) = cc(1,k,1)+cr2
         ch(1,3,k) = taui*(cc(1,k,3)-cc(1,k,2))
         ch(ido,2,k) = cc(1,k,1)+taur*cr2
  101 continue
      if (ido .eq. 1) return
      idp2 = ido+2
      do 103 k=1,l1
         do 102 i=3,ido,2
            ic = idp2-i
            dr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            di2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            dr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            di3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            cr2 = dr2+dr3
            ci2 = di2+di3
            ch(i-1,1,k) = cc(i-1,k,1)+cr2
            ch(i,1,k) = cc(i,k,1)+ci2
            tr2 = cc(i-1,k,1)+taur*cr2
            ti2 = cc(i,k,1)+taur*ci2
            tr3 = taui*(di2-di3)
            ti3 = taui*(dr3-dr2)
            ch(i-1,3,k) = tr2+tr3
            ch(ic-1,2,k) = tr2-tr3
            ch(i,3,k) = ti2+ti3
            ch(ic,2,k) = ti3-ti2
  102    continue
  103 continue
      return
      end
      subroutine radf4 (ido,l1,cc,ch,wa1,wa2,wa3)
      dimension       cc(ido,l1,4)           ,ch(ido,4,l1)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)
      data hsqt2 /.7071067811865475/
      do 101 k=1,l1
         tr1 = cc(1,k,2)+cc(1,k,4)
         tr2 = cc(1,k,1)+cc(1,k,3)
         ch(1,1,k) = tr1+tr2
         ch(ido,4,k) = tr2-tr1
         ch(ido,2,k) = cc(1,k,1)-cc(1,k,3)
         ch(1,3,k) = cc(1,k,4)-cc(1,k,2)
  101 continue
      if (ido-2) 107,105,102
  102 idp2 = ido+2
      do 104 k=1,l1
         do 103 i=3,ido,2
            ic = idp2-i
            cr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            ci2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            cr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            ci3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            cr4 = wa3(i-2)*cc(i-1,k,4)+wa3(i-1)*cc(i,k,4)
            ci4 = wa3(i-2)*cc(i,k,4)-wa3(i-1)*cc(i-1,k,4)
            tr1 = cr2+cr4
            tr4 = cr4-cr2
            ti1 = ci2+ci4
            ti4 = ci2-ci4
            ti2 = cc(i,k,1)+ci3
            ti3 = cc(i,k,1)-ci3
            tr2 = cc(i-1,k,1)+cr3
            tr3 = cc(i-1,k,1)-cr3
            ch(i-1,1,k) = tr1+tr2
            ch(ic-1,4,k) = tr2-tr1
            ch(i,1,k) = ti1+ti2
            ch(ic,4,k) = ti1-ti2
            ch(i-1,3,k) = ti4+tr3
            ch(ic-1,2,k) = tr3-ti4
            ch(i,3,k) = tr4+ti3
            ch(ic,2,k) = tr4-ti3
  103    continue
  104 continue
      if (mod(ido,2) .eq. 1) return
  105 continue
      do 106 k=1,l1
         ti1 = -hsqt2*(cc(ido,k,2)+cc(ido,k,4))
         tr1 = hsqt2*(cc(ido,k,2)-cc(ido,k,4))
         ch(ido,1,k) = tr1+cc(ido,k,1)
         ch(ido,3,k) = cc(ido,k,1)-tr1
         ch(1,2,k) = ti1-cc(ido,k,3)
         ch(1,4,k) = ti1+cc(ido,k,3)
  106 continue
  107 return
      end
      subroutine radf5 (ido,l1,cc,ch,wa1,wa2,wa3,wa4)
      dimension       cc(ido,l1,5)           ,ch(ido,5,l1)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)     ,wa4(1)
      data tr11,ti11,tr12,ti12 /.309016994374947,.951056516295154,
     1-.809016994374947,.587785252292473/
      do 101 k=1,l1
         cr2 = cc(1,k,5)+cc(1,k,2)
         ci5 = cc(1,k,5)-cc(1,k,2)
         cr3 = cc(1,k,4)+cc(1,k,3)
         ci4 = cc(1,k,4)-cc(1,k,3)
         ch(1,1,k) = cc(1,k,1)+cr2+cr3
         ch(ido,2,k) = cc(1,k,1)+tr11*cr2+tr12*cr3
         ch(1,3,k) = ti11*ci5+ti12*ci4
         ch(ido,4,k) = cc(1,k,1)+tr12*cr2+tr11*cr3
         ch(1,5,k) = ti12*ci5-ti11*ci4
  101 continue
      if (ido .eq. 1) return
      idp2 = ido+2
      do 103 k=1,l1
         do 102 i=3,ido,2
            ic = idp2-i
            dr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            di2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            dr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            di3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            dr4 = wa3(i-2)*cc(i-1,k,4)+wa3(i-1)*cc(i,k,4)
            di4 = wa3(i-2)*cc(i,k,4)-wa3(i-1)*cc(i-1,k,4)
            dr5 = wa4(i-2)*cc(i-1,k,5)+wa4(i-1)*cc(i,k,5)
            di5 = wa4(i-2)*cc(i,k,5)-wa4(i-1)*cc(i-1,k,5)
            cr2 = dr2+dr5
            ci5 = dr5-dr2
            cr5 = di2-di5
            ci2 = di2+di5
            cr3 = dr3+dr4
            ci4 = dr4-dr3
            cr4 = di3-di4
            ci3 = di3+di4
            ch(i-1,1,k) = cc(i-1,k,1)+cr2+cr3
            ch(i,1,k) = cc(i,k,1)+ci2+ci3
            tr2 = cc(i-1,k,1)+tr11*cr2+tr12*cr3
            ti2 = cc(i,k,1)+tr11*ci2+tr12*ci3
            tr3 = cc(i-1,k,1)+tr12*cr2+tr11*cr3
            ti3 = cc(i,k,1)+tr12*ci2+tr11*ci3
            tr5 = ti11*cr5+ti12*cr4
            ti5 = ti11*ci5+ti12*ci4
            tr4 = ti12*cr5-ti11*cr4
            ti4 = ti12*ci5-ti11*ci4
            ch(i-1,3,k) = tr2+tr5
            ch(ic-1,2,k) = tr2-tr5
            ch(i,3,k) = ti2+ti5
            ch(ic,2,k) = ti5-ti2
            ch(i-1,5,k) = tr3+tr4
            ch(ic-1,4,k) = tr3-tr4
            ch(i,5,k) = ti3+ti4
            ch(ic,4,k) = ti4-ti3
  102    continue
  103 continue
      return
      end
      subroutine radfg (ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa)
      dimension       ch(ido,l1,ip)          ,cc(ido,ip,l1)          ,
     1                c1(ido,l1,ip)          ,c2(idl1,ip),
     2                ch2(idl1,ip)           ,wa(1)
      data tpi/6.28318530717959/
      arg = tpi/float(ip)
      dcp = cos(arg)
      dsp = sin(arg)
      ipph = (ip+1)/2
      ipp2 = ip+2
      idp2 = ido+2
      nbd = (ido-1)/2
      if (ido .eq. 1) go to 119
      do 101 ik=1,idl1
         ch2(ik,1) = c2(ik,1)
  101 continue
      do 103 j=2,ip
         do 102 k=1,l1
            ch(1,k,j) = c1(1,k,j)
  102    continue
  103 continue
      if (nbd .gt. l1) go to 107
      is = -ido
      do 106 j=2,ip
         is = is+ido
         idij = is
         do 105 i=3,ido,2
            idij = idij+2
            do 104 k=1,l1
               ch(i-1,k,j) = wa(idij-1)*c1(i-1,k,j)+wa(idij)*c1(i,k,j)
               ch(i,k,j) = wa(idij-1)*c1(i,k,j)-wa(idij)*c1(i-1,k,j)
  104       continue
  105    continue
  106 continue
      go to 111
  107 is = -ido
      do 110 j=2,ip
         is = is+ido
         do 109 k=1,l1
            idij = is
            do 108 i=3,ido,2
               idij = idij+2
               ch(i-1,k,j) = wa(idij-1)*c1(i-1,k,j)+wa(idij)*c1(i,k,j)
               ch(i,k,j) = wa(idij-1)*c1(i,k,j)-wa(idij)*c1(i-1,k,j)
  108       continue
  109    continue
  110 continue
  111 if (nbd .lt. l1) go to 115
      do 114 j=2,ipph
         jc = ipp2-j
         do 113 k=1,l1
            do 112 i=3,ido,2
               c1(i-1,k,j) = ch(i-1,k,j)+ch(i-1,k,jc)
               c1(i-1,k,jc) = ch(i,k,j)-ch(i,k,jc)
               c1(i,k,j) = ch(i,k,j)+ch(i,k,jc)
               c1(i,k,jc) = ch(i-1,k,jc)-ch(i-1,k,j)
  112       continue
  113    continue
  114 continue
      go to 121
  115 do 118 j=2,ipph
         jc = ipp2-j
         do 117 i=3,ido,2
            do 116 k=1,l1
               c1(i-1,k,j) = ch(i-1,k,j)+ch(i-1,k,jc)
               c1(i-1,k,jc) = ch(i,k,j)-ch(i,k,jc)
               c1(i,k,j) = ch(i,k,j)+ch(i,k,jc)
               c1(i,k,jc) = ch(i-1,k,jc)-ch(i-1,k,j)
  116       continue
  117    continue
  118 continue
      go to 121
  119 do 120 ik=1,idl1
         c2(ik,1) = ch2(ik,1)
  120 continue
  121 do 123 j=2,ipph
         jc = ipp2-j
         do 122 k=1,l1
            c1(1,k,j) = ch(1,k,j)+ch(1,k,jc)
            c1(1,k,jc) = ch(1,k,jc)-ch(1,k,j)
  122    continue
  123 continue
c
      ar1 = 1.
      ai1 = 0.
      do 127 l=2,ipph
         lc = ipp2-l
         ar1h = dcp*ar1-dsp*ai1
         ai1 = dcp*ai1+dsp*ar1
         ar1 = ar1h
         do 124 ik=1,idl1
            ch2(ik,l) = c2(ik,1)+ar1*c2(ik,2)
            ch2(ik,lc) = ai1*c2(ik,ip)
  124    continue
         dc2 = ar1
         ds2 = ai1
         ar2 = ar1
         ai2 = ai1
         do 126 j=3,ipph
            jc = ipp2-j
            ar2h = dc2*ar2-ds2*ai2
            ai2 = dc2*ai2+ds2*ar2
            ar2 = ar2h
            do 125 ik=1,idl1
               ch2(ik,l) = ch2(ik,l)+ar2*c2(ik,j)
               ch2(ik,lc) = ch2(ik,lc)+ai2*c2(ik,jc)
  125       continue
  126    continue
  127 continue
      do 129 j=2,ipph
         do 128 ik=1,idl1
            ch2(ik,1) = ch2(ik,1)+c2(ik,j)
  128    continue
  129 continue
c
      if (ido .lt. l1) go to 132
      do 131 k=1,l1
         do 130 i=1,ido
            cc(i,1,k) = ch(i,k,1)
  130    continue
  131 continue
      go to 135
  132 do 134 i=1,ido
         do 133 k=1,l1
            cc(i,1,k) = ch(i,k,1)
  133    continue
  134 continue
  135 do 137 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 136 k=1,l1
            cc(ido,j2-2,k) = ch(1,k,j)
            cc(1,j2-1,k) = ch(1,k,jc)
  136    continue
  137 continue
      if (ido .eq. 1) return
      if (nbd .lt. l1) go to 141
      do 140 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 139 k=1,l1
            do 138 i=3,ido,2
               ic = idp2-i
               cc(i-1,j2-1,k) = ch(i-1,k,j)+ch(i-1,k,jc)
               cc(ic-1,j2-2,k) = ch(i-1,k,j)-ch(i-1,k,jc)
               cc(i,j2-1,k) = ch(i,k,j)+ch(i,k,jc)
               cc(ic,j2-2,k) = ch(i,k,jc)-ch(i,k,j)
  138       continue
  139    continue
  140 continue
      return
  141 do 144 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 143 i=3,ido,2
            ic = idp2-i
            do 142 k=1,l1
               cc(i-1,j2-1,k) = ch(i-1,k,j)+ch(i-1,k,jc)
               cc(ic-1,j2-2,k) = ch(i-1,k,j)-ch(i-1,k,jc)
               cc(i,j2-1,k) = ch(i,k,j)+ch(i,k,jc)
               cc(ic,j2-2,k) = ch(i,k,jc)-ch(i,k,j)
  142       continue
  143    continue
  144 continue
      return
      end
      subroutine rfftf (n,r,wsave)
      dimension       r(1)       ,wsave(1)
      if (n .eq. 1) return
      call rfftf1 (n,r,wsave,wsave(n+1),wsave(2*n+1))
      return
      end
      subroutine rfftf1 (n,c,ch,wa,ifac)
      dimension       ch(1)      ,c(1)       ,wa(1)      ,ifac(1)
      nf = ifac(2)
      na = 1
      l2 = n
      iw = n
      do 111 k1=1,nf
         kh = nf-k1
         ip = ifac(kh+3)
         l1 = l2/ip
         ido = n/l2
         idl1 = ido*l1
         iw = iw-(ip-1)*ido
         na = 1-na
         if (ip .ne. 4) go to 102
         ix2 = iw+ido
         ix3 = ix2+ido
         if (na .ne. 0) go to 101
         call radf4 (ido,l1,c,ch,wa(iw),wa(ix2),wa(ix3))
         go to 110
  101    call radf4 (ido,l1,ch,c,wa(iw),wa(ix2),wa(ix3))
         go to 110
  102    if (ip .ne. 2) go to 104
         if (na .ne. 0) go to 103
         call radf2 (ido,l1,c,ch,wa(iw))
         go to 110
  103    call radf2 (ido,l1,ch,c,wa(iw))
         go to 110
  104    if (ip .ne. 3) go to 106
         ix2 = iw+ido
         if (na .ne. 0) go to 105
         call radf3 (ido,l1,c,ch,wa(iw),wa(ix2))
         go to 110
  105    call radf3 (ido,l1,ch,c,wa(iw),wa(ix2))
         go to 110
  106    if (ip .ne. 5) go to 108
         ix2 = iw+ido
         ix3 = ix2+ido
         ix4 = ix3+ido
         if (na .ne. 0) go to 107
         call radf5 (ido,l1,c,ch,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 110
  107    call radf5 (ido,l1,ch,c,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 110
  108    if (ido .eq. 1) na = 1-na
         if (na .ne. 0) go to 109
         call radfg (ido,ip,l1,idl1,c,c,c,ch,ch,wa(iw))
         na = 1
         go to 110
  109    call radfg (ido,ip,l1,idl1,ch,ch,ch,c,c,wa(iw))
         na = 0
  110    l2 = l1
  111 continue
      if (na .eq. 1) return
      do 112 i=1,n
         c(i) = ch(i)
  112 continue
      return
      end
      subroutine cost2 (n,x,wsave)
      dimension       x(1)       ,wsave(1)
      nm1 = n-1
      np1 = n+1
      ns2 = n/2
      if (n-2) 106,101,102
  101 x1h = x(1)+x(2)
      x(2) = x(1)-x(2)
      x(1) = x1h
      return
  102 if (n .gt. 3) go to 103
      x1p3 = x(1)+x(3)
      tx2 = x(2)+x(2)
      x(2) = x(1)-x(3)
      x(1) = x1p3+tx2
      x(3) = x1p3-tx2
      return
  103 c1 = x(1)-x(n)
      x(1) = x(1)+x(n)
      do 104 k=2,ns2
         kc = np1-k
         t1 = x(k)+x(kc)
         t2 = x(k)-x(kc)
         c1 = c1+wsave(kc)*t2
         t2 = wsave(k)*t2
         x(k) = t1-t2
         x(kc) = t1+t2
  104 continue
      modn = mod(n,2)
      if (modn .ne. 0) x(ns2+1) = x(ns2+1)+x(ns2+1)
      call rfftf2 (nm1,x,wsave(n+1))
      xim2 = x(2)
      x(2) = c1
      do 105 i=4,n,2
         xi = x(i)
         x(i) = x(i-2)-x(i-1)
         x(i-1) = xim2
         xim2 = xi
  105 continue
      if (modn .ne. 0) x(n) = xim2
  106 return
      end
      subroutine radf22 (ido,l1,cc,ch,wa1)
      dimension       ch(ido,2,l1)           ,cc(ido,l1,2)           ,
     1                wa1(1)
      do 101 k=1,l1
         ch(1,1,k) = cc(1,k,1)+cc(1,k,2)
         ch(ido,2,k) = cc(1,k,1)-cc(1,k,2)
  101 continue
      if (ido-2) 107,105,102
  102 idp2 = ido+2
      do 104 k=1,l1
         do 103 i=3,ido,2
            ic = idp2-i
            tr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            ti2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            ch(i,1,k) = cc(i,k,1)+ti2
            ch(ic,2,k) = ti2-cc(i,k,1)
            ch(i-1,1,k) = cc(i-1,k,1)+tr2
            ch(ic-1,2,k) = cc(i-1,k,1)-tr2
  103    continue
  104 continue
      if (mod(ido,2) .eq. 1) return
  105 do 106 k=1,l1
         ch(1,2,k) = -cc(ido,k,2)
         ch(ido,1,k) = cc(ido,k,1)
  106 continue
  107 return
      end
      subroutine radf32 (ido,l1,cc,ch,wa1,wa2)
      dimension       ch(ido,3,l1)           ,cc(ido,l1,3)           ,
     1                wa1(1)     ,wa2(1)
      data taur,taui /-.5,.866025403784439/
      do 101 k=1,l1
         cr2 = cc(1,k,2)+cc(1,k,3)
         ch(1,1,k) = cc(1,k,1)+cr2
         ch(1,3,k) = taui*(cc(1,k,3)-cc(1,k,2))
         ch(ido,2,k) = cc(1,k,1)+taur*cr2
  101 continue
      if (ido .eq. 1) return
      idp2 = ido+2
      do 103 k=1,l1
         do 102 i=3,ido,2
            ic = idp2-i
            dr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            di2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            dr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            di3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            cr2 = dr2+dr3
            ci2 = di2+di3
            ch(i-1,1,k) = cc(i-1,k,1)+cr2
            ch(i,1,k) = cc(i,k,1)+ci2
            tr2 = cc(i-1,k,1)+taur*cr2
            ti2 = cc(i,k,1)+taur*ci2
            tr3 = taui*(di2-di3)
            ti3 = taui*(dr3-dr2)
            ch(i-1,3,k) = tr2+tr3
            ch(ic-1,2,k) = tr2-tr3
            ch(i,3,k) = ti2+ti3
            ch(ic,2,k) = ti3-ti2
  102    continue
  103 continue
      return
      end
      subroutine radf42 (ido,l1,cc,ch,wa1,wa2,wa3)
      dimension       cc(ido,l1,4)           ,ch(ido,4,l1)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)
      data hsqt2 /.7071067811865475/
      do 101 k=1,l1
         tr1 = cc(1,k,2)+cc(1,k,4)
         tr2 = cc(1,k,1)+cc(1,k,3)
         ch(1,1,k) = tr1+tr2
         ch(ido,4,k) = tr2-tr1
         ch(ido,2,k) = cc(1,k,1)-cc(1,k,3)
         ch(1,3,k) = cc(1,k,4)-cc(1,k,2)
  101 continue
      if (ido-2) 107,105,102
  102 idp2 = ido+2
      do 104 k=1,l1
         do 103 i=3,ido,2
            ic = idp2-i
            cr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            ci2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            cr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            ci3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            cr4 = wa3(i-2)*cc(i-1,k,4)+wa3(i-1)*cc(i,k,4)
            ci4 = wa3(i-2)*cc(i,k,4)-wa3(i-1)*cc(i-1,k,4)
            tr1 = cr2+cr4
            tr4 = cr4-cr2
            ti1 = ci2+ci4
            ti4 = ci2-ci4
            ti2 = cc(i,k,1)+ci3
            ti3 = cc(i,k,1)-ci3
            tr2 = cc(i-1,k,1)+cr3
            tr3 = cc(i-1,k,1)-cr3
            ch(i-1,1,k) = tr1+tr2
            ch(ic-1,4,k) = tr2-tr1
            ch(i,1,k) = ti1+ti2
            ch(ic,4,k) = ti1-ti2
            ch(i-1,3,k) = ti4+tr3
            ch(ic-1,2,k) = tr3-ti4
            ch(i,3,k) = tr4+ti3
            ch(ic,2,k) = tr4-ti3
  103    continue
  104 continue
      if (mod(ido,2) .eq. 1) return
  105 continue
      do 106 k=1,l1
         ti1 = -hsqt2*(cc(ido,k,2)+cc(ido,k,4))
         tr1 = hsqt2*(cc(ido,k,2)-cc(ido,k,4))
         ch(ido,1,k) = tr1+cc(ido,k,1)
         ch(ido,3,k) = cc(ido,k,1)-tr1
         ch(1,2,k) = ti1-cc(ido,k,3)
         ch(1,4,k) = ti1+cc(ido,k,3)
  106 continue
  107 return
      end
      subroutine radf52 (ido,l1,cc,ch,wa1,wa2,wa3,wa4)
      dimension       cc(ido,l1,5)           ,ch(ido,5,l1)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)     ,wa4(1)
      data tr11,ti11,tr12,ti12 /.309016994374947,.951056516295154,
     1-.809016994374947,.587785252292473/
      do 101 k=1,l1
         cr2 = cc(1,k,5)+cc(1,k,2)
         ci5 = cc(1,k,5)-cc(1,k,2)
         cr3 = cc(1,k,4)+cc(1,k,3)
         ci4 = cc(1,k,4)-cc(1,k,3)
         ch(1,1,k) = cc(1,k,1)+cr2+cr3
         ch(ido,2,k) = cc(1,k,1)+tr11*cr2+tr12*cr3
         ch(1,3,k) = ti11*ci5+ti12*ci4
         ch(ido,4,k) = cc(1,k,1)+tr12*cr2+tr11*cr3
         ch(1,5,k) = ti12*ci5-ti11*ci4
  101 continue
      if (ido .eq. 1) return
      idp2 = ido+2
      do 103 k=1,l1
         do 102 i=3,ido,2
            ic = idp2-i
            dr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            di2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            dr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            di3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            dr4 = wa3(i-2)*cc(i-1,k,4)+wa3(i-1)*cc(i,k,4)
            di4 = wa3(i-2)*cc(i,k,4)-wa3(i-1)*cc(i-1,k,4)
            dr5 = wa4(i-2)*cc(i-1,k,5)+wa4(i-1)*cc(i,k,5)
            di5 = wa4(i-2)*cc(i,k,5)-wa4(i-1)*cc(i-1,k,5)
            cr2 = dr2+dr5
            ci5 = dr5-dr2
            cr5 = di2-di5
            ci2 = di2+di5
            cr3 = dr3+dr4
            ci4 = dr4-dr3
            cr4 = di3-di4
            ci3 = di3+di4
            ch(i-1,1,k) = cc(i-1,k,1)+cr2+cr3
            ch(i,1,k) = cc(i,k,1)+ci2+ci3
            tr2 = cc(i-1,k,1)+tr11*cr2+tr12*cr3
            ti2 = cc(i,k,1)+tr11*ci2+tr12*ci3
            tr3 = cc(i-1,k,1)+tr12*cr2+tr11*cr3
            ti3 = cc(i,k,1)+tr12*ci2+tr11*ci3
            tr5 = ti11*cr5+ti12*cr4
            ti5 = ti11*ci5+ti12*ci4
            tr4 = ti12*cr5-ti11*cr4
            ti4 = ti12*ci5-ti11*ci4
            ch(i-1,3,k) = tr2+tr5
            ch(ic-1,2,k) = tr2-tr5
            ch(i,3,k) = ti2+ti5
            ch(ic,2,k) = ti5-ti2
            ch(i-1,5,k) = tr3+tr4
            ch(ic-1,4,k) = tr3-tr4
            ch(i,5,k) = ti3+ti4
            ch(ic,4,k) = ti4-ti3
  102    continue
  103 continue
      return
      end
      subroutine radfg2 (ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa)
      dimension       ch(ido,l1,ip)          ,cc(ido,ip,l1)          ,
     1                c1(ido,l1,ip)          ,c2(idl1,ip),
     2                ch2(idl1,ip)           ,wa(1)
      data tpi/6.28318530717959/
      arg = tpi/float(ip)
      dcp = cos(arg)
      dsp = sin(arg)
      ipph = (ip+1)/2
      ipp2 = ip+2
      idp2 = ido+2
      nbd = (ido-1)/2
      if (ido .eq. 1) go to 119
      do 101 ik=1,idl1
         ch2(ik,1) = c2(ik,1)
  101 continue
      do 103 j=2,ip
         do 102 k=1,l1
            ch(1,k,j) = c1(1,k,j)
  102    continue
  103 continue
      if (nbd .gt. l1) go to 107
      is = -ido
      do 106 j=2,ip
         is = is+ido
         idij = is
         do 105 i=3,ido,2
            idij = idij+2
            do 104 k=1,l1
               ch(i-1,k,j) = wa(idij-1)*c1(i-1,k,j)+wa(idij)*c1(i,k,j)
               ch(i,k,j) = wa(idij-1)*c1(i,k,j)-wa(idij)*c1(i-1,k,j)
  104       continue
  105    continue
  106 continue
      go to 111
  107 is = -ido
      do 110 j=2,ip
         is = is+ido
         do 109 k=1,l1
            idij = is
            do 108 i=3,ido,2
               idij = idij+2
               ch(i-1,k,j) = wa(idij-1)*c1(i-1,k,j)+wa(idij)*c1(i,k,j)
               ch(i,k,j) = wa(idij-1)*c1(i,k,j)-wa(idij)*c1(i-1,k,j)
  108       continue
  109    continue
  110 continue
  111 if (nbd .lt. l1) go to 115
      do 114 j=2,ipph
         jc = ipp2-j
         do 113 k=1,l1
            do 112 i=3,ido,2
               c1(i-1,k,j) = ch(i-1,k,j)+ch(i-1,k,jc)
               c1(i-1,k,jc) = ch(i,k,j)-ch(i,k,jc)
               c1(i,k,j) = ch(i,k,j)+ch(i,k,jc)
               c1(i,k,jc) = ch(i-1,k,jc)-ch(i-1,k,j)
  112       continue
  113    continue
  114 continue
      go to 121
  115 do 118 j=2,ipph
         jc = ipp2-j
         do 117 i=3,ido,2
            do 116 k=1,l1
               c1(i-1,k,j) = ch(i-1,k,j)+ch(i-1,k,jc)
               c1(i-1,k,jc) = ch(i,k,j)-ch(i,k,jc)
               c1(i,k,j) = ch(i,k,j)+ch(i,k,jc)
               c1(i,k,jc) = ch(i-1,k,jc)-ch(i-1,k,j)
  116       continue
  117    continue
  118 continue
      go to 121
  119 do 120 ik=1,idl1
         c2(ik,1) = ch2(ik,1)
  120 continue
  121 do 123 j=2,ipph
         jc = ipp2-j
         do 122 k=1,l1
            c1(1,k,j) = ch(1,k,j)+ch(1,k,jc)
            c1(1,k,jc) = ch(1,k,jc)-ch(1,k,j)
  122    continue
  123 continue
c
      ar1 = 1.
      ai1 = 0.
      do 127 l=2,ipph
         lc = ipp2-l
         ar1h = dcp*ar1-dsp*ai1
         ai1 = dcp*ai1+dsp*ar1
         ar1 = ar1h
         do 124 ik=1,idl1
            ch2(ik,l) = c2(ik,1)+ar1*c2(ik,2)
            ch2(ik,lc) = ai1*c2(ik,ip)
  124    continue
         dc2 = ar1
         ds2 = ai1
         ar2 = ar1
         ai2 = ai1
         do 126 j=3,ipph
            jc = ipp2-j
            ar2h = dc2*ar2-ds2*ai2
            ai2 = dc2*ai2+ds2*ar2
            ar2 = ar2h
            do 125 ik=1,idl1
               ch2(ik,l) = ch2(ik,l)+ar2*c2(ik,j)
               ch2(ik,lc) = ch2(ik,lc)+ai2*c2(ik,jc)
  125       continue
  126    continue
  127 continue
      do 129 j=2,ipph
         do 128 ik=1,idl1
            ch2(ik,1) = ch2(ik,1)+c2(ik,j)
  128    continue
  129 continue
c
      if (ido .lt. l1) go to 132
      do 131 k=1,l1
         do 130 i=1,ido
            cc(i,1,k) = ch(i,k,1)
  130    continue
  131 continue
      go to 135
  132 do 134 i=1,ido
         do 133 k=1,l1
            cc(i,1,k) = ch(i,k,1)
  133    continue
  134 continue
  135 do 137 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 136 k=1,l1
            cc(ido,j2-2,k) = ch(1,k,j)
            cc(1,j2-1,k) = ch(1,k,jc)
  136    continue
  137 continue
      if (ido .eq. 1) return
      if (nbd .lt. l1) go to 141
      do 140 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 139 k=1,l1
            do 138 i=3,ido,2
               ic = idp2-i
               cc(i-1,j2-1,k) = ch(i-1,k,j)+ch(i-1,k,jc)
               cc(ic-1,j2-2,k) = ch(i-1,k,j)-ch(i-1,k,jc)
               cc(i,j2-1,k) = ch(i,k,j)+ch(i,k,jc)
               cc(ic,j2-2,k) = ch(i,k,jc)-ch(i,k,j)
  138       continue
  139    continue
  140 continue
      return
  141 do 144 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 143 i=3,ido,2
            ic = idp2-i
            do 142 k=1,l1
               cc(i-1,j2-1,k) = ch(i-1,k,j)+ch(i-1,k,jc)
               cc(ic-1,j2-2,k) = ch(i-1,k,j)-ch(i-1,k,jc)
               cc(i,j2-1,k) = ch(i,k,j)+ch(i,k,jc)
               cc(ic,j2-2,k) = ch(i,k,jc)-ch(i,k,j)
  142       continue
  143    continue
  144 continue
      return
      end
      subroutine rfftf2 (n,r,wsave)
      dimension       r(1)       ,wsave(1)
      if (n .eq. 1) return
      call rfftf12 (n,r,wsave,wsave(n+1),wsave(2*n+1))
      return
      end
      subroutine rfftf12 (n,c,ch,wa,ifac)
      dimension       ch(1)      ,c(1)       ,wa(1)      ,ifac(1)
      nf = ifac(2)
      na = 1
      l2 = n
      iw = n
      do 111 k1=1,nf
         kh = nf-k1
         ip = ifac(kh+3)
         l1 = l2/ip
         ido = n/l2
         idl1 = ido*l1
         iw = iw-(ip-1)*ido
         na = 1-na
         if (ip .ne. 4) go to 102
         ix2 = iw+ido
         ix3 = ix2+ido
         if (na .ne. 0) go to 101
         call radf42 (ido,l1,c,ch,wa(iw),wa(ix2),wa(ix3))
         go to 110
  101    call radf42 (ido,l1,ch,c,wa(iw),wa(ix2),wa(ix3))
         go to 110
  102    if (ip .ne. 2) go to 104
         if (na .ne. 0) go to 103
         call radf22 (ido,l1,c,ch,wa(iw))
         go to 110
  103    call radf22 (ido,l1,ch,c,wa(iw))
         go to 110
  104    if (ip .ne. 3) go to 106
         ix2 = iw+ido
         if (na .ne. 0) go to 105
         call radf32 (ido,l1,c,ch,wa(iw),wa(ix2))
         go to 110
  105    call radf32 (ido,l1,ch,c,wa(iw),wa(ix2))
         go to 110
  106    if (ip .ne. 5) go to 108
         ix2 = iw+ido
         ix3 = ix2+ido
         ix4 = ix3+ido
         if (na .ne. 0) go to 107
         call radf52 (ido,l1,c,ch,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 110
  107    call radf52 (ido,l1,ch,c,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 110
  108    if (ido .eq. 1) na = 1-na
         if (na .ne. 0) go to 109
         call radfg2 (ido,ip,l1,idl1,c,c,c,ch,ch,wa(iw))
         na = 1
         go to 110
  109    call radfg2 (ido,ip,l1,idl1,ch,ch,ch,c,c,wa(iw))
         na = 0
  110    l2 = l1
  111 continue
      if (na .eq. 1) return
      do 112 i=1,n
         c(i) = ch(i)
  112 continue
      return
      end
      subroutine costi (n,wsave)
      dimension       wsave(1)
      data pi /3.14159265358979/
      if (n .le. 3) return
      nm1 = n-1
      np1 = n+1
      ns2 = n/2
      dt = pi/float(nm1)
      fk = 0.
      do 101 k=2,ns2
         kc = np1-k
         fk = fk+1.
         wsave(k) = 2.*sin(fk*dt)
         wsave(kc) = 2.*cos(fk*dt)
  101 continue
      call rffti (nm1,wsave(n+1))
      return
      end
      subroutine rffti (n,wsave)
      dimension       wsave(1)
      if (n .eq. 1) return
      call rffti1 (n,wsave(n+1),wsave(2*n+1))
      return
      end
      subroutine rffti1 (n,wa,ifac)
      dimension       wa(1)      ,ifac(1)    ,ntryh(4)
      data ntryh(1),ntryh(2),ntryh(3),ntryh(4)/4,2,3,5/
      nl = n
      nf = 0
      j = 0
  101 j = j+1
      if (j-4) 102,102,103
  102 ntry = ntryh(j)
      go to 104
  103 ntry = ntry+2
  104 nq = nl/ntry
      nr = nl-ntry*nq
      if (nr) 101,105,101
  105 nf = nf+1
      ifac(nf+2) = ntry
      nl = nq
      if (ntry .ne. 2) go to 107
      if (nf .eq. 1) go to 107
      do 106 i=2,nf
         ib = nf-i+2
         ifac(ib+2) = ifac(ib+1)
  106 continue
      ifac(3) = 2
  107 if (nl .ne. 1) go to 104
      ifac(1) = n
      ifac(2) = nf
      tpi = 6.28318530717959
      argh = tpi/float(n)
      is = 0
      nfm1 = nf-1
      l1 = 1
      if (nfm1 .eq. 0) return
      do 110 k1=1,nfm1
         ip = ifac(k1+2)
         ld = 0
         l2 = l1*ip
         ido = n/l2
         ipm = ip-1
         do 109 j=1,ipm
            ld = ld+l1
            i = is
            argld = float(ld)*argh
            fi = 0.
            do 108 ii=3,ido,2
               i = i+2
               fi = fi+1.
               arg = fi*argld
               wa(i-1) = cos(arg)
               wa(i) = sin(arg)
  108       continue
            is = is+ido
  109    continue
         l1 = l2
  110 continue
      return
      end
      subroutine costi2 (n,wsave)
      dimension       wsave(1)
      data pi /3.14159265358979/
      if (n .le. 3) return
      nm1 = n-1
      np1 = n+1
      ns2 = n/2
      dt = pi/float(nm1)
      fk = 0.
      do 101 k=2,ns2
         kc = np1-k
         fk = fk+1.
         wsave(k) = 2.*sin(fk*dt)
         wsave(kc) = 2.*cos(fk*dt)
  101 continue
      call rffti2 (nm1,wsave(n+1))
      return
      end
      subroutine rffti2 (n,wsave)
      dimension       wsave(1)
      if (n .eq. 1) return
      call rffti12 (n,wsave(n+1),wsave(2*n+1))
      return
      end
      subroutine rffti12 (n,wa,ifac)
      dimension       wa(1)      ,ifac(1)    ,ntryh(4)
      data ntryh(1),ntryh(2),ntryh(3),ntryh(4)/4,2,3,5/
      nl = n
      nf = 0
      j = 0
  101 j = j+1
      if (j-4) 102,102,103
  102 ntry = ntryh(j)
      go to 104
  103 ntry = ntry+2
  104 nq = nl/ntry
      nr = nl-ntry*nq
      if (nr) 101,105,101
  105 nf = nf+1
      ifac(nf+2) = ntry
      nl = nq
      if (ntry .ne. 2) go to 107
      if (nf .eq. 1) go to 107
      do 106 i=2,nf
         ib = nf-i+2
         ifac(ib+2) = ifac(ib+1)
  106 continue
      ifac(3) = 2
  107 if (nl .ne. 1) go to 104
      ifac(1) = n
      ifac(2) = nf
      tpi = 6.28318530717959
      argh = tpi/float(n)
      is = 0
      nfm1 = nf-1
      l1 = 1
      if (nfm1 .eq. 0) return
      do 110 k1=1,nfm1
         ip = ifac(k1+2)
         ld = 0
         l2 = l1*ip
         ido = n/l2
         ipm = ip-1
         do 109 j=1,ipm
            ld = ld+l1
            i = is
            argld = float(ld)*argh
            fi = 0.
            do 108 ii=3,ido,2
               i = i+2
               fi = fi+1.
               arg = fi*argld
               wa(i-1) = cos(arg)
               wa(i) = sin(arg)
  108       continue
            is = is+ido
  109    continue
         l1 = l2
  110 continue
      return
      end
      integer function isamax(n,sx,incx) 
c
c     finds the index of element having max. absolute value.            
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sx(1),smax                                                   
      integer i,incx,ix,n                                               
c                                                                       
      isamax = 0                                                        
      if( n .lt. 1 ) return                                             
      isamax = 1                                                        
      if(n.eq.1)return                                                  
      if(incx.eq.1)go to 20                                             
c                                                                       
c        code for increment not equal to 1                              
c                                                                       
      ix = 1                                                            
      smax = abs(sx(1))                                                 
      ix = ix + incx                                                    
      do 10 i = 2,n                                                     
         if(abs(sx(ix)).le.smax) go to 5                                
         isamax = i                                                     
         smax = abs(sx(ix))                                             
    5    ix = ix + incx                                                 
   10 continue                                                          
      return                                                            
c                                                                       
c        code for increment equal to 1                                  
c                                                                       
   20 smax = abs(sx(1))                                                 
      do 30 i = 2,n                                                     
         if(abs(sx(i)).le.smax) go to 30                                
         isamax = i                                                     
         smax = abs(sx(i))                                              
   30 continue                                                          
      return                                                            
      end                                                               
      integer function isamin(n,sx,incx)
c***begin prologue  isamin
c***revision date  811015   (yymmdd)
c***category no.  f1a
c***keywords  blas,vector,smallest component
c***date written  october 1979
c***author lawson c. (jpl),hanson r. (sla),
c                            kincaid d. (u texas), krogh f. (jpl)
c***purpose
c    find smallest component of s.p. vector
c***description
c                b l a s  subprogram
c    description of parameters
c
c     --input--
c        n  number of elements in input vector(s)
c       sx  single precision vector with n elements
c     incx  storage spacing between elements of sx
c
c     --output--
c   isamin  smallest index (zero if n.le.0)
c
c     find smallest index of minimum magnitude of single precision sx.
c     isamin =  first i, i = 1 to n, to minimize  abs(sx(1-incx+i*incx)
c
c***references
c  lawson c.l., hanson r.j., kincaid d.r., krogh f.t.,
c   *basic linear algebra subprograms for fortran usage*,
c  algorithm no. 539, transactions on mathematical software,
c  volume 5, number 3, september 1979, 308-323
c***routines called  (none)
c***end prologue  isamin
c
      real sx(1),smin,xmag
c***first executable statement  isamin
      isamin = 0
      if(n.le.0) return
      isamin = 1
      if(n.le.1)return
      if(incx.eq.1)goto 20
c
c        code for increments not equal to 1.
c
      smin = abs(sx(1))
      ns = n*incx
      ii = 1
          do 10 i=1,ns,incx
          xmag = abs(sx(i))
          if(xmag.ge.smin) go to 5
          isamin = ii
          smin = xmag
    5     ii = ii + 1
   10     continue
      return
c
c        code for increments equal to 1.
c
   20 smin = abs(sx(1))
      do 30 i = 2,n
         xmag = abs(sx(i))
         if(xmag.ge.smin) go to 30
         isamin = i
         smin = xmag
   30 continue
      return
      end
      subroutine lsaxpy(n,sa,sx,incx,sy,incy)
c
c     constant times a vector plus a vector.
c     uses unrolled loop for increments equal to one.
c     jack dongarra, linpack, 3/11/78.
c     modified 12/3/93, array(1) declarations changed to array(*)
c
      real sx(*),sy(*),sa
      integer i,incx,incy,ix,iy,m,mp1,n
c
      if(n.le.0)return
      if (sa .eq. 0.0) return
      if(incx.eq.1.and.incy.eq.1)go to 20
c
c        code for unequal increments or equal increments
c          not equal to 1
c
      ix = 1
      iy = 1
      if(incx.lt.0)ix = (-n+1)*incx + 1
      if(incy.lt.0)iy = (-n+1)*incy + 1
      do 10 i = 1,n
        sy(iy) = sy(iy) + sa*sx(ix)
        ix = ix + incx
        iy = iy + incy
   10 continue
      return
c
c        code for both increments equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,4)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        sy(i) = sy(i) + sa*sx(i)
   30 continue
      if( n .lt. 4 ) return
   40 mp1 = m + 1
      do 50 i = mp1,n,4
        sy(i) = sy(i) + sa*sx(i)
        sy(i + 1) = sy(i + 1) + sa*sx(i + 1)
        sy(i + 2) = sy(i + 2) + sa*sx(i + 2)
        sy(i + 3) = sy(i + 3) + sa*sx(i + 3)
   50 continue
      return
      end
      real function lsdot(n,sx,incx,sy,incy)
c
c     forms the dot product of two vectors.
c     uses unrolled loops for increments equal to one.
c     jack dongarra, linpack, 3/11/78.
c     modified 12/3/93, array(1) declarations changed to array(*)
c
      real sx(*),sy(*),stemp
      integer i,incx,incy,ix,iy,m,mp1,n
c
      stemp = 0.0e0
      lsdot = 0.0e0
      if(n.le.0)return
      if(incx.eq.1.and.incy.eq.1)go to 20
c
c        code for unequal increments or equal increments
c          not equal to 1
c
      ix = 1
      iy = 1
      if(incx.lt.0)ix = (-n+1)*incx + 1
      if(incy.lt.0)iy = (-n+1)*incy + 1
      do 10 i = 1,n
        stemp = stemp + sx(ix)*sy(iy)
        ix = ix + incx
        iy = iy + incy
   10 continue
      lsdot = stemp
      return
c
c        code for both increments equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,5)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        stemp = stemp + sx(i)*sy(i)
   30 continue
      if( n .lt. 5 ) go to 60
   40 mp1 = m + 1
      do 50 i = mp1,n,5
        stemp = stemp + sx(i)*sy(i) + sx(i + 1)*sy(i + 1) +
     *   sx(i + 2)*sy(i + 2) + sx(i + 3)*sy(i + 3) + sx(i + 4)*sy(i + 4)
   50 continue
   60 lsdot = stemp
      return
      end
      function pimach(dum)
c***begin prologue  pimach
c
c     this subprogram supplies the value of the constant pi correct to
c     machine precision where
c
c     pi=3.1415926535897932384626433832795028841971693993751058209749446
c***routines called  (none)
c
      real dum, pimach
c
c***end prologue  pimach
c
c***first executable statement  pimach
      pimach = 3.14159265358979
      return
      end
      real function sasum(n,sx,incx)                                    
c                                                                       
c     takes the sum of the absolute values.                             
c     uses unrolled loops for increment equal to one.                   
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sx(1),stemp                                                  
      integer i,incx,m,mp1,n,nincx                                      
c                                                                       
      sasum = 0.0e0                                                     
      stemp = 0.0e0                                                     
      if(n.le.0)return                                                  
      if(incx.eq.1)go to 20                                             
c                                                                       
c        code for increment not equal to 1                              
c                                                                       
      nincx = n*incx                                                    
      do 10 i = 1,nincx,incx                                            
        stemp = stemp + abs(sx(i))                                      
   10 continue                                                          
      sasum = stemp                                                     
      return                                                            
c                                                                       
c        code for increment equal to 1                                  
c                                                                       
c                                                                       
c        clean-up loop                                                  
c                                                                       
   20 m = mod(n,6)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        stemp = stemp + abs(sx(i))                                      
   30 continue                                                          
      if( n .lt. 6 ) go to 60                                           
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,6                                                 
        stemp = stemp + abs(sx(i)) + abs(sx(i + 1)) + abs(sx(i + 2))    
     *  + abs(sx(i + 3)) + abs(sx(i + 4)) + abs(sx(i + 5))              
   50 continue                                                          
   60 sasum = stemp                                                     
      return                                                            
      end                                                               
      subroutine saxpy(n,sa,sx,incx,sy,incy)                            
c                                                                       
c     constant times a vector plus a vector.                            
c     uses unrolled loop for increments equal to one.                   
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sx(*),sy(*),sa                                               
      integer i,incx,incy,ix,iy,m,mp1,n                                 
c                                                                       
      if(n.le.0)return                                                  
      if (sa .eq. 0.0) return                                           
      if(incx.eq.1.and.incy.eq.1)go to 20                               
c                                                                       
c        code for unequal increments or equal increments                
c          not equal to 1                                               
c                                                                       
      ix = 1                                                            
      iy = 1                                                            
      if(incx.lt.0)ix = (-n+1)*incx + 1                                 
      if(incy.lt.0)iy = (-n+1)*incy + 1                                 
      do 10 i = 1,n                                                     
        sy(iy) = sy(iy) + sa*sx(ix)                                     
        ix = ix + incx                                                  
        iy = iy + incy                                                  
   10 continue                                                          
      return                                                            
c                                                                       
c        code for both increments equal to 1                            
c                                                                       
c                                                                       
c        clean-up loop                                                  
c                                                                       
   20 m = mod(n,4)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        sy(i) = sy(i) + sa*sx(i)                                        
   30 continue                                                          
      if( n .lt. 4 ) return                                             
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,4                                                 
        sy(i) = sy(i) + sa*sx(i)                                        
        sy(i + 1) = sy(i + 1) + sa*sx(i + 1)                            
        sy(i + 2) = sy(i + 2) + sa*sx(i + 2)                            
        sy(i + 3) = sy(i + 3) + sa*sx(i + 3)                            
   50 continue                                                          
      return                                                            
      end                                                               

      subroutine  sconst(n,sa,sx,incx)                                  
c                                                                       
c     replaces components of a vector by a constant.                    
c     uses unrolled loops for increment equal to 1.                     
c     william w. symes, 11/11/94.                                       
c                                                                       
      real sa,sx(*)                                                     
      integer i,incx,m,mp1,n,nincx                                      
c                                                                       
      if(n.le.0)return                                                  
      if(incx.eq.1)go to 20                                             
c                                                                       
c        code for increment not equal to 1                              
c                                                                       
      nincx = n*incx                                                    
      do 10 i = 1,nincx,incx                                            
        sx(i) = sa                                                      
   10 continue                                                          
      return                                                            
c                                                                       
c        code for increment equal to 1                                  
c                                                                       
c                                                                       
c        clean-up loop                                                  
c                                                                       
   20 m = mod(n,5)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        sx(i) = sa                                                      
   30 continue                                                          
      if( n .lt. 5 ) return                                             
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,5                                                 
        sx(i) = sa                                                      
        sx(i + 1) = sa                                                  
        sx(i + 2) = sa                                                  
        sx(i + 3) = sa                                                  
        sx(i + 4) = sa                                                  
   50 continue                                                          
      return                                                            
      end                                                               

      subroutine  scopy(n,sx,incx,sy,incy)                              
c                                                                       
c     copies a vector, x, to a vector, y.                               
c     uses unrolled loops for increments equal to 1.                    
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sx(*),sy(*)                                                  
      integer i,incx,incy,ix,iy,m,mp1,n                                 
c                                                                       
      if(n.le.0)return                                                  
      if(incx.eq.1.and.incy.eq.1)go to 20                               
c                                                                       
c        code for unequal increments or equal increments                
c          not equal to 1                                               
c                                                                       
      ix = 1                                                            
      iy = 1                                                            
      if(incx.lt.0)ix = (-n+1)*incx + 1                                 
      if(incy.lt.0)iy = (-n+1)*incy + 1                                 
      do 10 i = 1,n                                                     
        sy(iy) = sx(ix)                                                 
        ix = ix + incx                                                  
        iy = iy + incy                                                  
   10 continue                                                          
      return                                                            
c                                                                       
c        code for both increments equal to 1                            
c                                                                       
c                                                                       
c        clean-up loop                                                  
c                                                                       
   20 m = mod(n,7)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        sy(i) = sx(i)                                                   
   30 continue                                                          
      if( n .lt. 7 ) return                                             
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,7                                                 
        sy(i) = sx(i)                                                   
        sy(i + 1) = sx(i + 1)                                           
        sy(i + 2) = sx(i + 2)                                           
        sy(i + 3) = sx(i + 3)                                           
        sy(i + 4) = sx(i + 4)                                           
        sy(i + 5) = sx(i + 5)                                           
        sy(i + 6) = sx(i + 6)                                           
   50 continue                                                          
      return                                                            
      end                                                               
      subroutine  sdiv(n,t,sx,incx,sy,incy)                             
c                                                                       
c     componentwise division of two vectors.                            
c     uses unrolled loops for increment equal to 1.                     
c     assumes that denominator is positive and always
c     greater than t
c     after linpack blas - wws 310596                
c                                                    
      real t,sx(*),sy(*)                             
      integer i,incx,incy,m,mp1,n                    
c                                                                       
      if(n.le.0)return                                                  
      if((incx.eq.1).and.(incy.eq.1)) go to 20                          
c                                                                       
c        code for increment not equal to 1                              
c                                                                       
      do 10 i = 1,n                                                     
        sx(1+(i-1)*incx) = sx(1+(i-1)*incx)/max(t,sy(1+(i-1)*incy))     
   10 continue                                                          
      return                                                            
c                                                                       
c        code for increment equal to 1                                  
c                                                                       
c                                                                       
c        clean-up loop                                                  
c                                                                       
   20 m = mod(n,5)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        sx(i) = sx(i)/amax1(t,sy(i))                                    
   30 continue                                                          
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,5                                                 
        sx(i) = sx(i)/amax1(t,sy(i))                                    
        sx(i + 1) = sx(i+1)/amax1(t,sy(i + 1))                          
        sx(i + 2) = sx(i+2)/amax1(t,sy(i + 2))                          
        sx(i + 3) = sx(i+3)/amax1(t,sy(i + 3))                          
        sx(i + 4) = sx(i+4)/amax1(t,sy(i + 4))                          
   50 continue                                                                
      return 
      end                                                               
      real function sdot(n,sx,incx,sy,incy)                             
c                                                                       
c     forms the dot product of two vectors.                             
c     uses unrolled loops for increments equal to one.                  
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sx(*),sy(*),stemp                                            
      integer i,incx,incy,ix,iy,m,mp1,n                                 
c                                                                       
      stemp = 0.0e0                                                     
      sdot = 0.0e0                                                      
      if(n.le.0)return                                                  
      if(incx.eq.1.and.incy.eq.1)go to 20                               
c                                                                       
c        code for unequal increments or equal increments                
c          not equal to 1                                               
c                                                                       
      ix = 1                                                            
      iy = 1                                                            
      if(incx.lt.0)ix = (-n+1)*incx + 1                                 
      if(incy.lt.0)iy = (-n+1)*incy + 1                                 
      do 10 i = 1,n                                                     
        stemp = stemp + sx(ix)*sy(iy)                                   
        ix = ix + incx                                                  
        iy = iy + incy                                                  
   10 continue                                                          
      sdot = stemp                                                      
      return                                                            
c                                                                       
c        code for both increments equal to 1                            
c                                                                       
c                                                                       
c        clean-up loop                                                  
c                                                                       
   20 m = mod(n,5)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        stemp = stemp + sx(i)*sy(i)                                     
   30 continue                                                          
      if( n .lt. 5 ) go to 60                                           
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,5                                                 
        stemp = stemp + sx(i)*sy(i) + sx(i + 1)*sy(i + 1) +             
     *   sx(i + 2)*sy(i + 2) + sx(i + 3)*sy(i + 3) + sx(i + 4)*sy(i + 4)
   50 continue                                                          
   60 sdot = stemp                                                      
      return                                                            
      end                                                               
      subroutine sgeco(a,lda,n,ipvt,rcond,z)
      integer lda,n,ipvt(1)
      real a(lda,1),z(1)
      real rcond
c
c     sgeco factors a real matrix by gaussian elimination
c     and estimates the condition of the matrix.
c
c     if  rcond  is not needed, sgefa is slightly faster.
c     to solve  a*x = b , follow sgeco by sgesl.
c     to compute  inverse(a)*c , follow sgeco by sgesl.
c     to compute  determinant(a) , follow sgeco by sgedi.
c     to compute  inverse(a) , follow sgeco by sgedi.
c
c     on entry
c
c        a       real(lda, n)
c                the matrix to be factored.
c
c        lda     integer
c                the leading dimension of the array  a .
c
c        n       integer
c                the order of the matrix  a .
c
c     on return
c
c        a       an upper triangular matrix and the multipliers
c                which were used to obtain it.
c                the factorization can be written  a = l*u  where
c                l  is a product of permutation and unit lower
c                triangular matrices and  u  is upper triangular.
c
c        ipvt    integer(n)
c                an integer vector of pivot indices.
c
c        rcond   real
c                an estimate of the reciprocal condition of  a .
c                for the system  a*x = b , relative perturbations
c                in  a  and  b  of size  epsilon  may cause
c                relative perturbations in  x  of size  epsilon/rcond .
c                if  rcond  is so small that the logical expression
c                           1.0 + rcond .eq. 1.0
c                is true, then  a  may be singular to working
c                precision.  in particular,  rcond  is zero  if
c                exact singularity is detected or the estimate
c                underflows.
c
c        z       real(n)
c                a work vector whose contents are usually unimportant.
c                if  a  is close to a singular matrix, then  z  is
c                an approximate null vector in the sense that
c                norm(a*z) = rcond*norm(a)*norm(z) .
c
c     linpack. this version dated 08/14/78 .
c     cleve moler, university of new mexico, argonne national lab.
c
c     subroutines and functions
c
c     linpack sgefa
c     blas lsaxpy,lsdot,lsscal,sasum
c     fortran abs,amax1,sign
c
c     internal variables
c
      real lsdot,ek,t,wk,wkm
      real anorm,s,sasum,sm,ynorm
      integer info,j,k,kb,kp1,l
c
c
c     compute 1-norm of a
c
      anorm = 0.0e0
      do 10 j = 1, n
         anorm = amax1(anorm,sasum(n,a(1,j),1))
   10 continue
c
c     factor
c
      call sgefa(a,lda,n,ipvt,info)
c
c     rcond = 1/(norm(a)*(estimate of norm(inverse(a)))) .
c     estimate = norm(z)/norm(y) where  a*z = y  and  trans(a)*y = e .
c     trans(a)  is the transpose of a .  the components of  e  are
c     chosen to cause maximum local growth in the elements of w  where
c     trans(u)*w = e .  the vectors are frequently rescaled to avoid
c     overflow.
c
c     solve trans(u)*w = e
c
      ek = 1.0e0
      do 20 j = 1, n
         z(j) = 0.0e0
   20 continue
      do 100 k = 1, n
         if (z(k) .ne. 0.0e0) ek = sign(ek,-z(k))
         if (abs(ek-z(k)) .le. abs(a(k,k))) go to 30
            s = abs(a(k,k))/abs(ek-z(k))
            call sscal(n,s,z,1)
            ek = s*ek
   30    continue
         wk = ek - z(k)
         wkm = -ek - z(k)
         s = abs(wk)
         sm = abs(wkm)
         if (a(k,k) .eq. 0.0e0) go to 40
            wk = wk/a(k,k)
            wkm = wkm/a(k,k)
         go to 50
   40    continue
            wk = 1.0e0
            wkm = 1.0e0
   50    continue
         kp1 = k + 1
         if (kp1 .gt. n) go to 90
            do 60 j = kp1, n
               sm = sm + abs(z(j)+wkm*a(k,j))
               z(j) = z(j) + wk*a(k,j)
               s = s + abs(z(j))
   60       continue
            if (s .ge. sm) go to 80
               t = wkm - wk
               wk = wkm
               do 70 j = kp1, n
                  z(j) = z(j) + t*a(k,j)
   70          continue
   80       continue
   90    continue
         z(k) = wk
  100 continue
      s = 1.0e0/sasum(n,z,1)
      call sscal(n,s,z,1)
c
c     solve trans(l)*y = w
c
      do 120 kb = 1, n
         k = n + 1 - kb
         if (k .lt. n) z(k) = z(k) + lsdot(n-k,a(k+1,k),1,z(k+1),1)
         if (abs(z(k)) .le. 1.0e0) go to 110
            s = 1.0e0/abs(z(k))
            call sscal(n,s,z,1)
  110    continue
         l = ipvt(k)
         t = z(l)
         z(l) = z(k)
         z(k) = t
  120 continue
      s = 1.0e0/sasum(n,z,1)
      call sscal(n,s,z,1)
c
      ynorm = 1.0e0
c
c     solve l*v = y
c
      do 140 k = 1, n
         l = ipvt(k)
         t = z(l)
         z(l) = z(k)
         z(k) = t
         if (k .lt. n) call lsaxpy(n-k,t,a(k+1,k),1,z(k+1),1)
         if (abs(z(k)) .le. 1.0e0) go to 130
            s = 1.0e0/abs(z(k))
            call sscal(n,s,z,1)
            ynorm = s*ynorm
  130    continue
  140 continue
      s = 1.0e0/sasum(n,z,1)
      call sscal(n,s,z,1)
      ynorm = s*ynorm
c
c     solve  u*z = v
c
      do 160 kb = 1, n
         k = n + 1 - kb
         if (abs(z(k)) .le. abs(a(k,k))) go to 150
            s = abs(a(k,k))/abs(z(k))
            call sscal(n,s,z,1)
            ynorm = s*ynorm
  150    continue
         if (a(k,k) .ne. 0.0e0) z(k) = z(k)/a(k,k)
         if (a(k,k) .eq. 0.0e0) z(k) = 1.0e0
         t = -z(k)
         call lsaxpy(k-1,t,a(1,k),1,z(1),1)
  160 continue
c     make znorm = 1.0
      s = 1.0e0/sasum(n,z,1)
      call sscal(n,s,z,1)
      ynorm = s*ynorm
c
      if (anorm .ne. 0.0e0) rcond = ynorm/anorm
      if (anorm .eq. 0.0e0) rcond = 0.0e0
      return
      end
      integer function isamax(n,sx,incx)
c
c     finds the index of element having max. absolute value.
c     jack dongarra, linpack, 3/11/78.
c     modified 3/93 to return if incx .le. 0.
c     modified 12/3/93, array(1) declarations changed to array(*)
c
      real sx(*),smax
      integer i,incx,ix,n
c
      isamax = 0
      if( n.lt.1 .or. incx.le.0 ) return
      isamax = 1
      if(n.eq.1)return
      if(incx.eq.1)go to 20
c
c        code for increment not equal to 1
c
      ix = 1
      smax = abs(sx(1))
      ix = ix + incx
      do 10 i = 2,n
         if(abs(sx(ix)).le.smax) go to 5
         isamax = i
         smax = abs(sx(ix))
    5    ix = ix + incx
   10 continue
      return
c
c        code for increment equal to 1
c
   20 smax = abs(sx(1))
      do 30 i = 2,n
         if(abs(sx(i)).le.smax) go to 30
         isamax = i
         smax = abs(sx(i))
   30 continue
      return
      end
      real function sasum(n,sx,incx)
c
c     takes the sum of the absolute values.
c     uses unrolled loops for increment equal to one.
c     jack dongarra, linpack, 3/11/78.
c     modified 3/93 to return if incx .le. 0.
c     modified 12/3/93, array(1) declarations changed to array(*)
c
      real sx(*),stemp
      integer i,incx,m,mp1,n,nincx
c
      sasum = 0.0e0
      stemp = 0.0e0
      if( n.le.0 .or. incx.le.0 )return
      if(incx.eq.1)go to 20
c
c        code for increment not equal to 1
c
      nincx = n*incx
      do 10 i = 1,nincx,incx
        stemp = stemp + abs(sx(i))
   10 continue
      sasum = stemp
      return
c
c        code for increment equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,6)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        stemp = stemp + abs(sx(i))
   30 continue
      if( n .lt. 6 ) go to 60
   40 mp1 = m + 1
      do 50 i = mp1,n,6
        stemp = stemp + abs(sx(i)) + abs(sx(i + 1)) + abs(sx(i + 2))
     *  + abs(sx(i + 3)) + abs(sx(i + 4)) + abs(sx(i + 5))
   50 continue
   60 sasum = stemp
      return
      end
      subroutine sscal(n,sa,sx,incx)
c
c     scales a vector by a constant.
c     uses unrolled loops for increment equal to 1.
c     jack dongarra, linpack, 3/11/78.
c     modified 3/93 to return if incx .le. 0.
c     modified 12/3/93, array(1) declarations changed to array(*)
c
      real sa,sx(*)
      integer i,incx,m,mp1,n,nincx
c
      if( n.le.0 .or. incx.le.0 )return
      if(incx.eq.1)go to 20
c
c        code for increment not equal to 1
c
      nincx = n*incx
      do 10 i = 1,nincx,incx
        sx(i) = sa*sx(i)
   10 continue
      return
c
c        code for increment equal to 1
c
c
c        clean-up loop
c
   20 m = mod(n,5)
      if( m .eq. 0 ) go to 40
      do 30 i = 1,m
        sx(i) = sa*sx(i)
   30 continue
      if( n .lt. 5 ) return
   40 mp1 = m + 1
      do 50 i = mp1,n,5
        sx(i) = sa*sx(i)
        sx(i + 1) = sa*sx(i + 1)
        sx(i + 2) = sa*sx(i + 2)
        sx(i + 3) = sa*sx(i + 3)
        sx(i + 4) = sa*sx(i + 4)
   50 continue
      return
      end
      subroutine sgefa(a,lda,n,ipvt,info)
      integer lda,n,ipvt(1),info
      real a(lda,1)
c
c     sgefa factors a real matrix by gaussian elimination.
c
c     sgefa is usually called by sgeco, but it can be called
c     directly with a saving in time if  rcond  is not needed.
c     (time for sgeco) = (1 + 9/n)*(time for sgefa) .
c
c     on entry
c
c        a       real(lda, n)
c                the matrix to be factored.
c
c        lda     integer
c                the leading dimension of the array  a .
c
c        n       integer
c                the order of the matrix  a .
c
c     on return
c
c        a       an upper triangular matrix and the multipliers
c                which were used to obtain it.
c                the factorization can be written  a = l*u  where
c                l  is a product of permutation and unit lower
c                triangular matrices and  u  is upper triangular.
c
c        ipvt    integer(n)
c                an integer vector of pivot indices.
c
c        info    integer
c                = 0  normal value.
c                = k  if  u(k,k) .eq. 0.0 .  this is not an error
c                     condition for this subroutine, but it does
c                     indicate that sgesl or sgedi will divide by zero
c                     if called.  use  rcond  in sgeco for a reliable
c                     indication of singularity.
c
c     linpack. this version dated 08/14/78 .
c     cleve moler, university of new mexico, argonne national lab.
c
c     subroutines and functions
c
c     blas lsaxpy,lsscal,isamax
c
c     internal variables
c
      real t
      integer isamax,j,k,kp1,l,nm1
c
c
c     gaussian elimination with partial pivoting
c
      info = 0
      nm1 = n - 1
      if (nm1 .lt. 1) go to 70
      do 60 k = 1, nm1
         kp1 = k + 1
c
c        find l = pivot index
c
         l = isamax(n-k+1,a(k,k),1) + k - 1
         ipvt(k) = l
c
c        zero pivot implies this column already triangularized
c
         if (a(l,k) .eq. 0.0e0) go to 40
c
c           interchange if necessary
c
            if (l .eq. k) go to 10
               t = a(l,k)
               a(l,k) = a(k,k)
               a(k,k) = t
   10       continue
c
c           compute multipliers
c
            t = -1.0e0/a(k,k)
            call sscal(n-k,t,a(k+1,k),1)
c
c           row elimination with column indexing
c
            do 30 j = kp1, n
               t = a(l,j)
               if (l .eq. k) go to 20
                  a(l,j) = a(k,j)
                  a(k,j) = t
   20          continue
               call lsaxpy(n-k,t,a(k+1,k),1,a(k+1,j),1)
   30       continue
         go to 50
   40    continue
            info = k
   50    continue
   60 continue
   70 continue
      ipvt(n) = n
      if (a(n,n) .eq. 0.0e0) info = n
      return
      end
      subroutine sgesl(a,lda,n,ipvt,b,job)
      integer lda,n,ipvt(1),job
      real a(lda,1),b(1)
c
c     sgesl solves the real system
c     a * x = b  or  trans(a) * x = b
c     using the factors computed by sgeco or sgefa.
c
c     on entry
c
c        a       real(lda, n)
c                the output from sgeco or sgefa.
c
c        lda     integer
c                the leading dimension of the array  a .
c
c        n       integer
c                the order of the matrix  a .
c
c        ipvt    integer(n)
c                the pivot vector from sgeco or sgefa.
c
c        b       real(n)
c                the right hand side vector.
c
c        job     integer
c                = 0         to solve  a*x = b ,
c                = nonzero   to solve  trans(a)*x = b  where
c                            trans(a)  is the transpose.
c
c     on return
c
c        b       the solution vector  x .
c
c     error condition
c
c        a division by zero will occur if the input factor contains a
c        zero on the diagonal.  technically this indicates singularity
c        but it is often caused by improper arguments or improper
c        setting of lda .  it will not occur if the subroutines are
c        called correctly and if sgeco has set rcond .gt. 0.0
c        or sgefa has set info .eq. 0 .
c
c     to compute  inverse(a) * c  where  c  is a matrix
c     with  p  columns
c           call sgeco(a,lda,n,ipvt,rcond,z)
c           if (rcond is too small) go to ...
c           do 10 j = 1, p
c              call sgesl(a,lda,n,ipvt,c(1,j),0)
c        10 continue
c
c     linpack. this version dated 08/14/78 .
c     cleve moler, university of new mexico, argonne national lab.
c
c     subroutines and functions
c
c     blas lsaxpy,lsdot
c
c     internal variables
c
      real lsdot,t
      integer k,kb,l,nm1
c
      nm1 = n - 1
      if (job .ne. 0) go to 50
c
c        job = 0 , solve  a * x = b
c        first solve  l*y = b
c
         if (nm1 .lt. 1) go to 30
         do 20 k = 1, nm1
            l = ipvt(k)
            t = b(l)
            if (l .eq. k) go to 10
               b(l) = b(k)
               b(k) = t
   10       continue
            call lsaxpy(n-k,t,a(k+1,k),1,b(k+1),1)
   20    continue
   30    continue
c
c        now solve  u*x = y
c
         do 40 kb = 1, n
            k = n + 1 - kb
            b(k) = b(k)/a(k,k)
            t = -b(k)
            call lsaxpy(k-1,t,a(1,k),1,b(1),1)
   40    continue
      go to 100
   50 continue
c
c        job = nonzero, solve  trans(a) * x = b
c        first solve  trans(u)*y = b
c
         do 60 k = 1, n
            t = lsdot(k-1,a(1,k),1,b(1),1)
            b(k) = (b(k) - t)/a(k,k)
   60    continue
c
c        now solve trans(l)*x = y
c
         if (nm1 .lt. 1) go to 90
         do 80 kb = 1, nm1
            k = n - kb
            b(k) = b(k) + lsdot(n-k,a(k+1,k),1,b(k+1),1)
            l = ipvt(k)
            if (l .eq. k) go to 70
               t = b(l)
               b(l) = b(k)
               b(k) = t
   70       continue
   80    continue
   90    continue
  100 continue
      return
      end
      subroutine sincost(m,n,x,xt,wsave)
c==========================================================================
c
c w. w. symes, 2/91
c
c interface with vfftpk routines to implement 2d fast sine/cosine transform
c of an input array x(1:m,1:n), stored as a linear array x(*).
c uses workspace arrays xt(*) and wsave(*).
c
c the columns of x are sine-transformed; the rows are
c cosine-transformed.
c
c very important: it is assumed that the columns of the
c input array x satisfy dirichlet conditions at the bottom, i.e.
c that x(m,:)=0. thus the nonredundant part of x is, say,
c the first (m-1) rows. therefore x transpose is passed to
c the vfftpak routine vsint as an nx(m-1) array, with
c mdimx = m. this automatically takes care of the need in
c vsint for an extra column of workspace. as a result, the last
c column of x transpose (haveing been used as workspace by vsint) must be
c zeroed out again.
c
c also very important: the vfftpk routines vsint and vcost are 
c efficient if the row length of the input (i.e. the number of 
c columns) is n+1, resp. n-1, where n is a product of small
c primes. thus if x has dimensions m,n, m and n-1 should be
c products of small primes. the obvious way to arrange this,
c if say x lives somewhere else as a 2^k x 2^j matrix, is to
c pad x with an extra copy of its last column (this is consistent
c with the neumann condition in the column direction).
c
c=========================================================================
c
      integer m,n

      real x(*), xt(*), wsave(*)

      integer i, m1, n1

      m1=m-1
      n1=n-1

c transform the rows if n > 1:

      if (n.gt.1) then
         call vcosti(n,wsave)
         call vcost(m1,n,x,xt,m,wsave)
      end if

c transpose x into xt

      do i=1,m
         call scopy(n,x(i),m,xt((i-1)*n+1),1)
      end do

c transform the columns

      call vsinti(m1,wsave)

      call vsint(n,m1,xt,x,n,wsave)
      do i=1,n
         xt(m1*n + i)=0.0
      end do

c transpose back

      do i=1,n
         call scopy(m,xt(i),n,x((i-1)*m+1),1)
      end do

      return
      end
      subroutine sint (n,x,wsave)
      dimension       x(1)       ,wsave(1)
      np1 = n+1
      iw1 = n/2+1
      iw2 = iw1+np1
      iw3 = iw2+np1
      call sint1_1(n,x,wsave,wsave(iw1),wsave(iw2),wsave(iw3))
      return
      end
      subroutine sint1_1(n,war,was,xh,x,ifac)
      dimension war(1),was(1),x(1),xh(1),ifac(1)
      data sqrt3 /1.73205080756888/
      do 100 i=1,n
      xh(i) = war(i)
      war(i) = x(i)
  100 continue
      if (n-2) 101,102,103
  101 xh(1) = xh(1)+xh(1)
      go to 106
  102 xhold = sqrt3*(xh(1)+xh(2))
      xh(2) = sqrt3*(xh(1)-xh(2))
      xh(1) = xhold
      go to 106
  103 np1 = n+1
      ns2 = n/2
      x(1) = 0.
      do 104 k=1,ns2
         kc = np1-k
         t1 = xh(k)-xh(kc)
         t2 = was(k)*(xh(k)+xh(kc))
         x(k+1) = t1+t2
         x(kc+1) = t2-t1
  104 continue
      modn = mod(n,2)
      if (modn .ne. 0) x(ns2+2) = 4.*xh(ns2+1)
      call rfftf1_1 (np1,x,xh,war,ifac)
      xh(1) = .5*x(1)
      do 105 i=3,n,2
         xh(i-1) = -x(i)
         xh(i) = xh(i-2)+x(i-1)
  105 continue
      if (modn .ne. 0) go to 106
      xh(n) = -x(n+1)
  106 do 107 i=1,n
      x(i) = war(i)
      war(i) = xh(i)
  107 continue
      return
      end
      subroutine radf2_1 (ido,l1,cc,ch,wa1)
      dimension       ch(ido,2,l1)           ,cc(ido,l1,2)           ,
     1                wa1(1)
      do 101 k=1,l1
         ch(1,1,k) = cc(1,k,1)+cc(1,k,2)
         ch(ido,2,k) = cc(1,k,1)-cc(1,k,2)
  101 continue
      if (ido-2) 107,105,102
  102 idp2 = ido+2
      do 104 k=1,l1
         do 103 i=3,ido,2
            ic = idp2-i
            tr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            ti2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            ch(i,1,k) = cc(i,k,1)+ti2
            ch(ic,2,k) = ti2-cc(i,k,1)
            ch(i-1,1,k) = cc(i-1,k,1)+tr2
            ch(ic-1,2,k) = cc(i-1,k,1)-tr2
  103    continue
  104 continue
      if (mod(ido,2) .eq. 1) return
  105 do 106 k=1,l1
         ch(1,2,k) = -cc(ido,k,2)
         ch(ido,1,k) = cc(ido,k,1)
  106 continue
  107 return
      end
      subroutine radf3_1 (ido,l1,cc,ch,wa1,wa2)
      dimension       ch(ido,3,l1)           ,cc(ido,l1,3)           ,
     1                wa1(1)     ,wa2(1)
      data taur,taui /-.5,.866025403784439/
      do 101 k=1,l1
         cr2 = cc(1,k,2)+cc(1,k,3)
         ch(1,1,k) = cc(1,k,1)+cr2
         ch(1,3,k) = taui*(cc(1,k,3)-cc(1,k,2))
         ch(ido,2,k) = cc(1,k,1)+taur*cr2
  101 continue
      if (ido .eq. 1) return
      idp2 = ido+2
      do 103 k=1,l1
         do 102 i=3,ido,2
            ic = idp2-i
            dr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            di2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            dr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            di3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            cr2 = dr2+dr3
            ci2 = di2+di3
            ch(i-1,1,k) = cc(i-1,k,1)+cr2
            ch(i,1,k) = cc(i,k,1)+ci2
            tr2 = cc(i-1,k,1)+taur*cr2
            ti2 = cc(i,k,1)+taur*ci2
            tr3 = taui*(di2-di3)
            ti3 = taui*(dr3-dr2)
            ch(i-1,3,k) = tr2+tr3
            ch(ic-1,2,k) = tr2-tr3
            ch(i,3,k) = ti2+ti3
            ch(ic,2,k) = ti3-ti2
  102    continue
  103 continue
      return
      end
      subroutine radf4_1 (ido,l1,cc,ch,wa1,wa2,wa3)
      dimension       cc(ido,l1,4)           ,ch(ido,4,l1)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)
      data hsqt2 /.7071067811865475/
      do 101 k=1,l1
         tr1 = cc(1,k,2)+cc(1,k,4)
         tr2 = cc(1,k,1)+cc(1,k,3)
         ch(1,1,k) = tr1+tr2
         ch(ido,4,k) = tr2-tr1
         ch(ido,2,k) = cc(1,k,1)-cc(1,k,3)
         ch(1,3,k) = cc(1,k,4)-cc(1,k,2)
  101 continue
      if (ido-2) 107,105,102
  102 idp2 = ido+2
      do 104 k=1,l1
         do 103 i=3,ido,2
            ic = idp2-i
            cr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            ci2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            cr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            ci3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            cr4 = wa3(i-2)*cc(i-1,k,4)+wa3(i-1)*cc(i,k,4)
            ci4 = wa3(i-2)*cc(i,k,4)-wa3(i-1)*cc(i-1,k,4)
            tr1 = cr2+cr4
            tr4 = cr4-cr2
            ti1 = ci2+ci4
            ti4 = ci2-ci4
            ti2 = cc(i,k,1)+ci3
            ti3 = cc(i,k,1)-ci3
            tr2 = cc(i-1,k,1)+cr3
            tr3 = cc(i-1,k,1)-cr3
            ch(i-1,1,k) = tr1+tr2
            ch(ic-1,4,k) = tr2-tr1
            ch(i,1,k) = ti1+ti2
            ch(ic,4,k) = ti1-ti2
            ch(i-1,3,k) = ti4+tr3
            ch(ic-1,2,k) = tr3-ti4
            ch(i,3,k) = tr4+ti3
            ch(ic,2,k) = tr4-ti3
  103    continue
  104 continue
      if (mod(ido,2) .eq. 1) return
  105 continue
      do 106 k=1,l1
         ti1 = -hsqt2*(cc(ido,k,2)+cc(ido,k,4))
         tr1 = hsqt2*(cc(ido,k,2)-cc(ido,k,4))
         ch(ido,1,k) = tr1+cc(ido,k,1)
         ch(ido,3,k) = cc(ido,k,1)-tr1
         ch(1,2,k) = ti1-cc(ido,k,3)
         ch(1,4,k) = ti1+cc(ido,k,3)
  106 continue
  107 return
      end
      subroutine radf5_1 (ido,l1,cc,ch,wa1,wa2,wa3,wa4)
      dimension       cc(ido,l1,5)           ,ch(ido,5,l1)           ,
     1                wa1(1)     ,wa2(1)     ,wa3(1)     ,wa4(1)
      data tr11,ti11,tr12,ti12 /.309016994374947,.951056516295154,
     1-.809016994374947,.587785252292473/
      do 101 k=1,l1
         cr2 = cc(1,k,5)+cc(1,k,2)
         ci5 = cc(1,k,5)-cc(1,k,2)
         cr3 = cc(1,k,4)+cc(1,k,3)
         ci4 = cc(1,k,4)-cc(1,k,3)
         ch(1,1,k) = cc(1,k,1)+cr2+cr3
         ch(ido,2,k) = cc(1,k,1)+tr11*cr2+tr12*cr3
         ch(1,3,k) = ti11*ci5+ti12*ci4
         ch(ido,4,k) = cc(1,k,1)+tr12*cr2+tr11*cr3
         ch(1,5,k) = ti12*ci5-ti11*ci4
  101 continue
      if (ido .eq. 1) return
      idp2 = ido+2
      do 103 k=1,l1
         do 102 i=3,ido,2
            ic = idp2-i
            dr2 = wa1(i-2)*cc(i-1,k,2)+wa1(i-1)*cc(i,k,2)
            di2 = wa1(i-2)*cc(i,k,2)-wa1(i-1)*cc(i-1,k,2)
            dr3 = wa2(i-2)*cc(i-1,k,3)+wa2(i-1)*cc(i,k,3)
            di3 = wa2(i-2)*cc(i,k,3)-wa2(i-1)*cc(i-1,k,3)
            dr4 = wa3(i-2)*cc(i-1,k,4)+wa3(i-1)*cc(i,k,4)
            di4 = wa3(i-2)*cc(i,k,4)-wa3(i-1)*cc(i-1,k,4)
            dr5 = wa4(i-2)*cc(i-1,k,5)+wa4(i-1)*cc(i,k,5)
            di5 = wa4(i-2)*cc(i,k,5)-wa4(i-1)*cc(i-1,k,5)
            cr2 = dr2+dr5
            ci5 = dr5-dr2
            cr5 = di2-di5
            ci2 = di2+di5
            cr3 = dr3+dr4
            ci4 = dr4-dr3
            cr4 = di3-di4
            ci3 = di3+di4
            ch(i-1,1,k) = cc(i-1,k,1)+cr2+cr3
            ch(i,1,k) = cc(i,k,1)+ci2+ci3
            tr2 = cc(i-1,k,1)+tr11*cr2+tr12*cr3
            ti2 = cc(i,k,1)+tr11*ci2+tr12*ci3
            tr3 = cc(i-1,k,1)+tr12*cr2+tr11*cr3
            ti3 = cc(i,k,1)+tr12*ci2+tr11*ci3
            tr5 = ti11*cr5+ti12*cr4
            ti5 = ti11*ci5+ti12*ci4
            tr4 = ti12*cr5-ti11*cr4
            ti4 = ti12*ci5-ti11*ci4
            ch(i-1,3,k) = tr2+tr5
            ch(ic-1,2,k) = tr2-tr5
            ch(i,3,k) = ti2+ti5
            ch(ic,2,k) = ti5-ti2
            ch(i-1,5,k) = tr3+tr4
            ch(ic-1,4,k) = tr3-tr4
            ch(i,5,k) = ti3+ti4
            ch(ic,4,k) = ti4-ti3
  102    continue
  103 continue
      return
      end
      subroutine radfg_1 (ido,ip,l1,idl1,cc,c1,c2,ch,ch2,wa)
      dimension       ch(ido,l1,ip)          ,cc(ido,ip,l1)          ,
     1                c1(ido,l1,ip)          ,c2(idl1,ip),
     2                ch2(idl1,ip)           ,wa(1)
      data tpi/6.28318530717959/
      arg = tpi/float(ip)
      dcp = cos(arg)
      dsp = sin(arg)
      ipph = (ip+1)/2
      ipp2 = ip+2
      idp2 = ido+2
      nbd = (ido-1)/2
      if (ido .eq. 1) go to 119
      do 101 ik=1,idl1
         ch2(ik,1) = c2(ik,1)
  101 continue
      do 103 j=2,ip
         do 102 k=1,l1
            ch(1,k,j) = c1(1,k,j)
  102    continue
  103 continue
      if (nbd .gt. l1) go to 107
      is = -ido
      do 106 j=2,ip
         is = is+ido
         idij = is
         do 105 i=3,ido,2
            idij = idij+2
            do 104 k=1,l1
               ch(i-1,k,j) = wa(idij-1)*c1(i-1,k,j)+wa(idij)*c1(i,k,j)
               ch(i,k,j) = wa(idij-1)*c1(i,k,j)-wa(idij)*c1(i-1,k,j)
  104       continue
  105    continue
  106 continue
      go to 111
  107 is = -ido
      do 110 j=2,ip
         is = is+ido
         do 109 k=1,l1
            idij = is
            do 108 i=3,ido,2
               idij = idij+2
               ch(i-1,k,j) = wa(idij-1)*c1(i-1,k,j)+wa(idij)*c1(i,k,j)
               ch(i,k,j) = wa(idij-1)*c1(i,k,j)-wa(idij)*c1(i-1,k,j)
  108       continue
  109    continue
  110 continue
  111 if (nbd .lt. l1) go to 115
      do 114 j=2,ipph
         jc = ipp2-j
         do 113 k=1,l1
            do 112 i=3,ido,2
               c1(i-1,k,j) = ch(i-1,k,j)+ch(i-1,k,jc)
               c1(i-1,k,jc) = ch(i,k,j)-ch(i,k,jc)
               c1(i,k,j) = ch(i,k,j)+ch(i,k,jc)
               c1(i,k,jc) = ch(i-1,k,jc)-ch(i-1,k,j)
  112       continue
  113    continue
  114 continue
      go to 121
  115 do 118 j=2,ipph
         jc = ipp2-j
         do 117 i=3,ido,2
            do 116 k=1,l1
               c1(i-1,k,j) = ch(i-1,k,j)+ch(i-1,k,jc)
               c1(i-1,k,jc) = ch(i,k,j)-ch(i,k,jc)
               c1(i,k,j) = ch(i,k,j)+ch(i,k,jc)
               c1(i,k,jc) = ch(i-1,k,jc)-ch(i-1,k,j)
  116       continue
  117    continue
  118 continue
      go to 121
  119 do 120 ik=1,idl1
         c2(ik,1) = ch2(ik,1)
  120 continue
  121 do 123 j=2,ipph
         jc = ipp2-j
         do 122 k=1,l1
            c1(1,k,j) = ch(1,k,j)+ch(1,k,jc)
            c1(1,k,jc) = ch(1,k,jc)-ch(1,k,j)
  122    continue
  123 continue
c
      ar1 = 1.
      ai1 = 0.
      do 127 l=2,ipph
         lc = ipp2-l
         ar1h = dcp*ar1-dsp*ai1
         ai1 = dcp*ai1+dsp*ar1
         ar1 = ar1h
         do 124 ik=1,idl1
            ch2(ik,l) = c2(ik,1)+ar1*c2(ik,2)
            ch2(ik,lc) = ai1*c2(ik,ip)
  124    continue
         dc2 = ar1
         ds2 = ai1
         ar2 = ar1
         ai2 = ai1
         do 126 j=3,ipph
            jc = ipp2-j
            ar2h = dc2*ar2-ds2*ai2
            ai2 = dc2*ai2+ds2*ar2
            ar2 = ar2h
            do 125 ik=1,idl1
               ch2(ik,l) = ch2(ik,l)+ar2*c2(ik,j)
               ch2(ik,lc) = ch2(ik,lc)+ai2*c2(ik,jc)
  125       continue
  126    continue
  127 continue
      do 129 j=2,ipph
         do 128 ik=1,idl1
            ch2(ik,1) = ch2(ik,1)+c2(ik,j)
  128    continue
  129 continue
c
      if (ido .lt. l1) go to 132
      do 131 k=1,l1
         do 130 i=1,ido
            cc(i,1,k) = ch(i,k,1)
  130    continue
  131 continue
      go to 135
  132 do 134 i=1,ido
         do 133 k=1,l1
            cc(i,1,k) = ch(i,k,1)
  133    continue
  134 continue
  135 do 137 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 136 k=1,l1
            cc(ido,j2-2,k) = ch(1,k,j)
            cc(1,j2-1,k) = ch(1,k,jc)
  136    continue
  137 continue
      if (ido .eq. 1) return
      if (nbd .lt. l1) go to 141
      do 140 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 139 k=1,l1
            do 138 i=3,ido,2
               ic = idp2-i
               cc(i-1,j2-1,k) = ch(i-1,k,j)+ch(i-1,k,jc)
               cc(ic-1,j2-2,k) = ch(i-1,k,j)-ch(i-1,k,jc)
               cc(i,j2-1,k) = ch(i,k,j)+ch(i,k,jc)
               cc(ic,j2-2,k) = ch(i,k,jc)-ch(i,k,j)
  138       continue
  139    continue
  140 continue
      return
  141 do 144 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 143 i=3,ido,2
            ic = idp2-i
            do 142 k=1,l1
               cc(i-1,j2-1,k) = ch(i-1,k,j)+ch(i-1,k,jc)
               cc(ic-1,j2-2,k) = ch(i-1,k,j)-ch(i-1,k,jc)
               cc(i,j2-1,k) = ch(i,k,j)+ch(i,k,jc)
               cc(ic,j2-2,k) = ch(i,k,jc)-ch(i,k,j)
  142       continue
  143    continue
  144 continue
      return
      end
      subroutine rfftf1_1 (n,c,ch,wa,ifac)
      dimension       ch(1)      ,c(1)       ,wa(1)      ,ifac(1)
      nf = ifac(2)
      na = 1
      l2 = n
      iw = n
      do 111 k1=1,nf
         kh = nf-k1
         ip = ifac(kh+3)
         l1 = l2/ip
         ido = n/l2
         idl1 = ido*l1
         iw = iw-(ip-1)*ido
         na = 1-na
         if (ip .ne. 4) go to 102
         ix2 = iw+ido
         ix3 = ix2+ido
         if (na .ne. 0) go to 101
         call radf4_1 (ido,l1,c,ch,wa(iw),wa(ix2),wa(ix3))
         go to 110
  101    call radf4_1 (ido,l1,ch,c,wa(iw),wa(ix2),wa(ix3))
         go to 110
  102    if (ip .ne. 2) go to 104
         if (na .ne. 0) go to 103
         call radf2_1 (ido,l1,c,ch,wa(iw))
         go to 110
  103    call radf2_1 (ido,l1,ch,c,wa(iw))
         go to 110
  104    if (ip .ne. 3) go to 106
         ix2 = iw+ido
         if (na .ne. 0) go to 105
         call radf3_1 (ido,l1,c,ch,wa(iw),wa(ix2))
         go to 110
  105    call radf3_1 (ido,l1,ch,c,wa(iw),wa(ix2))
         go to 110
  106    if (ip .ne. 5) go to 108
         ix2 = iw+ido
         ix3 = ix2+ido
         ix4 = ix3+ido
         if (na .ne. 0) go to 107
         call radf5_1 (ido,l1,c,ch,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 110
  107    call radf5_1 (ido,l1,ch,c,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 110
  108    if (ido .eq. 1) na = 1-na
         if (na .ne. 0) go to 109
         call radfg_1 (ido,ip,l1,idl1,c,c,c,ch,ch,wa(iw))
         na = 1
         go to 110
  109    call radfg_1 (ido,ip,l1,idl1,ch,ch,ch,c,c,wa(iw))
         na = 0
  110    l2 = l1
  111 continue
      if (na .eq. 1) return
      do 112 i=1,n
         c(i) = ch(i)
  112 continue
      return
      end
      subroutine sint2d(m,n,x,xt,wsave)
c
c interface with vfftpk routines to implement 2d fast sine transform
c of an input array x(1:m,1:n), stored as a linear array x(*).
c uses workspace arrays xt(*) and wsave(*).
c
c very important: it is assumed that the columns of the
c input array x satisfy dirichlet conditions on the bottom and
c on the right both ends, i.e. that x(:,n)=x(m,:)=0. 
c thus the nonredundant part of x is, the (m-1)x(n-1) principal 
c submatrix. therefore x is passed to
c the vfftpak routine vsint as an (m-1)x(n-1) array, with
c mdimx = m. this automatically takes care of the need in
c vsint for an extra column of workspace. as a result, the last
c column of x (haveing been used as workspace by vsint) must be
c zeroed out again.
c
      integer m,n
c
      real x(*), xt(*), wsave(*)
c
      integer i, m1, n1
c
      m1=m-1
      n1=n-1
c
c transform the rows
c
      if (n.gt.1) then
         call vsinti(n1,wsave)
         call vsint(m,n1,x,xt,m,wsave)
         do i=1,m
            x(n1*m + i)=0.0
         end do
      end if
c
c transpose x into xt
c
      do i=1,m
         call scopy(n,x(i),m,xt((i-1)*n+1),1)
      end do
c
c transform the columns
c
      call vsinti(m1,wsave)
      call vsint(n,m1,xt,x,n,wsave)
      do i=1,n
         xt(m1*n + i)=0.0
      end do
c
c transpose back
c
      do i=1,n
         call scopy(m,xt(i),n,x((i-1)*m+1),1)
      end do
c
      return
      end
      subroutine sinti (n,wsave)
      dimension       wsave(1)
      data pi /3.14159265358979/
      if (n .le. 1) return
      ns2 = n/2
      np1 = n+1
      dt = pi/float(np1)
      do 101 k=1,ns2
         wsave(k) = 2.*sin(k*dt)
  101 continue
      call rffti_1 (np1,wsave(ns2+1))
      return
      end
      subroutine rffti_1 (n,wsave)
      dimension       wsave(1)
      if (n .eq. 1) return
      call rffti1_1 (n,wsave(n+1),wsave(2*n+1))
      return
      end
      subroutine rffti1_1 (n,wa,ifac)
      dimension       wa(1)      ,ifac(1)    ,ntryh(4)
      data ntryh(1),ntryh(2),ntryh(3),ntryh(4)/4,2,3,5/
      nl = n
      nf = 0
      j = 0
  101 j = j+1
      if (j-4) 102,102,103
  102 ntry = ntryh(j)
      go to 104
  103 ntry = ntry+2
  104 nq = nl/ntry
      nr = nl-ntry*nq
      if (nr) 101,105,101
  105 nf = nf+1
      ifac(nf+2) = ntry
      nl = nq
      if (ntry .ne. 2) go to 107
      if (nf .eq. 1) go to 107
      do 106 i=2,nf
         ib = nf-i+2
         ifac(ib+2) = ifac(ib+1)
  106 continue
      ifac(3) = 2
  107 if (nl .ne. 1) go to 104
      ifac(1) = n
      ifac(2) = nf
      tpi = 6.28318530717959
      argh = tpi/float(n)
      is = 0
      nfm1 = nf-1
      l1 = 1
      if (nfm1 .eq. 0) return
      do 110 k1=1,nfm1
         ip = ifac(k1+2)
         ld = 0
         l2 = l1*ip
         ido = n/l2
         ipm = ip-1
         do 109 j=1,ipm
            ld = ld+l1
            i = is
            argld = float(ld)*argh
            fi = 0.
            do 108 ii=3,ido,2
               i = i+2
               fi = fi+1.
               arg = fi*argld
               wa(i-1) = cos(arg)
               wa(i) = sin(arg)
  108       continue
            is = is+ido
  109    continue
         l1 = l2
  110 continue
      return
      end
      real function smach(job)                                          
      integer job                                                       
c                                                                       
c     smach computes machine parameters of floating point               
c     arithmetic for use in testing only.  not required by              
c     linpack proper.                                                   
c                                                                       
c     if trouble with automatic computation of these quantities,        
c     they can be set by direct assignment statements.                  
c     assume the computer has                                           
c                                                                       
c        b = base of arithmetic                                         
c        t = number of base  b  digits                                  
c        l = smallest possible exponent                                 
c        u = largest possible exponent                                  
c                                                                       
c     then                                                              
c                                                                       
c        eps = b**(1-t)                                                 
c        tiny = 100.0*b**(-l+t)                                         
c        huge = 0.01*b**(u-t)                                           
c                                                                       
c     dmach same as smach except t, l, u apply to                       
c     double precision.                                                 
c                                                                       
c     cmach same as smach except if complex division                    
c     is done by                                                        
c                                                                       
c        1/(x+i*y) = (x-i*y)/(x**2+y**2)                                
c                                                                       
c     then                                                              
c                                                                       
c        tiny = sqrt(tiny)                                              
c        huge = sqrt(huge)                                              
c                                                                       
c                                                                       
c     job is 1, 2 or 3 for epsilon, tiny and huge, respectively.        
c                                                                       
c                                                                       
      real eps,tiny,huge,s                                              
c                                                                       
      eps = 1.0                                                         
   10 eps = eps/2.0                                                     
      s = 1.0 + eps                                                     
      if (s .gt. 1.0) go to 10                                          
      eps = 2.0*eps                                                     
c                                                                       
      s = 1.0                                                           
   20 tiny = s                                                          
      s = s/16.0                                                        
      if (s*100. .ne. 0.0) go to 20                                     
      tiny = (tiny/eps)*100.0                                           
      huge = 1.0/tiny                                                   
c                                                                       
      if (job .eq. 1) smach = eps                                       
      if (job .eq. 2) smach = tiny                                      
      if (job .eq. 3) smach = huge                                      
      return                                                            
      end                                                               
      subroutine  smul(n,sx,incx,sy,incy)                               
c                                                                       
c     componentwise multiplication of two vectors.                      
c     uses unrolled loops for increment equal to 1.                     
c     after linpack blas - wws 310596                                   
c                                                                       
      real sx(*),sy(*)                                                  
      integer i,incx,incy,m,mp1,n                                       
c                                                                       
      if(n.le.0)return                                                  
      if((incx.eq.1).and.(incy.eq.1)) go to 20                          
c                                                                       
c        code for increment not equal to 1                              
c                                                                       
      do 10 i = 1,n                                                     
        sx(1+(i-1)*incx) = sx(1+(i-1)*incx)*sy(1+(i-1)*incy)            
   10 continue                                                          
      return                                                            
c                                                                       
c        code for increment equal to 1                                  
c                                                                       
c                                                                       
c        clean-up loop                                                  
c                                                                       
   20 m = mod(n,5)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        sx(i) = sy(i)*sx(i)                                             
   30 continue                                                          
      if( n .lt. 5 ) return                                             
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,5                                                 
        sx(i) = sy(i)*sx(i)                                             
        sx(i + 1) = sy(i+1)*sx(i + 1)                                   
        sx(i + 2) = sy(i+2)*sx(i + 2)                                   
        sx(i + 3) = sy(i+3)*sx(i + 3)                                   
        sx(i + 4) = sy(i+4)*sx(i + 4)                                   
   50 continue                                                          
      return                                                            
      end                                                               

      real function snrm2 ( n, sx, incx)                                
      integer          next                                             
      real   sx(1),  cutlo, cuthi, hitest, sum, xmax, zero, one         
      data   zero, one /0.0e0, 1.0e0/                                   
c                                                                       
c     euclidean norm of the n-vector stored in sx() with storage        
c     increment incx .                                                  
c     if    n .le. 0 return with result = 0.                            
c     if n .ge. 1 then incx must be .ge. 1                              
c                                                                       
c           c.l.lawson, 1978 jan 08                                     
c                                                                       
c     four phase method     using two built-in constants that are       
c     hopefully applicable to all machines.                             
c         cutlo = maximum of  sqrt(u/eps)  over all known machines.     
c         cuthi = minimum of  sqrt(v)      over all known machines.     
c     where                                                             
c         eps = smallest no. such that eps + 1. .gt. 1.                 
c         u   = smallest positive no.   (underflow limit)               
c         v   = largest  no.            (overflow  limit)               
c                                                                       
c     brief outline of algorithm..                                      
c                                                                       
c     phase 1    scans zero components.                                 
c     move to phase 2 when a component is nonzero and .le. cutlo        
c     move to phase 3 when a component is .gt. cutlo                    
c     move to phase 4 when a component is .ge. cuthi/m                  
c     where m = n for x() real and m = 2*n for complex.                 
c                                                                       
c     values for cutlo and cuthi..                                      
c     from the environmental parameters listed in the imsl converter    
c     document the limiting values are as follows..                     
c     cutlo, s.p.   u/eps = 2**(-102) for  honeywell.  close seconds are
c                   univac and dec at 2**(-103)                         
c                   thus cutlo = 2**(-51) = 4.44089e-16                 
c     cuthi, s.p.   v = 2**127 for univac, honeywell, and dec.          
c                   thus cuthi = 2**(63.5) = 1.30438e19                 
c     cutlo, d.p.   u/eps = 2**(-67) for honeywell and dec.             
c                   thus cutlo = 2**(-33.5) = 8.23181d-11               
c     cuthi, d.p.   same as s.p.  cuthi = 1.30438d19                    
c     data cutlo, cuthi / 8.232d-11,  1.304d19 /                        
c     data cutlo, cuthi / 4.441e-16,  1.304e19 /                        
      data cutlo, cuthi / 4.441e-16,  1.304e19 /                        
c                                                                       
      if(n .gt. 0) go to 10                                             
         snrm2  = zero                                                  
         go to 300                                                      
c                                                                       
   10 assign 30 to next                                                 
      sum = zero                                                        
      nn = n * incx                                                     
c                                                 begin main loop       
      i = 1                                                             
   20    go to next,(30, 50, 70, 110)                                   
   30 if( abs(sx(i)) .gt. cutlo) go to 85                               
      assign 50 to next                                                 
      xmax = zero                                                       
c                                                                       
c                        phase 1.  sum is zero                          
c                                                                       
   50 if( sx(i) .eq. zero) go to 200                                    
      if( abs(sx(i)) .gt. cutlo) go to 85                               
c                                                                       
c                                prepare for phase 2.                   
      go to 105                                                         
c                                                                       
c                                prepare for phase 4.                   
c                                                                       
  100 i = j                                                             
      assign 110 to next                                                
      sum = (sum / sx(i)) / sx(i)                                       
  105 xmax = abs(sx(i))                                                 
      go to 115                                                         
c                                                                       
c                   phase 2.  sum is small.                             
c                             scale to avoid destructive underflow.     
c                                                                       
   70 if( abs(sx(i)) .gt. cutlo ) go to 75                              
c                                                                       
c                     common code for phases 2 and 4.                   
c                     in phase 4 sum is large.  scale to avoid overflow.
c                                                                       
  110 if( abs(sx(i)) .le. xmax ) go to 115                              
         sum = one + sum * (xmax / sx(i))**2                            
         xmax = abs(sx(i))                                              
         go to 200                                                      
c                                                                       
  115 sum = sum + (sx(i)/xmax)**2                                       
      go to 200                                                         
c                                                                       
c                                                                       
c                  prepare for phase 3.                                 
c                                                                       
   75 sum = (sum * xmax) * xmax                                         
c                                                                       
c                                                                       
c     for real or d.p. set hitest = cuthi/n                             
c     for complex      set hitest = cuthi/(2*n)                         
c                                                                       
   85 hitest = cuthi/float( n )                                         
c                                                                       
c                   phase 3.  sum is mid-range.  no scaling.            
c                                                                       
      do 95 j =i,nn,incx                                                
      if(abs(sx(j)) .ge. hitest) go to 100                              
   95    sum = sum + sx(j)**2                                           
      snrm2 = sqrt( sum )                                               
      go to 300                                                         
c                                                                       
  200 continue                                                          
      i = i + incx                                                      
      if ( i .le. nn ) go to 20                                         
c                                                                       
c              end of main loop.                                        
c                                                                       
c              compute square root and adjust for scaling.              
c                                                                       
      snrm2 = xmax * sqrt(sum)                                          
  300 continue                                                          
      return                                                            
      end                                                               
      subroutine sptsv( n, nrhs, d, e, b, ldb, info )
*
*  -- lapack routine (version 2.0) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     march 31, 1993
*
*     .. scalar arguments ..
      integer            info, ldb, n, nrhs
*     ..
*     .. array arguments ..
      real               b( ldb, * ), d( * ), e( * )
*     ..
*
*  purpose
*  =======
*
*  sptsv computes the solution to a real system of linear equations
*  a*x = b, where a is an n-by-n symmetric positive definite tridiagonal
*  matrix, and x and b are n-by-nrhs matrices.
*
*  a is factored as a = l*d*l**t, and the factored form of a is then
*  used to solve the system of equations.
*
*  arguments
*  =========
*
*  n       (input) integer
*          the order of the matrix a.  n >= 0.
*
*  nrhs    (input) integer
*          the number of right hand sides, i.e., the number of columns
*          of the matrix b.  nrhs >= 0.
*
*  d       (input/output) real array, dimension (n)
*          on entry, the n diagonal elements of the tridiagonal matrix
*          a.  on exit, the n diagonal elements of the diagonal matrix
*          d from the factorization a = l*d*l**t.
*
*  e       (input/output) real array, dimension (n-1)
*          on entry, the (n-1) subdiagonal elements of the tridiagonal
*          matrix a.  on exit, the (n-1) subdiagonal elements of the
*          unit bidiagonal factor l from the l*d*l**t factorization of
*          a.  (e can also be regarded as the superdiagonal of the unit
*          bidiagonal factor u from the u**t*d*u factorization of a.)
*
*  b       (input/output) real array, dimension (ldb,n)
*          on entry, the n-by-nrhs right hand side matrix b.
*          on exit, if info = 0, the n-by-nrhs solution matrix x.
*
*  ldb     (input) integer
*          the leading dimension of the array b.  ldb >= max(1,n).
*
*  info    (output) integer
*          = 0:  successful exit
*          < 0:  if info = -i, the i-th argument had an illegal value
*          > 0:  if info = i, the leading minor of order i is not
*                positive definite, and the solution has not been
*                computed.  the factorization has not been completed
*                unless i = n.
*
*  =====================================================================
*
*     .. external subroutines ..
      external           spttrf, spttrs, xerbla2
*     ..
*     .. intrinsic functions ..
      intrinsic          max
*     ..
*     .. executable statements ..
*
*     test the input parameters.
*
      info = 0
      if( n.lt.0 ) then
         info = -1
      else if( nrhs.lt.0 ) then
         info = -2
      else if( ldb.lt.max( 1, n ) ) then
         info = -6
      end if
      if( info.ne.0 ) then
         call xerbla2( 'sptsv ', -info )
         return
      end if
*
*     compute the l*d*l' (or u'*d*u) factorization of a.
*
      call spttrf( n, d, e, info )
      if( info.eq.0 ) then
*
*        solve the system a*x = b, overwriting b with x.
*
         call spttrs( n, nrhs, d, e, b, ldb, info )
      end if
      return
*
*     end of sptsv
*
      end
      subroutine spttrf( n, d, e, info )
*
*  -- lapack routine (version 2.0) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     march 31, 1993
*
*     .. scalar arguments ..
      integer            info, n
*     ..
*     .. array arguments ..
      real               d( * ), e( * )
*     ..
*
*  purpose
*  =======
*
*  spttrf computes the factorization of a real symmetric positive
*  definite tridiagonal matrix a.
*
*  if the subdiagonal elements of a are supplied in the array e, the
*  factorization has the form a = l*d*l**t, where d is diagonal and l
*  is unit lower bidiagonal; if the superdiagonal elements of a are
*  supplied, it has the form a = u**t*d*u, where u is unit upper
*  bidiagonal.  (the two forms are equivalent if a is real.)
*
*  arguments
*  =========
*
*  n       (input) integer
*          the order of the matrix a.  n >= 0.
*
*  d       (input/output) real array, dimension (n)
*          on entry, the n diagonal elements of the tridiagonal matrix
*          a.  on exit, the n diagonal elements of the diagonal matrix
*          d from the l*d*l**t factorization of a.
*
*  e       (input/output) real array, dimension (n-1)
*          on entry, the (n-1) off-diagonal elements of the tridiagonal
*          matrix a.
*          on exit, the (n-1) off-diagonal elements of the unit
*          bidiagonal factor l or u from the factorization of a.
*
*  info    (output) integer
*          = 0:  successful exit
*          < 0:  if info = -i, the i-th argument had an illegal value
*          > 0:  if info = i, the leading minor of order i is not
*                positive definite; if i < n, the factorization could
*                not be completed, while if i = n, the factorization was
*                completed, but d(n) = 0.
*
*  =====================================================================
*
*     .. parameters ..
      real               zero
      parameter          ( zero = 0.0e+0 )
*     ..
*     .. local scalars ..
      integer            i
      real               di, ei
*     ..
*     .. external subroutines ..
      external           xerbla2
*     ..
*     .. executable statements ..
*
*     test the input parameters.
*
      info = 0
      if( n.lt.0 ) then
         info = -1
         call xerbla2( 'spttrf', -info )
         return
      end if
*
*     quick return if possible
*
      if( n.eq.0 )
     $   return
*
*     compute the l*d*l' (or u'*d*u) factorization of a.
*
      do 10 i = 1, n - 1
*
*        drop out of the loop if d(i) <= 0: the matrix is not positive
*        definite.
*
         di = d( i )
         if( di.le.zero )
     $      go to 20
*
*        solve for e(i) and d(i+1).
*
         ei = e( i )
         e( i ) = ei / di
         d( i+1 ) = d( i+1 ) - e( i )*ei
   10 continue
*
*     check d(n) for positive definiteness.
*
      i = n
      if( d( i ).gt.zero )
     $   go to 30
*
   20 continue
      info = i
*
   30 continue
      return
*
*     end of spttrf
*
      end
      subroutine spttrs( n, nrhs, d, e, b, ldb, info )
*
*  -- lapack routine (version 2.0) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     march 31, 1993
*
*     .. scalar arguments ..
      integer            info, ldb, n, nrhs
*     ..
*     .. array arguments ..
      real               b( ldb, * ), d( * ), e( * )
*     ..
*
*  purpose
*  =======
*
*  spttrs solves a system of linear equations a * x = b with a
*  symmetric positive definite tridiagonal matrix a using the
*  factorization a = l*d*l**t or a = u**t*d*u computed by spttrf.
*  (the two forms are equivalent if a is real.)
*
*  arguments
*  =========
*
*  n       (input) integer
*          the order of the tridiagonal matrix a.  n >= 0.
*
*  nrhs    (input) integer
*          the number of right hand sides, i.e., the number of columns
*          of the matrix b.  nrhs >= 0.
*
*  d       (input) real array, dimension (n)
*          the n diagonal elements of the diagonal matrix d from the
*          factorization computed by spttrf.
*
*  e       (input) real array, dimension (n-1)
*          the (n-1) off-diagonal elements of the unit bidiagonal factor
*          u or l from the factorization computed by spttrf.
*
*  b       (input/output) real array, dimension (ldb,nrhs)
*          on entry, the right hand side matrix b.
*          on exit, the solution matrix x.
*
*  ldb     (input) integer
*          the leading dimension of the array b.  ldb >= max(1,n).
*
*  info    (output) integer
*          = 0:  successful exit
*          < 0:  if info = -i, the i-th argument had an illegal value
*
*  =====================================================================
*
*     .. local scalars ..
      integer            i, j
*     ..
*     .. external subroutines ..
      external           xerbla2
*     ..
*     .. intrinsic functions ..
      intrinsic          max
*     ..
*     .. executable statements ..
*
*     test the input arguments.
*
      info = 0
      if( n.lt.0 ) then
         info = -1
      else if( nrhs.lt.0 ) then
         info = -2
      else if( ldb.lt.max( 1, n ) ) then
         info = -6
      end if
      if( info.ne.0 ) then
         call xerbla2( 'spttrs', -info )
         return
      end if
*
*     quick return if possible
*
      if( n.eq.0 )
     $   return
*
*     solve a * x = b using the factorization a = l*d*l',
*     overwriting each right hand side vector with its solution.
*
      do 30 j = 1, nrhs
*
*        solve l * x = b.
*
         do 10 i = 2, n
            b( i, j ) = b( i, j ) - b( i-1, j )*e( i-1 )
   10    continue
*
*        solve d * l' * x = b.
*
         b( n, j ) = b( n, j ) / d( n )
         do 20 i = n - 1, 1, -1
            b( i, j ) = b( i, j ) / d( i ) - b( i+1, j )*e( i )
   20    continue
   30 continue
*
      return
*
*     end of spttrs
*
      end
      subroutine xerbla2( srname, info )
*
*  -- lapack auxiliary routine (version 2.0) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     september 30, 1994
*
*     .. scalar arguments ..
      character*6        srname
      integer            info
*     ..
*
*  purpose
*  =======
*
*  xerbla2  is an error handler for the lapack routines.
*  it is called by an lapack routine if an input parameter has an
*  invalid value.  a message is printed and execution stops.
*
*  installers may consider modifying the stop statement in order to
*  call system-specific exception-handling facilities.
*
*  arguments
*  =========
*
*  srname  (input) character*6
*          the name of the routine which called xerbla2.
*
*  info    (input) integer
*          the position of the invalid parameter in the parameter list
*          of the calling routine.
*
* =====================================================================
*
*     .. executable statements ..
*
      write( *, fmt = 9999 )srname, info
*
      stop
*
 9999 format( ' ** on entry to ', a6, ' parameter number ', i2, ' had ',
     $      'an illegal value' )
*
*     end of xerbla2
*
      end
      subroutine  srot (n,sx,incx,sy,incy,c,s)                          
c                                                                       
c     applies a plane rotation.                                         
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sx(1),sy(1),stemp,c,s                                        
      integer i,incx,incy,ix,iy,n                                       
c                                                                       
      if(n.le.0)return                                                  
      if(incx.eq.1.and.incy.eq.1)go to 20                               
c                                                                       
c       code for unequal increments or equal increments not equal       
c         to 1                                                          
c                                                                       
      ix = 1                                                            
      iy = 1                                                            
      if(incx.lt.0)ix = (-n+1)*incx + 1                                 
      if(incy.lt.0)iy = (-n+1)*incy + 1                                 
      do 10 i = 1,n                                                     
        stemp = c*sx(ix) + s*sy(iy)                                     
        sy(iy) = c*sy(iy) - s*sx(ix)                                    
        sx(ix) = stemp                                                  
        ix = ix + incx                                                  
        iy = iy + incy                                                  
   10 continue                                                          
      return                                                            
c                                                                       
c       code for both increments equal to 1                             
c                                                                       
   20 do 30 i = 1,n                                                     
        stemp = c*sx(i) + s*sy(i)                                       
        sy(i) = c*sy(i) - s*sx(i)                                       
        sx(i) = stemp                                                   
   30 continue                                                          
      return                                                            
      end                                                               
      subroutine srotg(sa,sb,c,s)                                       
c                                                                       
c     construct givens plane rotation.                                  
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sa,sb,c,s,roe,scale,r,z                                      
c                                                                       
      roe = sb                                                          
      if( abs(sa) .gt. abs(sb) ) roe = sa                               
      scale = abs(sa) + abs(sb)                                         
      if( scale .ne. 0.0 ) go to 10                                     
         c = 1.0                                                        
         s = 0.0                                                        
         r = 0.0                                                        
         go to 20                                                       
   10 r = scale*sqrt((sa/scale)**2 + (sb/scale)**2)                     
      r = sign(1.0,roe)*r                                               
      c = sa/r                                                          
      s = sb/r                                                          
   20 z = 1.0                                                           
      if( abs(sa) .gt. abs(sb) ) z = s                                  
      if( abs(sb) .ge. abs(sa) .and. c .ne. 0.0 ) z = 1.0/c             
      sa = r                                                            
      sb = z                                                            
      return                                                            
      end                                                               
      subroutine  sscal(n,sa,sx,incx)                                   
c                                                                       
c     scales a vector by a constant.                                    
c     uses unrolled loops for increment equal to 1.                     
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sa,sx(*)                                                     
      integer i,incx,m,mp1,n,nincx                                      
c                                                                       
      if(n.le.0)return                                                  
      if(incx.eq.1)go to 20                                             
c                                                                       
c        code for increment not equal to 1                              
c                                                                       
      nincx = n*incx                                                    
      do 10 i = 1,nincx,incx                                            
        sx(i) = sa*sx(i)                                                
   10 continue                                                          
      return                                                            
c                                                                       
c        code for increment equal to 1                                  
c                                                                       
c                                                                       
c        clean-up loop                                                  
c                                                                       
   20 m = mod(n,5)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        sx(i) = sa*sx(i)                                                
   30 continue                                                          
      if( n .lt. 5 ) return                                             
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,5                                                 
        sx(i) = sa*sx(i)                                                
        sx(i + 1) = sa*sx(i + 1)                                        
        sx(i + 2) = sa*sx(i + 2)                                        
        sx(i + 3) = sa*sx(i + 3)                                        
        sx(i + 4) = sa*sx(i + 4)                                        
   50 continue                                                          
      return                                                            
      end                                                               
      subroutine ssteqr( compz, n, d, e, z, ldz, work, info )
*
*  -- lapack routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     march 31, 1993 
*
*     .. scalar arguments ..
      character          compz
      integer            info, ldz, n
*     ..
*     .. array arguments ..
      real               d( * ), e( * ), work( * ), z( ldz, * )
*     ..
*
*  purpose
*  =======
*
*  ssteqr computes all eigenvalues and, optionally, eigenvectors of a
*  symmetric tridiagonal matrix using the implicit ql or qr method.
*  the eigenvectors of a full or band symmetric matrix can also be found
*  if ssytrd or ssptrd or ssbtrd has been used to reduce this matrix to
*  tridiagonal form.
*
*  arguments
*  =========
*
*  compz   (input) character*1
*          = 'n':  compute eigenvalues only.
*          = 'v':  compute eigenvalues and eigenvectors of the original
*                  symmetric matrix.  on entry, z must contain the
*                  orthogonal matrix used to reduce the original matrix
*                  to tridiagonal form.
*          = 'i':  compute eigenvalues and eigenvectors of the
*                  tridiagonal matrix.  z is initialized to the identity
*                  matrix.
*
*  n       (input) integer
*          the order of the matrix.  n >= 0.
*
*  d       (input/output) real array, dimension (n)
*          on entry, the diagonal elements of the tridiagonal matrix.
*          on exit, if info = 0, the eigenvalues in ascending order.
*
*  e       (input/output) real array, dimension (n-1)
*          on entry, the (n-1) subdiagonal elements of the tridiagonal
*          matrix.
*          on exit, e has been destroyed.
*
*  z       (input/output) real array, dimension (ldz, n)
*          on entry, if  compz = 'v', then z contains the orthogonal
*          matrix used in the reduction to tridiagonal form.
*          on exit, if  compz = 'v', z contains the orthonormal
*          eigenvectors of the original symmetric matrix, and if
*          compz = 'i', z contains the orthonormal eigenvectors of
*          the symmetric tridiagonal matrix.  if an error exit is
*          made, z contains the eigenvectors associated with the
*          stored eigenvalues.
*          if compz = 'n', then z is not referenced.
*
*  ldz     (input) integer
*          the leading dimension of the array z.  ldz >= 1, and if
*          eigenvectors are desired, then  ldz >= max(1,n).
*
*  work    (workspace) real array, dimension (max(1,2*n-2))
*          if compz = 'n', then work is not referenced.
*
*  info    (output) integer
*          = 0:  successful exit
*          < 0:  if info = -i, the i-th argument had an illegal value
*          > 0:  the algorithm has failed to find all the eigenvalues in
*                a total of 30*n iterations; if info = i, then i
*                elements of e have not converged to zero; on exit, d
*                and e contain the elements of a symmetric tridiagonal
*                matrix which is orthogonally similar to the original
*                matrix.
*
*  =====================================================================
*
*     .. parameters ..
      real               zero, one, two
      parameter          ( zero = 0.0, one = 1.0, two = 2.0 )
      integer            maxit
      parameter          ( maxit = 30 )
*     ..
*     .. local scalars ..
      integer            i, icompz, ii, j, jtot, k, l, l1, lend, lendm1,
     $                   lendp1, lm1, m, mm, mm1, nm1, nmaxit
      real               b, c, eps, f, g, p, r, rt1, rt2, s, tst
*     ..
*     .. external functions ..
      logical            lsame
      real               slamch, slapy2
      external           lsame, slamch, slapy2
*     ..
*     .. external subroutines ..
      external           slae2, slaev2, slartg, slasr, slazro, sswap,
     $                   xerbla
*     ..
*     .. intrinsic functions ..
      intrinsic          abs, max, sign
*     ..
*     .. executable statements ..
*
*     test the input parameters.
*
      info = 0
*
      if( lsame( compz, 'n' ) ) then
         icompz = 0
      else if( lsame( compz, 'v' ) ) then
         icompz = 1
      else if( lsame( compz, 'i' ) ) then
         icompz = 2
      else
         icompz = -1
      end if
      if( icompz.lt.0 ) then
         info = -1
      else if( n.lt.0 ) then
         info = -2
      else if( ( ldz.lt.1 ) .or. ( icompz.gt.0 .and. ldz.lt.max( 1,
     $         n ) ) ) then
         info = -6
      end if
      if( info.ne.0 ) then
         call xerbla( 'ssteqr', -info )
         return
      end if
*
*     quick return if possible
*
      if( n.eq.0 )
     $   return
*
      if( n.eq.1 ) then
         if( icompz.gt.0 )
     $      z( 1, 1 ) = one
         return
      end if
*
*     determine the unit roundoff for this environment.
*
      eps = slamch( 'e' )
*
*     compute the eigenvalues and eigenvectors of the tridiagonal
*     matrix.
*
      if( icompz.eq.2 )
     $   call slazro( n, n, zero, one, z, ldz )
*
      nmaxit = n*maxit
      jtot = 0
*
*     determine where the matrix splits and choose ql or qr iteration
*     for each block, according to whether top or bottom diagonal
*     element is smaller.
*
      l1 = 1
      nm1 = n - 1
*
   10 continue
      if( l1.gt.n )
     $   go to 160
      if( l1.gt.1 )
     $   e( l1-1 ) = zero
      if( l1.le.nm1 ) then
         do 20 m = l1, nm1
            tst = abs( e( m ) )
            if( tst.le.eps*( abs( d( m ) )+abs( d( m+1 ) ) ) )
     $         go to 30
   20    continue
      end if
      m = n
*
   30 continue
      l = l1
      lend = m
      if( abs( d( lend ) ).lt.abs( d( l ) ) ) then
         l = lend
         lend = l1
      end if
      l1 = m + 1
*
      if( lend.ge.l ) then
*
*        ql iteration
*
*        look for small subdiagonal element.
*
   40    continue
         if( l.ne.lend ) then
            lendm1 = lend - 1
            do 50 m = l, lendm1
               tst = abs( e( m ) )
               if( tst.le.eps*( abs( d( m ) )+abs( d( m+1 ) ) ) )
     $            go to 60
   50       continue
         end if
*
         m = lend
*
   60    continue
         if( m.lt.lend )
     $      e( m ) = zero
         p = d( l )
         if( m.eq.l )
     $      go to 80
*
*        if remaining matrix is 2-by-2, use slae2 or slaev2
*        to compute its eigensystem.
*
         if( m.eq.l+1 ) then
            if( icompz.gt.0 ) then
               call slaev2( d( l ), e( l ), d( l+1 ), rt1, rt2, c, s )
               work( l ) = c
               work( n-1+l ) = s
               call slasr( 'r', 'v', 'b', n, 2, work( l ),
     $                     work( n-1+l ), z( 1, l ), ldz )
            else
               call slae2( d( l ), e( l ), d( l+1 ), rt1, rt2 )
            end if
            d( l ) = rt1
            d( l+1 ) = rt2
            e( l ) = zero
            l = l + 2
            if( l.le.lend )
     $         go to 40
            go to 10
         end if
*
         if( jtot.eq.nmaxit )
     $      go to 140
         jtot = jtot + 1
*
*        form shift.
*
         g = ( d( l+1 )-p ) / ( two*e( l ) )
         r = slapy2( g, one )
         g = d( m ) - p + ( e( l ) / ( g+sign( r, g ) ) )
*
         s = one
         c = one
         p = zero
*
*        inner loop
*
         mm1 = m - 1
         do 70 i = mm1, l, -1
            f = s*e( i )
            b = c*e( i )
            call slartg( g, f, c, s, r )
            if( i.ne.m-1 )
     $         e( i+1 ) = r
            g = d( i+1 ) - p
            r = ( d( i )-g )*s + two*c*b
            p = s*r
            d( i+1 ) = g + p
            g = c*r - b
*
*           if eigenvectors are desired, then save rotations.
*
            if( icompz.gt.0 ) then
               work( i ) = c
               work( n-1+i ) = -s
            end if
*
   70    continue
*
*        if eigenvectors are desired, then apply saved rotations.
*
         if( icompz.gt.0 ) then
            mm = m - l + 1
            call slasr( 'r', 'v', 'b', n, mm, work( l ), work( n-1+l ),
     $                  z( 1, l ), ldz )
         end if
*
         d( l ) = d( l ) - p
         e( l ) = g
         go to 40
*
*        eigenvalue found.
*
   80    continue
         d( l ) = p
*
         l = l + 1
         if( l.le.lend )
     $      go to 40
         go to 10
*
      else
*
*        qr iteration
*
*        look for small superdiagonal element.
*
   90    continue
         if( l.ne.lend ) then
            lendp1 = lend + 1
            do 100 m = l, lendp1, -1
               tst = abs( e( m-1 ) )
               if( tst.le.eps*( abs( d( m ) )+abs( d( m-1 ) ) ) )
     $            go to 110
  100       continue
         end if
*
         m = lend
*
  110    continue
         if( m.gt.lend )
     $      e( m-1 ) = zero
         p = d( l )
         if( m.eq.l )
     $      go to 130
*
*        if remaining matrix is 2-by-2, use slae2 or slaev2
*        to compute its eigensystem.
*
         if( m.eq.l-1 ) then
            if( icompz.gt.0 ) then
               call slaev2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2, c, s )
               work( m ) = c
               work( n-1+m ) = s
               call slasr( 'r', 'v', 'f', n, 2, work( m ),
     $                     work( n-1+m ), z( 1, l-1 ), ldz )
            else
               call slae2( d( l-1 ), e( l-1 ), d( l ), rt1, rt2 )
            end if
            d( l-1 ) = rt1
            d( l ) = rt2
            e( l-1 ) = zero
            l = l - 2
            if( l.ge.lend )
     $         go to 90
            go to 10
         end if
*
         if( jtot.eq.nmaxit )
     $      go to 140
         jtot = jtot + 1
*
*        form shift.
*
         g = ( d( l-1 )-p ) / ( two*e( l-1 ) )
         r = slapy2( g, one )
         g = d( m ) - p + ( e( l-1 ) / ( g+sign( r, g ) ) )
*
         s = one
         c = one
         p = zero
*
*        inner loop
*
         lm1 = l - 1
         do 120 i = m, lm1
            f = s*e( i )
            b = c*e( i )
            call slartg( g, f, c, s, r )
            if( i.ne.m )
     $         e( i-1 ) = r
            g = d( i ) - p
            r = ( d( i+1 )-g )*s + two*c*b
            p = s*r
            d( i ) = g + p
            g = c*r - b
*
*           if eigenvectors are desired, then save rotations.
*
            if( icompz.gt.0 ) then
               work( i ) = c
               work( n-1+i ) = s
            end if
*
  120    continue
*
*        if eigenvectors are desired, then apply saved rotations.
*
         if( icompz.gt.0 ) then
            mm = l - m + 1
            call slasr( 'r', 'v', 'f', n, mm, work( m ), work( n-1+m ),
     $                  z( 1, m ), ldz )
         end if
*
         d( l ) = d( l ) - p
         e( lm1 ) = g
         go to 90
*
*        eigenvalue found.
*
  130    continue
         d( l ) = p
*
         l = l - 1
         if( l.ge.lend )
     $      go to 90
         go to 10
*
      end if
*
*     set error -- no convergence to an eigenvalue after a total
*     of n*maxit iterations.
*
  140 continue
      do 150 i = 1, n - 1
         if( e( i ).ne.zero )
     $      info = info + 1
  150 continue
      return
*
*     order eigenvalues and eigenvectors.
*
  160 continue
      do 180 ii = 2, n
         i = ii - 1
         k = i
         p = d( i )
         do 170 j = ii, n
            if( d( j ).lt.p ) then
               k = j
               p = d( j )
            end if
  170    continue
         if( k.ne.i ) then
            d( k ) = d( i )
            d( i ) = p
            if( icompz.gt.0 )
     $         call sswap( n, z( 1, i ), 1, z( 1, k ), 1 )
         end if
  180 continue
*
      return
*
*     end of ssteqr
*
      end
      subroutine slartg( f, g, cs, sn, r )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      real               cs, f, g, r, sn
*     ..
*
*  purpose
*  =======
*
*  slartg generate a plane rotation so that
*
*     [  cs  sn  ]  .  [ f ]  =  [ r ]   where cs**2 + sn**2 = 1.
*     [ -sn  cs  ]     [ g ]     [ 0 ]
*
*  this is a faster version of the blas1 routine srotg, except for
*  the following differences:
*     f and g are unchanged on return.
*     if g=0, then cs=1 and sn=0.
*     if f=0 and (g .ne. 0), then cs=0 and sn=1 without doing any
*        floating point operations (saves work in sbdsqr when
*        there are zeros on the diagonal).
*
*  arguments
*  =========
*
*  f       (input) real
*          the first component of vector to be rotated.
*
*  g       (input) real
*          the second component of vector to be rotated.
*
*  cs      (output) real
*          the cosine of the rotation.
*
*  sn      (output) real
*          the sine of the rotation.
*
*  r       (output) real
*          the nonzero component of the rotated vector.
*
*  =====================================================================
*
*     .. parameters ..
      real               zero
      parameter          ( zero = 0.0e0 )
      real               one
      parameter          ( one = 1.0e0 )
*     ..
*     .. local scalars ..
      real               t, tt
*     ..
*     .. intrinsic functions ..
      intrinsic          abs, sqrt
*     ..
*     .. executable statements ..
*
      if( g.eq.zero ) then
         cs = one
         sn = zero
         r = f
      else if( f.eq.zero ) then
         cs = zero
         sn = one
         r = g
      else
         if( abs( f ).gt.abs( g ) ) then
            t = g / f
            tt = sqrt( one+t*t )
            cs = one / tt
            sn = t*cs
            r = f*tt
         else
            t = f / g
            tt = sqrt( one+t*t )
            sn = one / tt
            cs = t*sn
            r = g*tt
         end if
      end if
      return
*
*     end of slartg
*
      end
      real             function slapy2( x, y )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      real               x, y
*     ..
*
*  purpose
*  =======
*
*  slapy2 returns sqrt(x**2+y**2), taking care not to cause unnecessary
*  overflow.
*
*  arguments
*  =========
*
*  x       (input) real
*  y       (input) real
*          x and y specify the values x and y.
*
*  =====================================================================
*
*     .. parameters ..
      real               zero
      parameter          ( zero = 0.0e0 )
      real               one
      parameter          ( one = 1.0e0 )
*     ..
*     .. local scalars ..
      real               w, xabs, yabs, z
*     ..
*     .. intrinsic functions ..
      intrinsic          abs, max, min, sqrt
*     ..
*     .. executable statements ..
*
      xabs = abs( x )
      yabs = abs( y )
      w = max( xabs, yabs )
      z = min( xabs, yabs )
      if( z.eq.zero ) then
         slapy2 = w
      else
         slapy2 = w*sqrt( one+( z / w )**2 )
      end if
      return
*
*     end of slapy2
*
      end
      subroutine slae2( a, b, c, rt1, rt2 )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      real               a, b, c, rt1, rt2
*     ..
*
*  purpose
*  =======
*
*  slae2  computes the eigenvalues of a 2-by-2 symmetric matrix
*     [  a   b  ]
*     [  b   c  ].
*  on return, rt1 is the eigenvalue of larger absolute value, and rt2
*  is the eigenvalue of smaller absolute value.
*
*  arguments
*  =========
*
*  a       (input) real
*          the (1,1) entry of the 2-by-2 matrix.
*
*  b       (input) real
*          the (1,2) and (2,1) entries of the 2-by-2 matrix.
*
*  c       (input) real
*          the (2,2) entry of the 2-by-2 matrix.
*
*  rt1     (output) real
*          the eigenvalue of larger absolute value.
*
*  rt2     (output) real
*          the eigenvalue of smaller absolute value.
*
*  further details
*  ===============
*
*  rt1 is accurate to a few ulps barring over/underflow.
*
*  rt2 may be inaccurate if there is massive cancellation in the
*  determinant a*c-b*b; higher precision or correctly rounded or
*  correctly truncated arithmetic would be needed to compute rt2
*  accurately in all cases.
*
*  overflow is possible only if rt1 is within a factor of 5 of overflow.
*  underflow is harmless if the input data is 0 or exceeds
*     underflow_threshold / macheps.
*
* =====================================================================
*
*     .. parameters ..
      real               one
      parameter          ( one = 1.0e0 )
      real               two
      parameter          ( two = 2.0e0 )
      real               zero
      parameter          ( zero = 0.0e0 )
      real               half
      parameter          ( half = 0.5e0 )
*     ..
*     .. local scalars ..
      real               ab, acmn, acmx, adf, df, rt, sm, tb
*     ..
*     .. intrinsic functions ..
      intrinsic          abs, sqrt
*     ..
*     .. executable statements ..
*
*     compute the eigenvalues
*
      sm = a + c
      df = a - c
      adf = abs( df )
      tb = b + b
      ab = abs( tb )
      if( abs( a ).gt.abs( c ) ) then
         acmx = a
         acmn = c
      else
         acmx = c
         acmn = a
      end if
      if( adf.gt.ab ) then
         rt = adf*sqrt( one+( ab / adf )**2 )
      else if( adf.lt.ab ) then
         rt = ab*sqrt( one+( adf / ab )**2 )
      else
*
*        includes case ab=adf=0
*
         rt = ab*sqrt( two )
      end if
      if( sm.lt.zero ) then
         rt1 = half*( sm-rt )
*
*        order of execution important.
*        to get fully accurate smaller eigenvalue,
*        next line needs to be executed in higher precision.
*
         rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
      else if( sm.gt.zero ) then
         rt1 = half*( sm+rt )
*
*        order of execution important.
*        to get fully accurate smaller eigenvalue,
*        next line needs to be executed in higher precision.
*
         rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
      else
*
*        includes case rt1 = rt2 = 0
*
         rt1 = half*rt
         rt2 = -half*rt
      end if
      return
*
*     end of slae2
*
      end
      subroutine slasr( side, pivot, direct, m, n, c, s, a, lda )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      character          direct, pivot, side
      integer            lda, m, n
*     ..
*     .. array arguments ..
      real               a( lda, * ), c( * ), s( * )
*     ..
*
*  purpose
*  =======
*
*  slasr   performs the transformation
*
*     a := p*a,   when side = 'l' or 'l'  (  left-hand side )
*
*     a := a*p',  when side = 'r' or 'r'  ( right-hand side )
*
*  where a is an m by n real matrix and p is an orthogonal matrix,
*  consisting of a sequence of plane rotations determined by the
*  parameters pivot and direct as follows ( z = m when side = 'l' or 'l'
*  and z = n when side = 'r' or 'r' ):
*
*  when  direct = 'f' or 'f'  ( forward sequence ) then
*
*     p = p( z - 1 )*...*p( 2 )*p( 1 ),
*
*  and when direct = 'b' or 'b'  ( backward sequence ) then
*
*     p = p( 1 )*p( 2 )*...*p( z - 1 ),
*
*  where  p( k ) is a plane rotation matrix for the following planes:
*
*     when  pivot = 'v' or 'v'  ( variable pivot ),
*        the plane ( k, k + 1 )
*
*     when  pivot = 't' or 't'  ( top pivot ),
*        the plane ( 1, k + 1 )
*
*     when  pivot = 'b' or 'b'  ( bottom pivot ),
*        the plane ( k, z )
*
*  c( k ) and s( k )  must contain the  cosine and sine that define the
*  matrix  p( k ).  the two by two plane rotation part of the matrix
*  p( k ), r( k ), is assumed to be of the form
*
*     r( k ) = (  c( k )  s( k ) ).
*              ( -s( k )  c( k ) )
*
*  this version vectorises across rows of the array a when side = 'l'.
*
*  arguments
*  =========
*
*  side    (input) character*1
*          specifies whether the plane rotation matrix p is applied to
*          a on the left or the right.
*          = 'l':  left, compute a := p*a
*          = 'r':  right, compute a:= a*p'
*
*  direct  (input) character*1
*          specifies whether p is a forward or backward sequence of
*          plane rotations.
*          = 'f':  forward, p = p( z - 1 )*...*p( 2 )*p( 1 )
*          = 'b':  backward, p = p( 1 )*p( 2 )*...*p( z - 1 )
*
*  pivot   (input) character*1
*          specifies the plane for which p(k) is a plane rotation
*          matrix.
*          = 'v':  variable pivot, the plane (k,k+1)
*          = 't':  top pivot, the plane (1,k+1)
*          = 'b':  bottom pivot, the plane (k,z)
*
*  m       (input) integer
*          the number of rows of the matrix a.  if m <= 1, an immediate
*          return is effected.
*
*  n       (input) integer
*          the number of columns of the matrix a.  if n <= 1, an
*          immediate return is effected.
*
*  c, s    (input) real arrays, dimension
*                  (m-1) if side = 'l'
*                  (n-1) if side = 'r'
*          c(k) and s(k) contain the cosine and sine that define the
*          matrix p(k).  the two by two plane rotation part of the
*          matrix p(k), r(k), is assumed to be of the form
*          r( k ) = (  c( k )  s( k ) ).
*                   ( -s( k )  c( k ) )
*
*  a       (input/output) real array, dimension (lda,n)
*          the m by n matrix a.  on exit, a is overwritten by p*a if
*          side = 'r' or by a*p' if side = 'l'.
*
*  lda     (input) integer
*          the leading dimension of the array a.  lda >= max(1,m).
*
*  =====================================================================
*
*     .. parameters ..
      real               one, zero
      parameter          ( one = 1.0e+0, zero = 0.0e+0 )
*     ..
*     .. local scalars ..
      integer            i, info, j
      real               ctemp, stemp, temp
*     ..
*     .. external functions ..
      logical            lsame
      external           lsame
*     ..
*     .. external subroutines ..
      external           xerbla
*     ..
*     .. intrinsic functions ..
      intrinsic          max
*     ..
*     .. executable statements ..
*
*     test the input parameters
*
      info = 0
      if( .not.( lsame( side, 'l' ) .or. lsame( side, 'r' ) ) ) then
         info = 1
      else if( .not.( lsame( pivot, 'v' ) .or. lsame( pivot,
     $         't' ) .or. lsame( pivot, 'b' ) ) ) then
         info = 2
      else if( .not.( lsame( direct, 'f' ) .or. lsame( direct, 'b' ) ) )
     $          then
         info = 3
      else if( m.lt.0 ) then
         info = 4
      else if( n.lt.0 ) then
         info = 5
      else if( lda.lt.max( 1, m ) ) then
         info = 9
      end if
      if( info.ne.0 ) then
         call xerbla( 'slasr ', info )
         return
      end if
*
*     quick return if possible
*
      if( ( m.eq.0 ) .or. ( n.eq.0 ) )
     $   return
      if( lsame( side, 'l' ) ) then
*
*        form  p * a
*
         if( lsame( pivot, 'v' ) ) then
            if( lsame( direct, 'f' ) ) then
               do 20 j = 1, m - 1
                  ctemp = c( j )
                  stemp = s( j )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 10 i = 1, n
                        temp = a( j+1, i )
                        a( j+1, i ) = ctemp*temp - stemp*a( j, i )
                        a( j, i ) = stemp*temp + ctemp*a( j, i )
   10                continue
                  end if
   20          continue
            else if( lsame( direct, 'b' ) ) then
               do 40 j = m - 1, 1, -1
                  ctemp = c( j )
                  stemp = s( j )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 30 i = 1, n
                        temp = a( j+1, i )
                        a( j+1, i ) = ctemp*temp - stemp*a( j, i )
                        a( j, i ) = stemp*temp + ctemp*a( j, i )
   30                continue
                  end if
   40          continue
            end if
         else if( lsame( pivot, 't' ) ) then
            if( lsame( direct, 'f' ) ) then
               do 60 j = 2, m
                  ctemp = c( j-1 )
                  stemp = s( j-1 )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 50 i = 1, n
                        temp = a( j, i )
                        a( j, i ) = ctemp*temp - stemp*a( 1, i )
                        a( 1, i ) = stemp*temp + ctemp*a( 1, i )
   50                continue
                  end if
   60          continue
            else if( lsame( direct, 'b' ) ) then
               do 80 j = m, 2, -1
                  ctemp = c( j-1 )
                  stemp = s( j-1 )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 70 i = 1, n
                        temp = a( j, i )
                        a( j, i ) = ctemp*temp - stemp*a( 1, i )
                        a( 1, i ) = stemp*temp + ctemp*a( 1, i )
   70                continue
                  end if
   80          continue
            end if
         else if( lsame( pivot, 'b' ) ) then
            if( lsame( direct, 'f' ) ) then
               do 100 j = 1, m - 1
                  ctemp = c( j )
                  stemp = s( j )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 90 i = 1, n
                        temp = a( j, i )
                        a( j, i ) = stemp*a( m, i ) + ctemp*temp
                        a( m, i ) = ctemp*a( m, i ) - stemp*temp
   90                continue
                  end if
  100          continue
            else if( lsame( direct, 'b' ) ) then
               do 120 j = m - 1, 1, -1
                  ctemp = c( j )
                  stemp = s( j )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 110 i = 1, n
                        temp = a( j, i )
                        a( j, i ) = stemp*a( m, i ) + ctemp*temp
                        a( m, i ) = ctemp*a( m, i ) - stemp*temp
  110                continue
                  end if
  120          continue
            end if
         end if
      else if( lsame( side, 'r' ) ) then
*
*        form a * p'
*
         if( lsame( pivot, 'v' ) ) then
            if( lsame( direct, 'f' ) ) then
               do 140 j = 1, n - 1
                  ctemp = c( j )
                  stemp = s( j )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 130 i = 1, m
                        temp = a( i, j+1 )
                        a( i, j+1 ) = ctemp*temp - stemp*a( i, j )
                        a( i, j ) = stemp*temp + ctemp*a( i, j )
  130                continue
                  end if
  140          continue
            else if( lsame( direct, 'b' ) ) then
               do 160 j = n - 1, 1, -1
                  ctemp = c( j )
                  stemp = s( j )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 150 i = 1, m
                        temp = a( i, j+1 )
                        a( i, j+1 ) = ctemp*temp - stemp*a( i, j )
                        a( i, j ) = stemp*temp + ctemp*a( i, j )
  150                continue
                  end if
  160          continue
            end if
         else if( lsame( pivot, 't' ) ) then
            if( lsame( direct, 'f' ) ) then
               do 180 j = 2, n
                  ctemp = c( j-1 )
                  stemp = s( j-1 )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 170 i = 1, m
                        temp = a( i, j )
                        a( i, j ) = ctemp*temp - stemp*a( i, 1 )
                        a( i, 1 ) = stemp*temp + ctemp*a( i, 1 )
  170                continue
                  end if
  180          continue
            else if( lsame( direct, 'b' ) ) then
               do 200 j = n, 2, -1
                  ctemp = c( j-1 )
                  stemp = s( j-1 )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 190 i = 1, m
                        temp = a( i, j )
                        a( i, j ) = ctemp*temp - stemp*a( i, 1 )
                        a( i, 1 ) = stemp*temp + ctemp*a( i, 1 )
  190                continue
                  end if
  200          continue
            end if
         else if( lsame( pivot, 'b' ) ) then
            if( lsame( direct, 'f' ) ) then
               do 220 j = 1, n - 1
                  ctemp = c( j )
                  stemp = s( j )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 210 i = 1, m
                        temp = a( i, j )
                        a( i, j ) = stemp*a( i, n ) + ctemp*temp
                        a( i, n ) = ctemp*a( i, n ) - stemp*temp
  210                continue
                  end if
  220          continue
            else if( lsame( direct, 'b' ) ) then
               do 240 j = n - 1, 1, -1
                  ctemp = c( j )
                  stemp = s( j )
                  if( ( ctemp.ne.one ) .or. ( stemp.ne.zero ) ) then
                     do 230 i = 1, m
                        temp = a( i, j )
                        a( i, j ) = stemp*a( i, n ) + ctemp*temp
                        a( i, n ) = ctemp*a( i, n ) - stemp*temp
  230                continue
                  end if
  240          continue
            end if
         end if
      end if
*
      return
*
*     end of slasr
*
      end
      subroutine xerbla( srname, info )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     february 29, 1992
*
*     .. scalar arguments ..
      character*6        srname
      integer            info
*     ..
*
*  purpose
*  =======
*
*  xerbla  is an error handler for the lapack routines.
*  it is called by an lapack routine if an input parameter has an
*  invalid value.  a message is printed and execution stops.
*
*  installers may consider modifying the stop statement in order to
*  call system-specific exception-handling facilities.
*
*  arguments
*  =========
*
*  srname  (input) character*6
*          the name of the routine which called xerbla.
*
*  info    (input) integer
*          the position of the invalid parameter in the parameter list
*          of the calling routine.
*
*     .. executable statements ..
*
      write( *, fmt = 9999 )srname, info
*
      stop
*
 9999 format( ' ** on entry to ', a6, ' parameter number ', i2, ' had ',
     $      'an illegal value' )
*
*     end of xerbla
*
      end
      subroutine slaev2( a, b, c, rt1, rt2, cs1, sn1 )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      real               a, b, c, cs1, rt1, rt2, sn1
*     ..
*
*  purpose
*  =======
*
*  slaev2 computes the eigendecomposition of a 2-by-2 symmetric matrix
*     [  a   b  ]
*     [  b   c  ].
*  on return, rt1 is the eigenvalue of larger absolute value, rt2 is the
*  eigenvalue of smaller absolute value, and (cs1,sn1) is the unit right
*  eigenvector for rt1, giving the decomposition
*
*     [ cs1  sn1 ] [  a   b  ] [ cs1 -sn1 ]  =  [ rt1  0  ]
*     [-sn1  cs1 ] [  b   c  ] [ sn1  cs1 ]     [  0  rt2 ].
*
*  arguments
*  =========
*
*  a       (input) real
*          the (1,1) entry of the 2-by-2 matrix.
*
*  b       (input) real
*          the (1,2) entry and the conjugate of the (2,1) entry of the
*          2-by-2 matrix.
*
*  c       (input) real
*          the (2,2) entry of the 2-by-2 matrix.
*
*  rt1     (output) real
*          the eigenvalue of larger absolute value.
*
*  rt2     (output) real
*          the eigenvalue of smaller absolute value.
*
*  cs1     (output) real
*  sn1     (output) real
*          the vector (cs1, sn1) is a unit right eigenvector for rt1.
*
*  further details
*  ===============
*
*  rt1 is accurate to a few ulps barring over/underflow.
*
*  rt2 may be inaccurate if there is massive cancellation in the
*  determinant a*c-b*b; higher precision or correctly rounded or
*  correctly truncated arithmetic would be needed to compute rt2
*  accurately in all cases.
*
*  cs1 and sn1 are accurate to a few ulps barring over/underflow.
*
*  overflow is possible only if rt1 is within a factor of 5 of overflow.
*  underflow is harmless if the input data is 0 or exceeds
*     underflow_threshold / macheps.
*
* =====================================================================
*
*     .. parameters ..
      real               one
      parameter          ( one = 1.0e0 )
      real               two
      parameter          ( two = 2.0e0 )
      real               zero
      parameter          ( zero = 0.0e0 )
      real               half
      parameter          ( half = 0.5e0 )
*     ..
*     .. local scalars ..
      integer            sgn1, sgn2
      real               ab, acmn, acmx, acs, adf, cs, ct, df, rt, sm,
     $                   tb, tn
*     ..
*     .. intrinsic functions ..
      intrinsic          abs, sqrt
*     ..
*     .. executable statements ..
*
*     compute the eigenvalues
*
      sm = a + c
      df = a - c
      adf = abs( df )
      tb = b + b
      ab = abs( tb )
      if( abs( a ).gt.abs( c ) ) then
         acmx = a
         acmn = c
      else
         acmx = c
         acmn = a
      end if
      if( adf.gt.ab ) then
         rt = adf*sqrt( one+( ab / adf )**2 )
      else if( adf.lt.ab ) then
         rt = ab*sqrt( one+( adf / ab )**2 )
      else
*
*        includes case ab=adf=0
*
         rt = ab*sqrt( two )
      end if
      if( sm.lt.zero ) then
         rt1 = half*( sm-rt )
         sgn1 = -1
*
*        order of execution important.
*        to get fully accurate smaller eigenvalue,
*        next line needs to be executed in higher precision.
*
         rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
      else if( sm.gt.zero ) then
         rt1 = half*( sm+rt )
         sgn1 = 1
*
*        order of execution important.
*        to get fully accurate smaller eigenvalue,
*        next line needs to be executed in higher precision.
*
         rt2 = ( acmx / rt1 )*acmn - ( b / rt1 )*b
      else
*
*        includes case rt1 = rt2 = 0
*
         rt1 = half*rt
         rt2 = -half*rt
         sgn1 = 1
      end if
*
*     compute the eigenvector
*
      if( df.ge.zero ) then
         cs = df + rt
         sgn2 = 1
      else
         cs = df - rt
         sgn2 = -1
      end if
      acs = abs( cs )
      if( acs.gt.ab ) then
         ct = -tb / cs
         sn1 = one / sqrt( one+ct*ct )
         cs1 = ct*sn1
      else
         if( ab.eq.zero ) then
            cs1 = one
            sn1 = zero
         else
            tn = -cs / tb
            cs1 = one / sqrt( one+tn*tn )
            sn1 = tn*cs1
         end if
      end if
      if( sgn1.eq.sgn2 ) then
         tn = cs1
         cs1 = -sn1
         sn1 = tn
      end if
      return
*
*     end of slaev2
*
      end
      subroutine slazro( m, n, alpha, beta, a, lda )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      integer            lda, m, n
      real               alpha, beta
*     ..
*     .. array arguments ..
      real               a( lda, * )
*     ..
*
*  purpose
*  =======
*
*  slazro initializes a 2-d array a to beta on the diagonal and
*  alpha on the offdiagonals.
*
*  arguments
*  =========
*
*  m       (input) integer
*          the number of rows of the matrix a.  m >= 0.
*
*  n       (input) integer
*          the number of columns of the matrix a.  n >= 0.
*
*  alpha   (input) real
*          the constant to which the offdiagonal elements are to be set.
*
*  beta    (input) real
*          the constant to which the diagonal elements are to be set.
*
*  a       (output) real array, dimension (lda,n)
*          on exit, the leading m by n submatrix of a is set such that
*             a(i,j) = alpha,  1 <= i <= m, 1 <= j <= n, i <> j
*             a(i,i) = beta,   1 <= i <= min(m,n).
*
*  lda     (input) integer
*          the leading dimension of the array a.  lda >= max(1,m).
*
*  =====================================================================
*
*     .. local scalars ..
      integer            i, j
*     ..
*     .. intrinsic functions ..
      intrinsic          min
*     ..
*     .. executable statements ..
*
      do 20 j = 1, n
         do 10 i = 1, m
            a( i, j ) = alpha
   10    continue
   20 continue
*
      do 30 i = 1, min( m, n )
         a( i, i ) = beta
   30 continue
*
      return
*
*     end of slazro
*
      end
      real             function slamch( cmach )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      character          cmach
*     ..
*
*  purpose
*  =======
*
*  slamch determines single precision machine parameters.
*
*  arguments
*  =========
*
*  cmach   (input) character*1
*          specifies the value to be returned by slamch:
*          = 'e' or 'e',   slamch := eps
*          = 's' or 's ,   slamch := sfmin
*          = 'b' or 'b',   slamch := base
*          = 'p' or 'p',   slamch := eps*base
*          = 'n' or 'n',   slamch := t
*          = 'r' or 'r',   slamch := rnd
*          = 'm' or 'm',   slamch := emin
*          = 'u' or 'u',   slamch := rmin
*          = 'l' or 'l',   slamch := emax
*          = 'o' or 'o',   slamch := rmax
*
*          where
*
*          eps   = relative machine precision
*          sfmin = safe minimum, such that 1/sfmin does not overflow
*          base  = base of the machine
*          prec  = eps*base
*          t     = number of (base) digits in the mantissa
*          rnd   = 1.0 when rounding occurs in addition, 0.0 otherwise
*          emin  = minimum exponent before (gradual) underflow
*          rmin  = underflow threshold - base**(emin-1)
*          emax  = largest exponent before overflow
*          rmax  = overflow threshold  - (base**emax)*(1-eps)
*
* =====================================================================
*
*     .. parameters ..
      real               one, zero
      parameter          ( one = 1.0e+0, zero = 0.0e+0 )
*     ..
*     .. local scalars ..
      logical            first, lrnd
      integer            beta, imax, imin, it
      real               base, emax, emin, eps, prec, rmach, rmax, rmin,
     $                   rnd, sfmin, small, t
*     ..
*     .. external functions ..
      logical            lsame
      external           lsame
*     ..
*     .. external subroutines ..
      external           slamc2
*     ..
*     .. save statement ..
      save               first, eps, sfmin, base, t, rnd, emin, rmin,
     $                   emax, rmax, prec
*     ..
*     .. data statements ..
      data               first / .true. /
*     ..
*     .. executable statements ..
*
      if( first ) then
         first = .false.
         call slamc2( beta, it, lrnd, eps, imin, rmin, imax, rmax )
         base = beta
         t = it
         if( lrnd ) then
            rnd = one
            eps = ( base**( 1-it ) ) / 2
         else
            rnd = zero
            eps = base**( 1-it )
         end if
         prec = eps*base
         emin = imin
         emax = imax
         sfmin = rmin
         small = one / rmax
         if( small.ge.sfmin ) then
*
*           use small plus a bit, to avoid the possibility of rounding
*           causing overflow when computing  1/sfmin.
*
            sfmin = small*( one+eps )
         end if
      end if
*
      if( lsame( cmach, 'e' ) ) then
         rmach = eps
      else if( lsame( cmach, 's' ) ) then
         rmach = sfmin
      else if( lsame( cmach, 'b' ) ) then
         rmach = base
      else if( lsame( cmach, 'p' ) ) then
         rmach = prec
      else if( lsame( cmach, 'n' ) ) then
         rmach = t
      else if( lsame( cmach, 'r' ) ) then
         rmach = rnd
      else if( lsame( cmach, 'm' ) ) then
         rmach = emin
      else if( lsame( cmach, 'u' ) ) then
         rmach = rmin
      else if( lsame( cmach, 'l' ) ) then
         rmach = emax
      else if( lsame( cmach, 'o' ) ) then
         rmach = rmax
      end if
*
      slamch = rmach
      return
*
*     end of slamch
*
      end
*
************************************************************************
*
      subroutine slamc1( beta, t, rnd, ieee1 )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      logical            ieee1, rnd
      integer            beta, t
*     ..
*
*  purpose
*  =======
*
*  slamc1 determines the machine parameters given by beta, t, rnd, and
*  ieee1.
*
*  arguments
*  =========
*
*  beta    (output) integer
*          the base of the machine.
*
*  t       (output) integer
*          the number of ( beta ) digits in the mantissa.
*
*  rnd     (output) logical
*          specifies whether proper rounding  ( rnd = .true. )  or
*          chopping  ( rnd = .false. )  occurs in addition. this may not
*          be a reliable guide to the way in which the machine performs
*          its arithmetic.
*
*  ieee1   (output) logical
*          specifies whether rounding appears to be done in the ieee
*          'round to nearest' style.
*
*  further details
*  ===============
*
*  the routine is based on the routine  envron  by malcolm and
*  incorporates suggestions by gentleman and marovich. see
*
*     malcolm m. a. (1972) algorithms to reveal properties of
*        floating-point arithmetic. comms. of the acm, 15, 949-951.
*
*     gentleman w. m. and marovich s. b. (1974) more on algorithms
*        that reveal properties of floating point arithmetic units.
*        comms. of the acm, 17, 276-277.
*
* =====================================================================
*
*     .. local scalars ..
      logical            first, lieee1, lrnd
      integer            lbeta, lt
      real               a, b, c, f, one, qtr, savec, t1, t2
*     ..
*     .. external functions ..
      real               slamc3
      external           slamc3
*     ..
*     .. save statement ..
      save               first, lieee1, lbeta, lrnd, lt
*     ..
*     .. data statements ..
      data               first / .true. /
*     ..
*     .. executable statements ..
*
      if( first ) then
         first = .false.
         one = 1
*
*        lbeta,  lieee1,  lt and  lrnd  are the  local values  of  beta,
*        ieee1, t and rnd.
*
*        throughout this routine  we use the function  slamc3  to ensure
*        that relevant values are  stored and not held in registers,  or
*        are not affected by optimizers.
*
*        compute  a = 2.0**m  with the  smallest positive integer m such
*        that
*
*           fl( a + 1.0 ) = a.
*
         a = 1
         c = 1
*
*+       while( c.eq.one )loop
   10    continue
         if( c.eq.one ) then
            a = 2*a
            c = slamc3( a, one )
            c = slamc3( c, -a )
            go to 10
         end if
*+       end while
*
*        now compute  b = 2.0**m  with the smallest positive integer m
*        such that
*
*           fl( a + b ) .gt. a.
*
         b = 1
         c = slamc3( a, b )
*
*+       while( c.eq.a )loop
   20    continue
         if( c.eq.a ) then
            b = 2*b
            c = slamc3( a, b )
            go to 20
         end if
*+       end while
*
*        now compute the base.  a and c  are neighbouring floating point
*        numbers  in the  interval  ( beta**t, beta**( t + 1 ) )  and so
*        their difference is beta. adding 0.25 to c is to ensure that it
*        is truncated to beta and not ( beta - 1 ).
*
         qtr = one / 4
         savec = c
         c = slamc3( c, -a )
         lbeta = c + qtr
*
*        now determine whether rounding or chopping occurs,  by adding a
*        bit  less  than  beta/2  and a  bit  more  than  beta/2  to  a.
*
         b = lbeta
         f = slamc3( b / 2, -b / 100 )
         c = slamc3( f, a )
         if( c.eq.a ) then
            lrnd = .true.
         else
            lrnd = .false.
         end if
         f = slamc3( b / 2, b / 100 )
         c = slamc3( f, a )
         if( ( lrnd ) .and. ( c.eq.a ) )
     $      lrnd = .false.
*
*        try and decide whether rounding is done in the  ieee  'round to
*        nearest' style. b/2 is half a unit in the last place of the two
*        numbers a and savec. furthermore, a is even, i.e. has last  bit
*        zero, and savec is odd. thus adding b/2 to a should not  change
*        a, but adding b/2 to savec should change savec.
*
         t1 = slamc3( b / 2, a )
         t2 = slamc3( b / 2, savec )
         lieee1 = ( t1.eq.a ) .and. ( t2.gt.savec ) .and. lrnd
*
*        now find  the  mantissa, t.  it should  be the  integer part of
*        log to the base beta of a,  however it is safer to determine  t
*        by powering.  so we find t as the smallest positive integer for
*        which
*
*           fl( beta**t + 1.0 ) = 1.0.
*
         lt = 0
         a = 1
         c = 1
*
*+       while( c.eq.one )loop
   30    continue
         if( c.eq.one ) then
            lt = lt + 1
            a = a*lbeta
            c = slamc3( a, one )
            c = slamc3( c, -a )
            go to 30
         end if
*+       end while
*
      end if
*
      beta = lbeta
      t = lt
      rnd = lrnd
      ieee1 = lieee1
      return
*
*     end of slamc1
*
      end
*
************************************************************************
*
      subroutine slamc2( beta, t, rnd, eps, emin, rmin, emax, rmax )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      logical            rnd
      integer            beta, emax, emin, t
      real               eps, rmax, rmin
*     ..
*
*  purpose
*  =======
*
*  slamc2 determines the machine parameters specified in its argument
*  list.
*
*  arguments
*  =========
*
*  beta    (output) integer
*          the base of the machine.
*
*  t       (output) integer
*          the number of ( beta ) digits in the mantissa.
*
*  rnd     (output) logical
*          specifies whether proper rounding  ( rnd = .true. )  or
*          chopping  ( rnd = .false. )  occurs in addition. this may not
*          be a reliable guide to the way in which the machine performs
*          its arithmetic.
*
*  eps     (output) real
*          the smallest positive number such that
*
*             fl( 1.0 - eps ) .lt. 1.0,
*
*          where fl denotes the computed value.
*
*  emin    (output) integer
*          the minimum exponent before (gradual) underflow occurs.
*
*  rmin    (output) real
*          the smallest normalized number for the machine, given by
*          base**( emin - 1 ), where  base  is the floating point value
*          of beta.
*
*  emax    (output) integer
*          the maximum exponent before overflow occurs.
*
*  rmax    (output) real
*          the largest positive number for the machine, given by
*          base**emax * ( 1 - eps ), where  base  is the floating point
*          value of beta.
*
*  further details
*  ===============
*
*  the computation of  eps  is based on a routine paranoia by
*  w. kahan of the university of california at berkeley.
*
* =====================================================================
*
*     .. local scalars ..
      logical            first, ieee, iwarn, lieee1, lrnd
      integer            gnmin, gpmin, i, lbeta, lemax, lemin, lt,
     $                   ngnmin, ngpmin
      real               a, b, c, half, leps, lrmax, lrmin, one, rbase,
     $                   sixth, small, third, two, zero
*     ..
*     .. external functions ..
      real               slamc3
      external           slamc3
*     ..
*     .. external subroutines ..
      external           slamc1, slamc4, slamc5
*     ..
*     .. intrinsic functions ..
      intrinsic          abs, max, min
*     ..
*     .. save statement ..
      save               first, iwarn, lbeta, lemax, lemin, leps, lrmax,
     $                   lrmin, lt
*     ..
*     .. data statements ..
      data               first / .true. / , iwarn / .false. /
*     ..
*     .. executable statements ..
*
      if( first ) then
         first = .false.
         zero = 0
         one = 1
         two = 2
*
*        lbeta, lt, lrnd, leps, lemin and lrmin  are the local values of
*        beta, t, rnd, eps, emin and rmin.
*
*        throughout this routine  we use the function  slamc3  to ensure
*        that relevant values are stored  and not held in registers,  or
*        are not affected by optimizers.
*
*        slamc1 returns the parameters  lbeta, lt, lrnd and lieee1.
*
         call slamc1( lbeta, lt, lrnd, lieee1 )
*
*        start to find eps.
*
         b = lbeta
         a = b**( -lt )
         leps = a
*
*        try some tricks to see whether or not this is the correct  eps.
*
         b = two / 3
         half = one / 2
         sixth = slamc3( b, -half )
         third = slamc3( sixth, sixth )
         b = slamc3( third, -half )
         b = slamc3( b, sixth )
         b = abs( b )
         if( b.lt.leps )
     $      b = leps
*
         leps = 1
*
*+       while( ( leps.gt.b ).and.( b.gt.zero ) )loop
   10    continue
         if( ( leps.gt.b ) .and. ( b.gt.zero ) ) then
            leps = b
            c = slamc3( half*leps, ( two**5 )*( leps**2 ) )
            c = slamc3( half, -c )
            b = slamc3( half, c )
            c = slamc3( half, -b )
            b = slamc3( half, c )
            go to 10
         end if
*+       end while
*
         if( a.lt.leps )
     $      leps = a
*
*        computation of eps complete.
*
*        now find  emin.  let a = + or - 1, and + or - (1 + base**(-3)).
*        keep dividing  a by beta until (gradual) underflow occurs. this
*        is detected when we cannot recover the previous a.
*
         rbase = one / lbeta
         small = one
         do 20 i = 1, 3
            small = slamc3( small*rbase, zero )
   20    continue
         a = slamc3( one, small )
         call slamc4( ngpmin, one, lbeta )
         call slamc4( ngnmin, -one, lbeta )
         call slamc4( gpmin, a, lbeta )
         call slamc4( gnmin, -a, lbeta )
         ieee = .false.
*
         if( ( ngpmin.eq.ngnmin ) .and. ( gpmin.eq.gnmin ) ) then
            if( ngpmin.eq.gpmin ) then
               lemin = ngpmin
*            ( non twos-complement machines, no gradual underflow;
*              e.g.,  vax )
            else if( ( gpmin-ngpmin ).eq.3 ) then
               lemin = ngpmin - 1 + lt
               ieee = .true.
*            ( non twos-complement machines, with gradual underflow;
*              e.g., ieee standard followers )
            else
               lemin = min( ngpmin, gpmin )
*            ( a guess; no known machine )
               iwarn = .true.
            end if
*
         else if( ( ngpmin.eq.gpmin ) .and. ( ngnmin.eq.gnmin ) ) then
            if( abs( ngpmin-ngnmin ).eq.1 ) then
               lemin = max( ngpmin, ngnmin )
*            ( twos-complement machines, no gradual underflow;
*              e.g., cyber 205 )
            else
               lemin = min( ngpmin, ngnmin )
*            ( a guess; no known machine )
               iwarn = .true.
            end if
*
         else if( ( abs( ngpmin-ngnmin ).eq.1 ) .and.
     $            ( gpmin.eq.gnmin ) ) then
            if( ( gpmin-min( ngpmin, ngnmin ) ).eq.3 ) then
               lemin = max( ngpmin, ngnmin ) - 1 + lt
*            ( twos-complement machines with gradual underflow;
*              no known machine )
            else
               lemin = min( ngpmin, ngnmin )
*            ( a guess; no known machine )
               iwarn = .true.
            end if
*
         else
            lemin = min( ngpmin, ngnmin, gpmin, gnmin )
*         ( a guess; no known machine )
            iwarn = .true.
         end if
***
* comment out this if block if emin is ok
         if( iwarn ) then
            first = .true.
            write( 6, fmt = 9999 )lemin
         end if
***
*
*        assume ieee arithmetic if we found denormalised  numbers above,
*        or if arithmetic seems to round in the  ieee style,  determined
*        in routine slamc1. a true ieee machine should have both  things
*        true; however, faulty machines may have one or the other.
*
         ieee = ieee .or. lieee1
*
*        compute  rmin by successive division by  beta. we could compute
*        rmin as base**( emin - 1 ),  but some machines underflow during
*        this computation.
*
         lrmin = 1
         do 30 i = 1, 1 - lemin
            lrmin = slamc3( lrmin*rbase, zero )
   30    continue
*
*        finally, call slamc5 to compute emax and rmax.
*
         call slamc5( lbeta, lt, lemin, ieee, lemax, lrmax )
      end if
*
      beta = lbeta
      t = lt
      rnd = lrnd
      eps = leps
      emin = lemin
      rmin = lrmin
      emax = lemax
      rmax = lrmax
*
      return
*
 9999 format( / / ' warning. the value emin may be incorrect:-',
     $      '  emin = ', i8, /
     $      ' if, after inspection, the value emin looks',
     $      ' acceptable please comment out ',
     $      / ' the if block as marked within the code of routine',
     $      ' slamc2,', / ' otherwise supply emin explicitly.', / )
*
*     end of slamc2
*
      end
*
************************************************************************
*
      real             function slamc3( a, b )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      real               a, b
*     ..
*
*  purpose
*  =======
*
*  slamc3  is intended to force  a  and  b  to be stored prior to doing
*  the addition of  a  and  b ,  for use in situations where optimizers
*  might hold one of these in a register.
*
*  arguments
*  =========
*
*  a, b    (input) real
*          the values a and b.
*
* =====================================================================
*
*     .. executable statements ..
*
      slamc3 = a + b
*
      return
*
*     end of slamc3
*
      end
*
************************************************************************
*
      subroutine slamc4( emin, start, base )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      integer            base, emin
      real               start
*     ..
*
*  purpose
*  =======
*
*  slamc4 is a service routine for slamc2.
*
*  arguments
*  =========
*
*  emin    (output) emin
*          the minimum exponent before (gradual) underflow, computed by
*          setting a = start and dividing by base until the previous a
*          can not be recovered.
*
*  start   (input) real
*          the starting point for determining emin.
*
*  base    (input) integer
*          the base of the machine.
*
* =====================================================================
*
*     .. local scalars ..
      integer            i
      real               a, b1, b2, c1, c2, d1, d2, one, rbase, zero
*     ..
*     .. external functions ..
      real               slamc3
      external           slamc3
*     ..
*     .. executable statements ..
*
      a = start
      one = 1
      rbase = one / base
      zero = 0
      emin = 1
      b1 = slamc3( a*rbase, zero )
      c1 = a
      c2 = a
      d1 = a
      d2 = a
*+    while( ( c1.eq.a ).and.( c2.eq.a ).and.
*    $       ( d1.eq.a ).and.( d2.eq.a )      )loop
   10 continue
      if( ( c1.eq.a ) .and. ( c2.eq.a ) .and. ( d1.eq.a ) .and.
     $    ( d2.eq.a ) ) then
         emin = emin - 1
         a = b1
         b1 = slamc3( a / base, zero )
         c1 = slamc3( b1*base, zero )
         d1 = zero
         do 20 i = 1, base
            d1 = d1 + b1
   20    continue
         b2 = slamc3( a*rbase, zero )
         c2 = slamc3( b2 / rbase, zero )
         d2 = zero
         do 30 i = 1, base
            d2 = d2 + b2
   30    continue
         go to 10
      end if
*+    end while
*
      return
*
*     end of slamc4
*
      end
*
************************************************************************
*
      subroutine slamc5( beta, p, emin, ieee, emax, rmax )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     october 31, 1992
*
*     .. scalar arguments ..
      logical            ieee
      integer            beta, emax, emin, p
      real               rmax
*     ..
*
*  purpose
*  =======
*
*  slamc5 attempts to compute rmax, the largest machine floating-point
*  number, without overflow.  it assumes that emax + abs(emin) sum
*  approximately to a power of 2.  it will fail on machines where this
*  assumption does not hold, for example, the cyber 205 (emin = -28625,
*  emax = 28718).  it will also fail if the value supplied for emin is
*  too large (i.e. too close to zero), probably with overflow.
*
*  arguments
*  =========
*
*  beta    (input) integer
*          the base of floating-point arithmetic.
*
*  p       (input) integer
*          the number of base beta digits in the mantissa of a
*          floating-point value.
*
*  emin    (input) integer
*          the minimum exponent before (gradual) underflow.
*
*  ieee    (input) logical
*          a logical flag specifying whether or not the arithmetic
*          system is thought to comply with the ieee standard.
*
*  emax    (output) integer
*          the largest exponent before overflow
*
*  rmax    (output) real
*          the largest machine floating-point number.
*
* =====================================================================
*
*     .. parameters ..
      real               zero, one
      parameter          ( zero = 0.0e0, one = 1.0e0 )
*     ..
*     .. local scalars ..
      integer            exbits, expsum, i, lexp, nbits, try, uexp
      real               oldy, recbas, y, z
*     ..
*     .. external functions ..
      real               slamc3
      external           slamc3
*     ..
*     .. intrinsic functions ..
      intrinsic          mod
*     ..
*     .. executable statements ..
*
*     first compute lexp and uexp, two powers of 2 that bound
*     abs(emin). we then assume that emax + abs(emin) will sum
*     approximately to the bound that is closest to abs(emin).
*     (emax is the exponent of the required number rmax).
*
      lexp = 1
      exbits = 1
   10 continue
      try = lexp*2
      if( try.le.( -emin ) ) then
         lexp = try
         exbits = exbits + 1
         go to 10
      end if
      if( lexp.eq.-emin ) then
         uexp = lexp
      else
         uexp = try
         exbits = exbits + 1
      end if
*
*     now -lexp is less than or equal to emin, and -uexp is greater
*     than or equal to emin. exbits is the number of bits needed to
*     store the exponent.
*
      if( ( uexp+emin ).gt.( -lexp-emin ) ) then
         expsum = 2*lexp
      else
         expsum = 2*uexp
      end if
*
*     expsum is the exponent range, approximately equal to
*     emax - emin + 1 .
*
      emax = expsum + emin - 1
      nbits = 1 + exbits + p
*
*     nbits is the total number of bits needed to store a
*     floating-point number.
*
      if( ( mod( nbits, 2 ).eq.1 ) .and. ( beta.eq.2 ) ) then
*
*        either there are an odd number of bits used to store a
*        floating-point number, which is unlikely, or some bits are
*        not used in the representation of numbers, which is possible,
*        (e.g. cray machines) or the mantissa has an implicit bit,
*        (e.g. ieee machines, dec vax machines), which is perhaps the
*        most likely. we have to assume the last alternative.
*        if this is true, then we need to reduce emax by one because
*        there must be some way of representing zero in an implicit-bit
*        system. on machines like cray, we are reducing emax by one
*        unnecessarily.
*
         emax = emax - 1
      end if
*
      if( ieee ) then
*
*        assume we are on an ieee machine which reserves one exponent
*        for infinity and nan.
*
         emax = emax - 1
      end if
*
*     now create rmax, the largest machine number, which should
*     be equal to (1.0 - beta**(-p)) * beta**emax .
*
*     first compute 1.0 - beta**(-p), being careful that the
*     result is less than 1.0 .
*
      recbas = one / beta
      z = beta - one
      y = zero
      do 20 i = 1, p
         z = z*recbas
         if( y.lt.one )
     $      oldy = y
         y = slamc3( y, z )
   20 continue
      if( y.ge.one )
     $   y = oldy
*
*     now multiply by beta**emax to get rmax.
*
      do 30 i = 1, emax
         y = slamc3( y*beta, zero )
   30 continue
*
      rmax = y
      return
*
*     end of slamc5
*
      end
      logical          function lsame( ca, cb )
*
*  -- lapack auxiliary routine (version 1.1) --
*     univ. of tennessee, univ. of california berkeley, nag ltd.,
*     courant institute, argonne national lab, and rice university
*     february 29, 1992
*
*     .. scalar arguments ..
      character          ca, cb
*     ..
*
*  purpose
*  =======
*
*  lsame returns .true. if ca is the same letter as cb regardless of
*  case.
*
*  arguments
*  =========
*
*  ca      (input) character*1
*  cb      (input) character*1
*          ca and cb specify the single characters to be compared.
*
*     .. intrinsic functions ..
      intrinsic          ichar
*     ..
*     .. local scalars ..
      integer            inta, intb, zcode
*     ..
*     .. executable statements ..
*
*     test if the characters are equal
*
      lsame = ca.eq.cb
      if( lsame )
     $   return
*
*     now test for equivalence if both characters are alphabetic.
*
      zcode = ichar( 'z' )
*
*     use 'z' rather than 'a' so that ascii can be detected on prime
*     machines, on which ichar returns a value with bit 8 set.
*     ichar('a') on prime machines returns 193 which is the same as
*     ichar('a') on an ebcdic machine.
*
      inta = ichar( ca )
      intb = ichar( cb )
*
      if( zcode.eq.90 .or. zcode.eq.122 ) then
*
*        ascii is assumed - zcode is the ascii code of either lower or
*        upper case 'z'.
*
         if( inta.ge.97 .and. inta.le.122 ) inta = inta - 32
         if( intb.ge.97 .and. intb.le.122 ) intb = intb - 32
*
      else if( zcode.eq.233 .or. zcode.eq.169 ) then
*
*        ebcdic is assumed - zcode is the ebcdic code of either lower or
*        upper case 'z'.
*
         if( inta.ge.129 .and. inta.le.137 .or.
     $       inta.ge.145 .and. inta.le.153 .or.
     $       inta.ge.162 .and. inta.le.169 ) inta = inta + 64
         if( intb.ge.129 .and. intb.le.137 .or.
     $       intb.ge.145 .and. intb.le.153 .or.
     $       intb.ge.162 .and. intb.le.169 ) intb = intb + 64
*
      else if( zcode.eq.218 .or. zcode.eq.250 ) then
*
*        ascii is assumed, on prime machines - zcode is the ascii code
*        plus 128 of either lower or upper case 'z'.
*
         if( inta.ge.225 .and. inta.le.250 ) inta = inta - 32
         if( intb.ge.225 .and. intb.le.250 ) intb = intb - 32
      end if
      lsame = inta.eq.intb
*
*     return
*
*     end of lsame
*
      end
      subroutine  sswap (n,sx,incx,sy,incy)                             
c                                                                       
c     interchanges two vectors.                                         
c     uses unrolled loops for increments equal to 1.                    
c     jack dongarra, linpack, 3/11/78.                                  
c                                                                       
      real sx(1),sy(1),stemp                                            
      integer i,incx,incy,ix,iy,m,mp1,n                                 
c                                                                       
      if(n.le.0)return                                                  
      if(incx.eq.1.and.incy.eq.1)go to 20                               
c                                                                       
c       code for unequal increments or equal increments not equal       
c         to 1                                                          
c                                                                       
      ix = 1                                                            
      iy = 1                                                            
      if(incx.lt.0)ix = (-n+1)*incx + 1                                 
      if(incy.lt.0)iy = (-n+1)*incy + 1                                 
      do 10 i = 1,n                                                     
        stemp = sx(ix)                                                  
        sx(ix) = sy(iy)                                                 
        sy(iy) = stemp                                                  
        ix = ix + incx                                                  
        iy = iy + incy                                                  
   10 continue                                                          
      return                                                            
c                                                                       
c       code for both increments equal to 1                             
c                                                                       
c                                                                       
c       clean-up loop                                                   
c                                                                       
   20 m = mod(n,3)                                                      
      if( m .eq. 0 ) go to 40                                           
      do 30 i = 1,m                                                     
        stemp = sx(i)                                                   
        sx(i) = sy(i)                                                   
        sy(i) = stemp                                                   
   30 continue                                                          
      if( n .lt. 3 ) return                                             
   40 mp1 = m + 1                                                       
      do 50 i = mp1,n,3                                                 
        stemp = sx(i)                                                   
        sx(i) = sy(i)                                                   
        sy(i) = stemp                                                   
        stemp = sx(i + 1)                                               
        sx(i + 1) = sy(i + 1)                                           
        sy(i + 1) = stemp                                               
        stemp = sx(i + 2)                                               
        sx(i + 2) = sy(i + 2)                                           
        sy(i + 2) = stemp                                               
   50 continue                                                          
      return                                                            
      end                                                               
careful! anything free comes with no guarantee.
      subroutine vcost(m,n,x,xt,mdimx,wsave)
c***begin prologue  vcost
c***date written   860701   (yymmdd)
c***revision date  900509   (yymmdd)
c***category no.  j1a3
c***keywords  fast fourier transform, cosine transform, multiple
c             sequences
c***author  boisvert, r. f. (nist)
c***purpose  cosine transform of one or more real, even sequences.
c***description
c
c  subroutine vcost computes the discrete fourier cosine transform
c  of m even sequences x(j,i), j=1,...,m.  the transform is defined
c  below at output parameter x.
c
c  the array wsave which is used by subroutine vcost must be
c  initialized by calling subroutine vcosti(n,wsave).
c
c  input parameters
c
c  m       the number of sequences to be transformed.
c
c  n       the length of the sequence to be transformed.  n must be
c          greater than 1.  the method is most efficient when n-1 is
c          is a product of small primes.
c
c  x       an array of size at least x(mdimx,n) which contains the
c          the sequences to be transformed.  the sequences are stored
c          in the rows of x.  thus, the jth sequence is stored in
c          x(j,i), i=1,..,n.
c
c  xt      a work array of size at least xt(mdimx,n-1).
c
c  mdimx   the first dimension of the array x exactly as it appears in
c          the calling program.
c
c  wsave   a work array which must be dimensioned at least 3*n+15
c          in the program that calls vcost.  the wsave array must be
c          initialized by calling subroutine vcosti(n,wsave), and a
c          different wsave array must be used for each different
c          value of n.  this initialization does not have to be
c          repeated so long as n remains unchanged.  thus subsequent
c          transforms can be obtained faster than the first.
c
c  output parameters
c
c  x       for i=1,...,n and j=1,...,m
c
c             x(j,i) = ( x(j,1)+(-1)**(i-1)*x(j,n)
c
c               + the sum from k=2 to k=n-1
c
c                 2*x(j,k)*cos((k-1)*(i-1)*pi/(n-1)) )/sqrt(2*(n-1))
c
c  wsave   contains initialization calculations which must not be
c          destroyed between calls of vcost.
c
c  -----------------------------------------------------------------
c
c  note  -  a call of vcost followed immediately by another call
c           of vcost will return the original sequences x.  thus,
c           vcost is the correctly normalized inverse of itself.
c
c  -----------------------------------------------------------------
c
c  vcost is a straightforward extension of the subprogram cost to
c  handle m simultaneous sequences.  the scaling of the sequences
c  computed by vcost is different than that of cost.  cost was
c  originally developed by p. n. swarztrauber of ncar.
c
c***references  p. n. swarztrauber, vectorizing the ffts, in parallel
c               computations, (g. rodrigue, ed.), academic press, 1982,
c               pp. 51-83.
c***routines called  vrfftf
c***end prologue  vcost
      dimension       x(mdimx,*), xt(mdimx,*), wsave(*)
c***first executable statement  vcost
      if (m .le. 0)  go to 900
      if (n .le. 1)  go to 900
      if (n .gt. 3)  go to 400
      if (n .eq. 3)  go to 300
c
c  case  n = 2
c
      scale = sqrt(0.50e0)
      do 210 j=1,m
         x1h = scale*(x(j,1)+x(j,2))
         x(j,2) = scale*(x(j,1)-x(j,2))
         x(j,1) = x1h
  210 continue
      go to 900
c
c  case  n = 3
c
  300 continue
      scale = 0.50e0
      do 310 j=1,m
         x1p3 = x(j,1)+x(j,3)
         tx2 = x(j,2)+x(j,2)
         x(j,2) = scale*(x(j,1)-x(j,3))
         x(j,1) = scale*(x1p3+tx2)
         x(j,3) = scale*(x1p3-tx2)
  310 continue
      go to 900
c
c  case  n .gt. 3
c
c     ... preprocessing
c
  400 continue
      nm1 = n-1
      np1 = n+1
      ns2 = n/2
      do 410 j=1,m
         xt(j,1) = x(j,1)-x(j,n)
         x(j,1) = x(j,1)+x(j,n)
  410 continue
      do 420 k=2,ns2
         kc = np1-k
         do 420 j=1,m
            t1 = x(j,k)+x(j,kc)
            t2 = x(j,k)-x(j,kc)
            xt(j,1) = xt(j,1)+wsave(kc)*t2
            t2 = wsave(k)*t2
            x(j,k) = t1-t2
            x(j,kc) = t1+t2
  420 continue
      modn = mod(n,2)
      if (modn .ne. 0) then
         do 430 j=1,m
            x(j,ns2+1) = x(j,ns2+1)+x(j,ns2+1)
  430    continue
      endif
      do 435 j=1,m
         x(j,n) = xt(j,1)
  435 continue
c
c     ... real periodic transform
c
      call vrfftf (m,nm1,x,xt,mdimx,wsave(np1))
c
c     ... postprocessing
c
      factor = 1.0/sqrt(real(nm1))
      do 440 j=1,m
         xt(j,1) = x(j,2)
         x(j,2) = factor*x(j,n)
  440 continue
      do 450 i=4,n,2
         do 450 j=1,m
            xi = x(j,i)
            x(j,i) = x(j,i-2)-x(j,i-1)
            x(j,i-1) = xt(j,1)
            xt(j,1) = xi
  450 continue
      if (modn .ne. 0) then
         do 460 j=1,m
            x(j,n) = xt(j,1)
  460    continue
      endif
c
c     ... normalization
c
      scale = sqrt(0.5)
      do 490 i=1,n
         do 490 j=1,m
            x(j,i) = scale*x(j,i)
  490 continue
c
c  exit
c
  900 continue
      return
      end
careful! anything free comes with no guarantee.
      subroutine vcosti(n,wsave)
c***begin prologue  vcosti
c***date written   860701   (yymmdd)
c***revision date  900509   (yymmdd)
c***category no.  j1a3
c***keywords  fast fourier transform, cosine transform, multiple
c             sequences
c***author  boisvert, r. f. (nist)
c***purpose  initialize for vcost.
c***description
c
c  subroutine vcosti initializes the array wsave which is used in
c  subroutine vcost.  the prime factorization of n together with
c  a tabulation of the trigonometric functions are computed and
c  stored in wsave.
c
c  input parameter
c
c  n       the length of the sequence to be transformed.  the method
c          is most efficient when n-1 is a product of small primes.
c
c  output parameter
c
c  wsave   a work array which must be dimensioned at least 3*n+15.
c          different wsave arrays are required for different values
c          of n.  the contents of wsave must not be changed between
c          calls of vcost.
c
c  -----------------------------------------------------------------
c
c  vcosti is a straightforward extension of the subprogram costi to
c  handle m simultaneous sequences.  costi was originally developed
c  by p. n. swarztrauber of ncar.
c
c***references  p. n. swarztrauber, vectorizing the ffts, in parallel
c               computations, (g. rodrigue, ed.), academic press, 1982,
c               pp. 51-83.
c***routines called  vrffti
c***end prologue  vcosti
      dimension       wsave(*)
c***first executable statement  vcosti
      pi = pimach(1.0)
      if (n .le. 3) return
      nm1 = n-1
      np1 = n+1
      ns2 = n/2
      dt = pi/real(nm1)
      fk = 0.
      do 101 k=2,ns2
         fk = fk+1.
         wsave(k) = 2.*sin(fk*dt)
  101 continue
      fk = 0.
      do 102 k=2,ns2
         kc = np1-k
         fk = fk+1.
         wsave(kc) = 2.*cos(fk*dt)
  102 continue
      call vrffti (nm1,wsave(n+1))
      return
      end
      subroutine vradf2 (mp,ido,l1,cc,ch,mdimc,wa1)
c
c     vrfftpk, version 1, august 1985
c
      dimension   ch(mdimc,ido,2,l1)  ,cc(mdimc,ido,l1,2)     ,
     1                wa1(ido)
      do 101 k=1,l1
         do 1001 m=1,mp
         ch(m,1,1,k) = cc(m,1,k,1)+cc(m,1,k,2)
         ch(m,ido,2,k) = cc(m,1,k,1)-cc(m,1,k,2)
 1001    continue
  101 continue
      if (ido-2) 107,105,102
  102 idp2 = ido+2
      do 104 k=1,l1
         do 103 i=3,ido,2
            ic = idp2-i
            do 1003 m=1,mp
            ch(m,i,1,k) = cc(m,i,k,1)+(wa1(i-2)*cc(m,i,k,2)-
     1       wa1(i-1)*cc(m,i-1,k,2))
            ch(m,ic,2,k) = (wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*
     1       cc(m,i-1,k,2))-cc(m,i,k,1)
            ch(m,i-1,1,k) = cc(m,i-1,k,1)+(wa1(i-2)*cc(m,i-1,k,2)+
     1       wa1(i-1)*cc(m,i,k,2))
            ch(m,ic-1,2,k) = cc(m,i-1,k,1)-(wa1(i-2)*cc(m,i-1,k,2)+
     1       wa1(i-1)*cc(m,i,k,2))
 1003       continue
  103    continue
  104 continue
      if (mod(ido,2) .eq. 1) return
  105 do 106 k=1,l1
         do 1006 m=1,mp
         ch(m,1,2,k) = -cc(m,ido,k,2)
         ch(m,ido,1,k) = cc(m,ido,k,1)
 1006    continue
  106 continue
  107 return
      end
      subroutine vradf3 (mp,ido,l1,cc,ch,mdimc,wa1,wa2)
c
c     vrfftpk, version 1, august 1985
c
      dimension   ch(mdimc,ido,3,l1)  ,cc(mdimc,ido,l1,3)     ,
     1                wa1(ido)     ,wa2(ido)
      arg=2.*pimach(1.0)/3.
      taur=cos(arg)
      taui=sin(arg)
      do 101 k=1,l1
         do 1001 m=1,mp
         ch(m,1,1,k) = cc(m,1,k,1)+(cc(m,1,k,2)+cc(m,1,k,3))
         ch(m,1,3,k) = taui*(cc(m,1,k,3)-cc(m,1,k,2))
         ch(m,ido,2,k) = cc(m,1,k,1)+taur*
     1      (cc(m,1,k,2)+cc(m,1,k,3))
 1001    continue
  101 continue
      if (ido .eq. 1) return
      idp2 = ido+2
      do 103 k=1,l1
         do 102 i=3,ido,2
            ic = idp2-i
            do 1002 m=1,mp
            ch(m,i-1,1,k) = cc(m,i-1,k,1)+((wa1(i-2)*cc(m,i-1,k,2)+
     1       wa1(i-1)*cc(m,i,k,2))+(wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*
     1       cc(m,i,k,3)))
            ch(m,i,1,k) = cc(m,i,k,1)+((wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*
     1       cc(m,i-1,k,2))+(wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3)))
            ch(m,i-1,3,k) = (cc(m,i-1,k,1)+taur*((wa1(i-2)*
     1       cc(m,i-1,k,2)+wa1(i-1)*cc(m,i,k,2))+(wa2(i-2)*
     1       cc(m,i-1,k,3)+wa2(i-1)*cc(m,i,k,3))))+(taui*((wa1(i-2)*
     1       cc(m,i,k,2)-wa1(i-1)*cc(m,i-1,k,2))-(wa2(i-2)*
     1       cc(m,i,k,3)-wa2(i-1)*cc(m,i-1,k,3))))
            ch(m,ic-1,2,k) = (cc(m,i-1,k,1)+taur*((wa1(i-2)*
     1       cc(m,i-1,k,2)+wa1(i-1)*cc(m,i,k,2))+(wa2(i-2)*
     1       cc(m,i-1,k,3)+wa2(i-1)*cc(m,i,k,3))))-(taui*((wa1(i-2)*
     1       cc(m,i,k,2)-wa1(i-1)*cc(m,i-1,k,2))-(wa2(i-2)*
     1       cc(m,i,k,3)-wa2(i-1)*cc(m,i-1,k,3))))
            ch(m,i,3,k) = (cc(m,i,k,1)+taur*((wa1(i-2)*cc(m,i,k,2)-
     1       wa1(i-1)*cc(m,i-1,k,2))+(wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3))))+(taui*((wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*
     1       cc(m,i,k,3))-(wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2))))
            ch(m,ic,2,k) = (taui*((wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*
     1       cc(m,i,k,3))-(wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2))))-(cc(m,i,k,1)+taur*((wa1(i-2)*cc(m,i,k,2)-
     1       wa1(i-1)*cc(m,i-1,k,2))+(wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3))))
 1002       continue
  102    continue
  103 continue
      return
      end
      subroutine vradf4 (mp,ido,l1,cc,ch,mdimc,wa1,wa2,wa3)
c
c     vrfftpk, version 1, august 1985
c
      dimension    cc(mdimc,ido,l1,4)   ,ch(mdimc,ido,4,l1)     ,
     1                wa1(ido)     ,wa2(ido)     ,wa3(ido)
      hsqt2=sqrt(2.)/2.
      do 101 k=1,l1
         do 1001 m=1,mp
         ch(m,1,1,k) = (cc(m,1,k,2)+cc(m,1,k,4))
     1      +(cc(m,1,k,1)+cc(m,1,k,3))
         ch(m,ido,4,k) = (cc(m,1,k,1)+cc(m,1,k,3))
     1      -(cc(m,1,k,2)+cc(m,1,k,4))
         ch(m,ido,2,k) = cc(m,1,k,1)-cc(m,1,k,3)
         ch(m,1,3,k) = cc(m,1,k,4)-cc(m,1,k,2)
 1001    continue
  101 continue
      if (ido-2) 107,105,102
  102 idp2 = ido+2
      do 104 k=1,l1
         do 103 i=3,ido,2
            ic = idp2-i
            do 1003 m=1,mp
            ch(m,i-1,1,k) = ((wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2))+(wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*
     1       cc(m,i,k,4)))+(cc(m,i-1,k,1)+(wa2(i-2)*cc(m,i-1,k,3)+
     1       wa2(i-1)*cc(m,i,k,3)))
            ch(m,ic-1,4,k) = (cc(m,i-1,k,1)+(wa2(i-2)*cc(m,i-1,k,3)+
     1       wa2(i-1)*cc(m,i,k,3)))-((wa1(i-2)*cc(m,i-1,k,2)+
     1       wa1(i-1)*cc(m,i,k,2))+(wa3(i-2)*cc(m,i-1,k,4)+
     1       wa3(i-1)*cc(m,i,k,4)))
            ch(m,i,1,k) = ((wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*
     1       cc(m,i-1,k,2))+(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4)))+(cc(m,i,k,1)+(wa2(i-2)*cc(m,i,k,3)-
     1       wa2(i-1)*cc(m,i-1,k,3)))
            ch(m,ic,4,k) = ((wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*
     1       cc(m,i-1,k,2))+(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4)))-(cc(m,i,k,1)+(wa2(i-2)*cc(m,i,k,3)-
     1       wa2(i-1)*cc(m,i-1,k,3)))
            ch(m,i-1,3,k) = ((wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*
     1       cc(m,i-1,k,2))-(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4)))+(cc(m,i-1,k,1)-(wa2(i-2)*cc(m,i-1,k,3)+
     1       wa2(i-1)*cc(m,i,k,3)))
            ch(m,ic-1,2,k) = (cc(m,i-1,k,1)-(wa2(i-2)*cc(m,i-1,k,3)+
     1       wa2(i-1)*cc(m,i,k,3)))-((wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*
     1       cc(m,i-1,k,2))-(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4)))
            ch(m,i,3,k) = ((wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*
     1       cc(m,i,k,4))-(wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2)))+(cc(m,i,k,1)-(wa2(i-2)*cc(m,i,k,3)-
     1       wa2(i-1)*cc(m,i-1,k,3)))
            ch(m,ic,2,k) = ((wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*
     1       cc(m,i,k,4))-(wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2)))-(cc(m,i,k,1)-(wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3)))
 1003       continue
  103    continue
  104 continue
      if (mod(ido,2) .eq. 1) return
  105 continue
      do 106 k=1,l1
         do 1006 m=1,mp
            ch(m,ido,1,k) = (hsqt2*(cc(m,ido,k,2)-cc(m,ido,k,4)))+
     1       cc(m,ido,k,1)
            ch(m,ido,3,k) = cc(m,ido,k,1)-(hsqt2*(cc(m,ido,k,2)-
     1       cc(m,ido,k,4)))
            ch(m,1,2,k) = (-hsqt2*(cc(m,ido,k,2)+cc(m,ido,k,4)))-
     1       cc(m,ido,k,3)
            ch(m,1,4,k) = (-hsqt2*(cc(m,ido,k,2)+cc(m,ido,k,4)))+
     1       cc(m,ido,k,3)
 1006    continue
  106 continue
  107 return
      end
      subroutine vradf5 (mp,ido,l1,cc,ch,mdimc,wa1,wa2,wa3,wa4)
c
c     vrfftpk, version 1, august 1985
c
      dimension  cc(mdimc,ido,l1,5)    ,ch(mdimc,ido,5,l1)     ,
     1           wa1(ido)     ,wa2(ido)     ,wa3(ido)     ,wa4(ido)
      arg=2.*pimach(1.0)/5.
      tr11=cos(arg)
      ti11=sin(arg)
      tr12=cos(2.*arg)
      ti12=sin(2.*arg)
      do 101 k=1,l1
         do 1001 m=1,mp
         ch(m,1,1,k) = cc(m,1,k,1)+(cc(m,1,k,5)+cc(m,1,k,2))+
     1    (cc(m,1,k,4)+cc(m,1,k,3))
         ch(m,ido,2,k) = cc(m,1,k,1)+tr11*(cc(m,1,k,5)+cc(m,1,k,2))+
     1    tr12*(cc(m,1,k,4)+cc(m,1,k,3))
         ch(m,1,3,k) = ti11*(cc(m,1,k,5)-cc(m,1,k,2))+ti12*
     1    (cc(m,1,k,4)-cc(m,1,k,3))
         ch(m,ido,4,k) = cc(m,1,k,1)+tr12*(cc(m,1,k,5)+cc(m,1,k,2))+
     1    tr11*(cc(m,1,k,4)+cc(m,1,k,3))
         ch(m,1,5,k) = ti12*(cc(m,1,k,5)-cc(m,1,k,2))-ti11*
     1    (cc(m,1,k,4)-cc(m,1,k,3))
 1001    continue
  101 continue
      if (ido .eq. 1) return
      idp2 = ido+2
      do 103 k=1,l1
         do 102 i=3,ido,2
            ic = idp2-i
            do 1002 m=1,mp
            ch(m,i-1,1,k) = cc(m,i-1,k,1)+((wa1(i-2)*cc(m,i-1,k,2)+
     1       wa1(i-1)*cc(m,i,k,2))+(wa4(i-2)*cc(m,i-1,k,5)+wa4(i-1)*
     1       cc(m,i,k,5)))+((wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*
     1       cc(m,i,k,3))+(wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*cc(m,i,k,4)))
            ch(m,i,1,k) = cc(m,i,k,1)+((wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*
     1       cc(m,i-1,k,2))+(wa4(i-2)*cc(m,i,k,5)-wa4(i-1)*
     1       cc(m,i-1,k,5)))+((wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3))+(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4)))
            ch(m,i-1,3,k) = cc(m,i-1,k,1)+tr11*
     1      ( wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*cc(m,i,k,2)
     1       +wa4(i-2)*cc(m,i-1,k,5)+wa4(i-1)*cc(m,i,k,5))+tr12*
     1      ( wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*cc(m,i,k,3)
     1       +wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*cc(m,i,k,4))+ti11*
     1      ( wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*cc(m,i-1,k,2)
     1       -(wa4(i-2)*cc(m,i,k,5)-wa4(i-1)*cc(m,i-1,k,5)))+ti12*
     1      ( wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*cc(m,i-1,k,3)
     1       -(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*cc(m,i-1,k,4)))
            ch(m,ic-1,2,k) = cc(m,i-1,k,1)+tr11*
     1      ( wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*cc(m,i,k,2)
     1       +wa4(i-2)*cc(m,i-1,k,5)+wa4(i-1)*cc(m,i,k,5))+tr12*
     1     ( wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*cc(m,i,k,3)
     1      +wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*cc(m,i,k,4))-(ti11*
     1      ( wa1(i-2)*cc(m,i,k,2)-wa1(i-1)*cc(m,i-1,k,2)
     1       -(wa4(i-2)*cc(m,i,k,5)-wa4(i-1)*cc(m,i-1,k,5)))+ti12*
     1      ( wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*cc(m,i-1,k,3)
     1       -(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*cc(m,i-1,k,4))))
            ch(m,i,3,k) = (cc(m,i,k,1)+tr11*((wa1(i-2)*cc(m,i,k,2)-
     1       wa1(i-1)*cc(m,i-1,k,2))+(wa4(i-2)*cc(m,i,k,5)-wa4(i-1)*
     1       cc(m,i-1,k,5)))+tr12*((wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3))+(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4))))+(ti11*((wa4(i-2)*cc(m,i-1,k,5)+
     1       wa4(i-1)*cc(m,i,k,5))-(wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2)))+ti12*((wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*
     1       cc(m,i,k,4))-(wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*
     1       cc(m,i,k,3))))
            ch(m,ic,2,k) = (ti11*((wa4(i-2)*cc(m,i-1,k,5)+wa4(i-1)*
     1       cc(m,i,k,5))-(wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2)))+ti12*((wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*
     1       cc(m,i,k,4))-(wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*
     1       cc(m,i,k,3))))-(cc(m,i,k,1)+tr11*((wa1(i-2)*cc(m,i,k,2)-
     1       wa1(i-1)*cc(m,i-1,k,2))+(wa4(i-2)*cc(m,i,k,5)-wa4(i-1)*
     1       cc(m,i-1,k,5)))+tr12*((wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3))+(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4))))
            ch(m,i-1,5,k) = (cc(m,i-1,k,1)+tr12*((wa1(i-2)*
     1       cc(m,i-1,k,2)+wa1(i-1)*cc(m,i,k,2))+(wa4(i-2)*
     1       cc(m,i-1,k,5)+wa4(i-1)*cc(m,i,k,5)))+tr11*((wa2(i-2)*
     1       cc(m,i-1,k,3)+wa2(i-1)*cc(m,i,k,3))+(wa3(i-2)*
     1       cc(m,i-1,k,4)+wa3(i-1)*cc(m,i,k,4))))+(ti12*((wa1(i-2)*
     1       cc(m,i,k,2)-wa1(i-1)*cc(m,i-1,k,2))-(wa4(i-2)*cc(m,i,k,5)-
     1       wa4(i-1)*cc(m,i-1,k,5)))-ti11*((wa2(i-2)*cc(m,i,k,3)-
     1       wa2(i-1)*cc(m,i-1,k,3))-(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4))))
            ch(m,ic-1,4,k) = (cc(m,i-1,k,1)+tr12*((wa1(i-2)*
     1       cc(m,i-1,k,2)+wa1(i-1)*cc(m,i,k,2))+(wa4(i-2)*
     1       cc(m,i-1,k,5)+wa4(i-1)*cc(m,i,k,5)))+tr11*((wa2(i-2)*
     1       cc(m,i-1,k,3)+wa2(i-1)*cc(m,i,k,3))+(wa3(i-2)*
     1       cc(m,i-1,k,4)+wa3(i-1)*cc(m,i,k,4))))-(ti12*((wa1(i-2)*
     1       cc(m,i,k,2)-wa1(i-1)*cc(m,i-1,k,2))-(wa4(i-2)*cc(m,i,k,5)-
     1       wa4(i-1)*cc(m,i-1,k,5)))-ti11*((wa2(i-2)*cc(m,i,k,3)-
     1       wa2(i-1)*cc(m,i-1,k,3))-(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4))))
            ch(m,i,5,k) = (cc(m,i,k,1)+tr12*((wa1(i-2)*cc(m,i,k,2)-
     1       wa1(i-1)*cc(m,i-1,k,2))+(wa4(i-2)*cc(m,i,k,5)-wa4(i-1)*
     1       cc(m,i-1,k,5)))+tr11*((wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3))+(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4))))+(ti12*((wa4(i-2)*cc(m,i-1,k,5)+
     1       wa4(i-1)*cc(m,i,k,5))-(wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2)))-ti11*((wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*
     1       cc(m,i,k,4))-(wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*
     1       cc(m,i,k,3))))
            ch(m,ic,4,k) = (ti12*((wa4(i-2)*cc(m,i-1,k,5)+wa4(i-1)*
     1       cc(m,i,k,5))-(wa1(i-2)*cc(m,i-1,k,2)+wa1(i-1)*
     1       cc(m,i,k,2)))-ti11*((wa3(i-2)*cc(m,i-1,k,4)+wa3(i-1)*
     1       cc(m,i,k,4))-(wa2(i-2)*cc(m,i-1,k,3)+wa2(i-1)*
     1       cc(m,i,k,3))))-(cc(m,i,k,1)+tr12*((wa1(i-2)*cc(m,i,k,2)-
     1       wa1(i-1)*cc(m,i-1,k,2))+(wa4(i-2)*cc(m,i,k,5)-wa4(i-1)*
     1       cc(m,i-1,k,5)))+tr11*((wa2(i-2)*cc(m,i,k,3)-wa2(i-1)*
     1       cc(m,i-1,k,3))+(wa3(i-2)*cc(m,i,k,4)-wa3(i-1)*
     1       cc(m,i-1,k,4))))
 1002       continue
  102    continue
  103 continue
      return
      end
      subroutine vradfg (mp,ido,ip,l1,idl1,cc,c1,c2,ch,ch2,mdimc,wa)
c
c     vrfftpk, version 1, august 1985
c
      dimension     ch(mdimc,ido,l1,ip)   ,cc(mdimc,ido,ip,l1)  ,
     1            c1(mdimc,ido,l1,ip)    ,c2(mdimc,idl1,ip),
     2                ch2(mdimc,idl1,ip)           ,wa(ido)
      tpi=2.*pimach(1.0)
      arg = tpi/float(ip)
      dcp = cos(arg)
      dsp = sin(arg)
      ipph = (ip+1)/2
      ipp2 = ip+2
      idp2 = ido+2
      nbd = (ido-1)/2
      if (ido .eq. 1) go to 119
      do 101 ik=1,idl1
         do 1001 m=1,mp
         ch2(m,ik,1) = c2(m,ik,1)
 1001    continue
  101 continue
      do 103 j=2,ip
         do 102 k=1,l1
            do 1002 m=1,mp
            ch(m,1,k,j) = c1(m,1,k,j)
 1002       continue
  102    continue
  103 continue
      if (nbd .gt. l1) go to 107
      is = -ido
      do 106 j=2,ip
         is = is+ido
         idij = is
         do 105 i=3,ido,2
            idij = idij+2
            do 104 k=1,l1
               do 1004 m=1,mp
               ch(m,i-1,k,j) = wa(idij-1)*c1(m,i-1,k,j)+wa(idij)
     1           *c1(m,i,k,j)
               ch(m,i,k,j) = wa(idij-1)*c1(m,i,k,j)-wa(idij)
     1           *c1(m,i-1,k,j)
 1004          continue
  104       continue
  105    continue
  106 continue
      go to 111
  107 is = -ido
      do 110 j=2,ip
         is = is+ido
         do 109 k=1,l1
            idij = is
            do 108 i=3,ido,2
               idij = idij+2
               do 1008 m=1,mp
               ch(m,i-1,k,j) = wa(idij-1)*c1(m,i-1,k,j)+wa(idij)
     1           *c1(m,i,k,j)
               ch(m,i,k,j) = wa(idij-1)*c1(m,i,k,j)-wa(idij)
     1           *c1(m,i-1,k,j)
 1008          continue
  108       continue
  109    continue
  110 continue
  111 if (nbd .lt. l1) go to 115
      do 114 j=2,ipph
         jc = ipp2-j
         do 113 k=1,l1
            do 112 i=3,ido,2
               do 1012 m=1,mp
               c1(m,i-1,k,j) = ch(m,i-1,k,j)+ch(m,i-1,k,jc)
               c1(m,i-1,k,jc) = ch(m,i,k,j)-ch(m,i,k,jc)
               c1(m,i,k,j) = ch(m,i,k,j)+ch(m,i,k,jc)
               c1(m,i,k,jc) = ch(m,i-1,k,jc)-ch(m,i-1,k,j)
 1012          continue
  112       continue
  113    continue
  114 continue
      go to 121
  115 do 118 j=2,ipph
         jc = ipp2-j
         do 117 i=3,ido,2
            do 116 k=1,l1
               do 1016 m=1,mp
               c1(m,i-1,k,j) = ch(m,i-1,k,j)+ch(m,i-1,k,jc)
               c1(m,i-1,k,jc) = ch(m,i,k,j)-ch(m,i,k,jc)
               c1(m,i,k,j) = ch(m,i,k,j)+ch(m,i,k,jc)
               c1(m,i,k,jc) = ch(m,i-1,k,jc)-ch(m,i-1,k,j)
 1016          continue
  116       continue
  117    continue
  118 continue
      go to 121
  119 do 120 ik=1,idl1
         do 1020 m=1,mp
         c2(m,ik,1) = ch2(m,ik,1)
 1020    continue
  120 continue
  121 do 123 j=2,ipph
         jc = ipp2-j
         do 122 k=1,l1
            do 1022 m=1,mp
            c1(m,1,k,j) = ch(m,1,k,j)+ch(m,1,k,jc)
            c1(m,1,k,jc) = ch(m,1,k,jc)-ch(m,1,k,j)
 1022       continue
  122    continue
  123 continue
c
      ar1 = 1.
      ai1 = 0.
      do 127 l=2,ipph
         lc = ipp2-l
         ar1h = dcp*ar1-dsp*ai1
         ai1 = dcp*ai1+dsp*ar1
         ar1 = ar1h
         do 124 ik=1,idl1
            do 1024 m=1,mp
            ch2(m,ik,l) = c2(m,ik,1)+ar1*c2(m,ik,2)
            ch2(m,ik,lc) = ai1*c2(m,ik,ip)
 1024       continue
  124    continue
         dc2 = ar1
         ds2 = ai1
         ar2 = ar1
         ai2 = ai1
         do 126 j=3,ipph
            jc = ipp2-j
            ar2h = dc2*ar2-ds2*ai2
            ai2 = dc2*ai2+ds2*ar2
            ar2 = ar2h
            do 125 ik=1,idl1
               do 1025 m=1,mp
               ch2(m,ik,l) = ch2(m,ik,l)+ar2*c2(m,ik,j)
               ch2(m,ik,lc) = ch2(m,ik,lc)+ai2*c2(m,ik,jc)
 1025          continue
  125       continue
  126    continue
  127 continue
      do 129 j=2,ipph
         do 128 ik=1,idl1
            do 1028 m=1,mp
            ch2(m,ik,1) = ch2(m,ik,1)+c2(m,ik,j)
 1028       continue
  128    continue
  129 continue
c
      if (ido .lt. l1) go to 132
      do 131 k=1,l1
         do 130 i=1,ido
            do 1030 m=1,mp
            cc(m,i,1,k) = ch(m,i,k,1)
 1030       continue
  130    continue
  131 continue
      go to 135
  132 do 134 i=1,ido
         do 133 k=1,l1
            do 1033 m=1,mp
            cc(m,i,1,k) = ch(m,i,k,1)
 1033       continue
  133    continue
  134 continue
  135 do 137 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 136 k=1,l1
            do 1036 m=1,mp
            cc(m,ido,j2-2,k) = ch(m,1,k,j)
            cc(m,1,j2-1,k) = ch(m,1,k,jc)
 1036       continue
  136    continue
  137 continue
      if (ido .eq. 1) return
      if (nbd .lt. l1) go to 141
      do 140 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 139 k=1,l1
            do 138 i=3,ido,2
               ic = idp2-i
               do 1038 m=1,mp
               cc(m,i-1,j2-1,k) = ch(m,i-1,k,j)+ch(m,i-1,k,jc)
               cc(m,ic-1,j2-2,k) = ch(m,i-1,k,j)-ch(m,i-1,k,jc)
               cc(m,i,j2-1,k) = ch(m,i,k,j)+ch(m,i,k,jc)
               cc(m,ic,j2-2,k) = ch(m,i,k,jc)-ch(m,i,k,j)
 1038          continue
  138       continue
  139    continue
  140 continue
      return
  141 do 144 j=2,ipph
         jc = ipp2-j
         j2 = j+j
         do 143 i=3,ido,2
            ic = idp2-i
            do 142 k=1,l1
               do 1042 m=1,mp
               cc(m,i-1,j2-1,k) = ch(m,i-1,k,j)+ch(m,i-1,k,jc)
               cc(m,ic-1,j2-2,k) = ch(m,i-1,k,j)-ch(m,i-1,k,jc)
               cc(m,i,j2-1,k) = ch(m,i,k,j)+ch(m,i,k,jc)
               cc(m,ic,j2-2,k) = ch(m,i,k,jc)-ch(m,i,k,j)
 1042          continue
  142       continue
  143    continue
  144 continue
      return
      end
careful! anything free comes with no guarantee.
      subroutine vrfftf (m,n,r,rt,mdimr,wsave)
c***begin prologue  vrfftf
c***date written   850801   (yymmdd)
c***revision date  900509   (yymmdd)
c***category no.  j1a1
c***keywords  fast fourier transform, real periodic transform, 
c             fourier analysis, forward transform, multiple sequences
c***author  sweet, r.a. (nist) and lindgren, l.l. (nist)
c***purpose  forward real periodic transform, m sequences.
c***description
c
c  subroutine vrfftf computes the fourier coefficients (forward 
c  transform) of a number of real periodic sequences.  specifically,
c  for each sequence the subroutine claculates the independent
c  fourier coefficients described below at output parameter r.
c
c  the array wsave which is used by subroutine vrfftf must be
c  initialized by calling subroutine vrffti(n,wsave).
c
c
c  input parameters
c
c  m       the number of sequences to be transformed.
c
c  n       the length of the sequences to be transformed.  the method
c          is most efficient when n is a product of small primes,
c          however n may be any positive integer.
c
c  r       areal two-dimensional array of size mdimx x n containing the
c          the sequences to be transformed.  the sequences are stored
c          in the rows of r.  thus, the i-th sequence to be transformed,
c          x(i,j), j=0,1,...,n-1, is stored as
c
c               r(i,j) = x(i,j-1) , j=1, 2, . . . , n.
c
c  rt      a real two-dimensional work array of size mdimx x n.
c
c  mdimr   the row (or first) dimension of the arrays r and rt exactly 
c          as they appear in the calling program.  this parameter is 
c          used to specify the variable dimension of these arrays.
c
c  wsave   a real one-dimensional work array which must be dimensioned
c          at least n+15.  the wsave array must be initialized by 
c          calling subroutine vrffti.  a different wsave array must be
c          used for each different value of n.  this initialization does
c          not have to be repeated so long as n remains unchanged.  the
c          same wsave array may be used by vrfftf and vrfftb.
c
c  output parameters
c
c  r       contains the fourier coefficients f(k) for each of the m 
c          input sequences.  specifically, row i of r, r(i,j), 
c          j=1,2,..,n, contains the independent fourier coefficients
c          f(i,k), for the i-th input sequence stored as
c
c             r(i,1) = real( f(i,0) ),
c                    = sqrt(1/n)*sum(j=0,n-1)[ x(i,j) ],
c
c             r(i,2*k) = real( f(i,k) )
c                      = sqrt(1/n)*sum(j=0,n-1)[x(i,j)*cos(2j*k*pi/n)]
c
c             r(i,2*k+1) = imag( f(i,k) )
c                        =-sqrt(1/n)*sum(j=0,n-1)[x(i,j)*sin(2j*k*pi/n)]
c
c                   for k = 1, 2, . . . , m-1,
c
c              and, when n is even,
c
c              r(i,n) = real( f(i,n/2) ).
c                     = sqrt(1/n)*sum(j=0,n-1)[ (-1)**j*x(i,j) ].
c
c  wsave   contains results which must not be destroyed between calls
c          to vrfftf or vrfftb.
c
c  -----------------------------------------------------------------
c
c  note  -  a call of vrfftf followed immediately by a call of
c           of vrfftb will return the original sequences r.  thus,
c           vrfftb is the correctly normalized inverse of vrfftf.
c
c  -----------------------------------------------------------------
c
c  vrfftf is a straightforward extension of the subprogram rfftf to
c  handle m simultaneous sequences.  rfftf was originally developed
c  by p. n. swarztrauber of ncar.
c
c
c              * * * * * * * * * * * * * * * * * * * * *
c              *                                       *
c              *         program specifications        *
c              *                                       *
c              * * * * * * * * * * * * * * * * * * * * *
c
c
c     dimension of    r(mdimr,n), rt(mdimr,n), wsave(n+15)
c     arguments
c
c     latest          august 1, 1985
c     revision
c
c     subprograms     vrffti, vrfti1, vrfftf, vrftf1, vradf2, vradf3,
c     required        vradf4, vradf5, vradfg, vrfftb, vrftb1, vradb2,
c                     vradb3, vradb4, vradb5, vradbg, pimach
c
c     special         none
c     conditions
c
c     common          none
c     blocks
c
c     i/o             none
c
c     precision       single
c
c     specialist      roland sweet
c
c     language        fortran
c
c     history         written by linda lindgren and roland sweet at the
c                     national bureau of standards (boulder).
c
c     algorithm       a real variant of the stockham autosort version
c                     of the cooley-tukey fast fourier transform.
c
c     portability     american national standards institute fortran 77.
c                     the only machine dependent constant is located in
c                     the function pimach.
c
c     required        cos,sin
c     resident
c     routines
c
c
c***references  p. n. swarztrauber, vectorizing the ffts, in parallel
c               computations, (g. rodrigue, ed.), academic press, 1982,
c               pp. 51-83.
c***routines called  vrftf1
c***end prologue  vrfftf
c
c     vrfftpk, version 1, august 1985
c
      dimension       r(mdimr,n)  ,rt(mdimr,n)    ,wsave(n+15)
c***first executable statement  vrfftf
      if (n .eq. 1) return
      call vrftf1 (m,n,r,rt,mdimr,wsave(1),wsave(n+1))
      return
      end
careful! anything free comes with no guarantee.
      subroutine vrffti (n,wsave)
c***begin prologue  vrffti
c***date written   860701   (yymmdd)
c***revision date  900509   (yymmdd)
c***category no.  j1a1
c***keywords  fast fourier transform, real periodic transform,
c             multiple sequences
c***author  sweet, r.a. (nist) and lindgren, l.l. (nist)
c***purpose  initialization for vrfftf and vrfftb.
c***description
c
c  subroutine vrffti initializes the array wsave which is used in
c  both vrfftf and vrfftb.  the prime factorization of n together with
c  a tabulation of certain trigonometric functions are computed and
c  stored in the array wsave.
c
c  input parameter
c
c  n       the length of the sequence to be transformed.  there is no
c          restriction on n.
c
c  output parameter
c
c  wsave   a work array which must be dimensioned at least n+15.
c          the same work array can be used for both vrfftf and vrfftb
c          as long as n remains unchanged.  different wsave arrays
c          are required for different values of n.  the contents of
c          wsave must not be changed between calls of vrfftf or vrfftb.
c
c
c              * * * * * * * * * * * * * * * * * * * * *
c              *                                       *
c              *         program specifications        *
c              *                                       *
c              * * * * * * * * * * * * * * * * * * * * *
c
c
c     dimension of    r(mdimr,n), rt(mdimr,n), wsave(n+15)
c     arguments
c
c     latest          august 1, 1985
c     revision
c
c     subprograms     vrffti, vrfti1, vrfftf, vrftf1, vradf2, vradf3,
c     required        vradf4, vradf5, vradfg, vrfftb, vrftb1, vradb2,
c                     vradb3, vradb4, vradb5, vradbg, pimach
c
c     special         none
c     conditions
c
c     common          none
c     blocks
c
c     i/o             none
c
c     precision       single
c
c     specialist      roland sweet
c
c     language        fortran
c
c     history         written by linda lindgren and roland sweet at the
c                     national bureau of standards (boulder).
c
c     algorithm       a real variant of the stockham autosort version
c                     of the cooley-tukey fast fourier transform.
c
c     portability     american national standards institute fortran 77.
c                     the only machine dependent constant is located in
c                     the function pimach.
c
c     required        cos,sin
c     resident
c     routines
c
c
c***references  p. n. swarztrauber, vectorizing the ffts, in parallel
c               computations, (g. rodrigue, ed.), academic press, 1982,
c               pp. 51-83.
c***routines called  vrfti1
c***end prologue  vrffti
c
c     vrfftpk, version 1, august 1985
c
      dimension       wsave(n+15)
c***first executable statement  vrffti
      if (n .eq. 1) return
      call vrfti1 (n,wsave(1),wsave(n+1))
      return
      end
      subroutine vrftf1 (m,n,c,ch,mdimc,wa,fac)
c
c     vrfftpk, version 1, august 1985
c
      dimension       ch(mdimc,n) ,c(mdimc,n)  ,wa(n)   ,fac(15)
      nf = fac(2)
      na = 1
      l2 = n
      iw = n
      do 111 k1=1,nf
         kh = nf-k1
         ip = fac(kh+3)
         l1 = l2/ip
         ido = n/l2
         idl1 = ido*l1
         iw = iw-(ip-1)*ido
         na = 1-na
         if (ip .ne. 4) go to 102
         ix2 = iw+ido
         ix3 = ix2+ido
         if (na .ne. 0) go to 101
         call vradf4 (m,ido,l1,c,ch,mdimc,wa(iw),wa(ix2),wa(ix3))
         go to 110
  101    call vradf4 (m,ido,l1,ch,c,mdimc,wa(iw),wa(ix2),wa(ix3))
         go to 110
  102    if (ip .ne. 2) go to 104
         if (na .ne. 0) go to 103
         call vradf2 (m,ido,l1,c,ch,mdimc,wa(iw))
         go to 110
  103    call vradf2 (m,ido,l1,ch,c,mdimc,wa(iw))
         go to 110
  104    if (ip .ne. 3) go to 106
         ix2 = iw+ido
         if (na .ne. 0) go to 105
         call vradf3 (m,ido,l1,c,ch,mdimc,wa(iw),wa(ix2))
         go to 110
  105    call vradf3 (m,ido,l1,ch,c,mdimc,wa(iw),wa(ix2))
         go to 110
  106    if (ip .ne. 5) go to 108
         ix2 = iw+ido
         ix3 = ix2+ido
         ix4 = ix3+ido
         if (na .ne. 0) go to 107
      call vradf5(m,ido,l1,c,ch,mdimc,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 110
  107 call vradf5 (m,ido,l1,ch,c,mdimc,wa(iw),wa(ix2),wa(ix3),wa(ix4))
         go to 110
  108    if (ido .eq. 1) na = 1-na
         if (na .ne. 0) go to 109
         call vradfg (m,ido,ip,l1,idl1,c,c,c,ch,ch,mdimc,wa(iw))
         na = 1
         go to 110
  109    call vradfg (m,ido,ip,l1,idl1,ch,ch,ch,c,c,mdimc,wa(iw))
         na = 0
  110    l2 = l1
  111 continue
      scale=sqrt(1./n)
      if (na .eq. 1) go to 113
      do 112 j=1,n
      do 112 i=1,m
         c(i,j) = scale*ch(i,j)
  112 continue
      return
  113 do 114 j=1,n
      do 114 i=1,m
         c(i,j)=scale*c(i,j)
  114 continue
      return
      end
      subroutine vrfti1 (n,wa,fac)
c
c     vrfftpk, version 1, august 1985
c
      dimension       wa(n)      ,fac(15)    ,ntryh(4)
      data ntryh(1),ntryh(2),ntryh(3),ntryh(4)/4,2,3,5/
      nl = n
      nf = 0
      j = 0
  101 j = j+1
      if (j-4) 102,102,103
  102 ntry = ntryh(j)
      go to 104
  103 ntry = ntry+2
  104 nq = nl/ntry
      nr = nl-ntry*nq
      if (nr) 101,105,101
  105 nf = nf+1
      fac(nf+2) = ntry
      nl = nq
      if (ntry .ne. 2) go to 107
      if (nf .eq. 1) go to 107
      do 106 i=2,nf
         ib = nf-i+2
         fac(ib+2) = fac(ib+1)
  106 continue
      fac(3) = 2
  107 if (nl .ne. 1) go to 104
      fac(1) = n
      fac(2) = nf
      tpi = 2.*pimach(1.0)
      argh = tpi/float(n)
      is = 0
      nfm1 = nf-1
      l1 = 1
      if (nfm1 .eq. 0) return
      do 110 k1=1,nfm1
         ip = fac(k1+2)
         ld = 0
         l2 = l1*ip
         ido = n/l2
         ipm = ip-1
         do 109 j=1,ipm
            ld = ld+l1
            i = is
            argld = float(ld)*argh
            fi = 0.
            do 108 ii=3,ido,2
               i = i+2
               fi = fi+1.
               arg = fi*argld
               wa(i-1) = cos(arg)
               wa(i) = sin(arg)
  108       continue
            is = is+ido
  109    continue
         l1 = l2
  110 continue
      return
      end
careful! anything free comes with no guarantee.
      subroutine vsint(m,n,x,xt,mdimx,wsave)
c***begin prologue  vsint
c***date written   860701   (yymmdd)
c***revision date  900509   (yymmdd)
c***category no.  j1a3
c***keywords  fast fourier transform, sine transform, multiple
c             sequences
c***author  boisvert, r. f., (nist)
c***purpose  sine transform of one or more real, odd sequences.
c***description
c
c  subroutine vsint computes the discrete fourier sine transform
c  of m odd sequences x(j,i), j=1,...,m.  the transform is defined
c  below at output parameter x.
c
c  the array wsave which is used by subroutine vsint must be
c  initialized by calling subroutine vsinti(n,wsave).
c
c  input parameters
c
c  m       the number of sequences to be transformed.
c
c  n       the length of the sequence to be transformed.  the method
c          is most efficient when n+1 is the product of small primes.
c
c  x       an array of size at least x(mdimx,n+1) which contains the
c          the sequences to be transformed.  the sequences are stored
c          in the rows of x.  thus, the jth sequence is stored in
c          x(j,i), i=1,..,n.  the extra column of x is used as work
c          storage.
c
c  xt      a work array of size at least xt(mdimx,n+1).
c
c  mdimx   the first dimension of the array x exactly as it appears in
c          the calling program.
c
c  wsave   a work array with dimension at least int(2.5*n+15)
c          in the program that calls vsint.  the wsave array must be
c          initialized by calling subroutine vsinti(n,wsave), and a
c          different wsave array must be used for each different
c          value of n.  this initialization does not have to be
c          repeated so long as n remains unchanged.
c
c  output parameters
c
c  x       for i=1,...,n and j=1,...,m
c
c               x(j,i)= the sum from k=1 to k=n
c
c                    2*x(j,k)*sin(k*i*pi/(n+1))/sqrt(2*(n+1))
c
c  wsave   contains initialization calculations which must not be
c          destroyed between calls of vsint.
c
c  -----------------------------------------------------------------
c
c  note  -  a call of vsint followed immediately by another call
c           of vsint will return the original sequences x.  thus,
c           vsint is the correctly normalized inverse of itself.
c
c  -----------------------------------------------------------------
c
c  vsint is a straightforward extension of the subprogram sint to
c  handle m simultaneous sequences.  the scaling of the sequences
c  computed by vsint is different than that of sint.  sint was
c  originally developed by p. n. swarztrauber of ncar.
c
c***references  p. n. swarztrauber, vectorizing the ffts, in parallel
c               computations, (g. rodrigue, ed.), academic press, 1982,
c               pp. 51-83.
c***routines called  vrfftf
c***end prologue  vsint
      dimension       x(mdimx,*), xt(mdimx,*), wsave(*)
c***first executable statement  sint
      if (m .le. 0)  go to 900
      if (n .le. 1)  go to 900
      if (n .gt. 2)  go to 300
c
c  case   n = 2
c
      sqrth = sqrt(0.50e0)
      do 201 j=1,m
         xh = sqrth*(x(j,1)+x(j,2))
         x(j,2) = sqrth*(x(j,1)-x(j,2))
         x(j,1) = xh
  201 continue
      go to 900
c
c  case   n .gt. 2
c
c     ... preprocessing
c
  300 continue
      np1 = n+1
      ns2 = n/2
      do 301 j=1,m
         xt(j,1) = 0.0
  301 continue
      do 310 k=1,ns2
         kc = np1-k
         do 310 j=1,m
            t1 = x(j,k)-x(j,kc)
            t2 = wsave(k)*(x(j,k)+x(j,kc))
            xt(j,k+1) = t1+t2
            xt(j,kc+1) = t2-t1
  310 continue
      modn = mod(n,2)
      if (modn .ne. 0) then
         do 320 j=1,m
            xt(j,ns2+2) = 4.0*x(j,ns2+1)
  320    continue
      endif
c
c     ... real periodic transform
c
      nf = ns2+1
      call vrfftf(m,np1,xt,x,mdimx,wsave(nf))
c
c     ... postprocessing
c
      do 330 j=1,m
         x(j,1) = 0.5*xt(j,1)
  330 continue
      do 350 i=3,n,2
         do 340 j=1,m
            x(j,i-1) = -xt(j,i)
  340    continue
         do 345 j=1,m
            x(j,i) = x(j,i-2)+xt(j,i-1)
  345    continue
  350 continue
      if (modn .eq. 0) then
         do 360 j=1,m
            x(j,n) = -xt(j,n+1)
  360    continue
      endif
c
c     ... normalization
c
      scale = sqrt(0.5)
      do 370 i=1,n
         do 370 j=1,m
            x(j,i) = scale*x(j,i)
  370 continue
c
c  exit
c
  900 continue
      return
      end
careful! anything free comes with no guarantee.
      subroutine vsinti(n,wsave)
c***begin prologue  vsinti
c***date written   860701   (yymmdd)
c***revision date  900509   (yymmdd)
c***category no.  j1a3
c***keywords  fast fourier transform, sine transform, multiple
c             sequences
c***author  boisvert, r. f. (nist)
c***purpose  initialize for vsint.
c***description
c
c  subroutine vsinti initializes the array wsave which is used in
c  subroutine sint.  the prime factorization of n together with
c  a tabulation of the trigonometric functions are computed and
c  stored in wsave.
c
c  input parameter
c
c  n       the length of the sequence to be transformed.  the method
c          is most efficient when n+1 is a product of small primes.
c
c  output parameter
c
c  wsave   a work array with at least int(2.5*n+15) locations.
c          different wsave arrays are required for different values
c          of n.  the contents of wsave must not be changed between
c          calls of vsint.
c
c  -----------------------------------------------------------------
c
c  vsinti is a straightforward extension of the subprogram sinti to
c  handle m simultaneous sequences.  sinti was originally developed
c  p. n. swarztrauber of ncar.
c
c***references  p. n. swarztrauber, vectorizing the ffts, in parallel
c               computations, (g. rodrigue, ed.), academic press, 1982,
c               pp. 51-83.
c***routines called  vrffti
c***end prologue  vsinti
      dimension       wsave(*)
c***first executable statement  sinti
      pi = pimach(1.0)
      if (n .le. 1) return
      np1 = n+1
      ns2 = n/2
      dt = pi/real(np1)
      ks = 1
      kf = ks+ns2-1
      fk = 0.
      do 101 k=ks,kf
         fk = fk+1.
         wsave(k) = 2.*sin(fk*dt)
  101 continue
      call vrffti (np1,wsave(kf+1))
      return
      end
