!> restarted BICG solver for linear algebra problem
module bicg_solver
!  use matrix_oper_internals
  use matrix_oper
  use gmres_solver
  use data_mod
  implicit none
  public:: bicg

contains
  !c-------------------------------------------------------c
  !c     List of Routines:
  !c
  !c     bicg

  !> main BICG subroutine for the solution Ax = d and A^T y = g
  subroutine bicg(nsize, x, y, d, g, nit,tol, prod, precond, prodD, precondD, nvec,  &
       tol2,iout,iterates, rezid, not_converge, xi_initP, xi_initD, x0, y0) !XYZ, d0, g0)

    !   preconditioned bicg
    !
    !   Z. STRAKOS, P. TICHY,
    !     ON EFFICIENT NUMERICAL APPROXIMATION OF THE BILINEAR FORM c∗A−1b
    !      SISC 33(2):565-587, 2011
    !

    integer :: nsize, m, nit, nvec, iout
    integer, intent(inout) :: iterates
    real, dimension(1:nsize), intent(inout) :: x  ! solution primal
    real, dimension(1:nsize), intent(inout) :: y  ! solution dual
    real, dimension(1:nsize), intent(in)    :: d  ! RHS primal
    real, dimension(1:nsize), intent(in)    :: g  ! RHS dual
    real, intent(inout)    :: xi_initP    !  initial value of the target quantity - primal
    real, intent(inout)    :: xi_initD    !  initial value of the target quantity - dual
    real, dimension(1:nsize), intent(in) :: x0  ! solution primal - initial outside
    real, dimension(1:nsize), intent(in) :: y0  ! solution dual  -initial outside
    !XYZ real, dimension(1:nsize), intent(in) :: d0  ! original RHS d0 = A x_0 + d
    !XYZ real, dimension(1:nsize), intent(in) :: g0  ! original RHS g0 = A'y_0 + g
    real :: tol, tol2, rezid, etaP, etaD, etaPA, etaDA, cAparametP, cAparametD
    integer, intent(inout) :: not_converge   ! not converge = 1, converge = 0
    integer :: update_iter
    integer, parameter  :: mdim=2
    real :: normB, normX, normA, normR, fac_iter, stop_tol, stop_tolP, stop_tolD, xi_limit
    real :: fac_iter2 
    character(len=20), parameter :: fileName = 'BiCG_conv'

    !ccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
    !c     Version December 1992 : Economised storage            c
    !c     Continuous Householder updates => residual calculated c
    !c     from H matrix without need to calculate  x at each    c
    !c     sweep.                                                c
    !c     Max vectors allowed for is BICG(mdim-1)              c
    !ccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
    !ccc
    !ccc   Big vectors
    !ccc
    !XYZ real, dimension(:, : ), allocatable ::  ws
    !real, dimension(:, : , :), allocatable ::  ws
    real, dimension(:, :), allocatable ::  p, r, rt, q, s, st, xold
    real, dimension(:), allocatable ::  vk, vl, estK !XYZ , x00, y00
    real :: alpha, beta, update, xiB_update_abs, J1, J2, estK_max
    real:: rt_init, st_init, cx_0, by_0
    integer :: iter,k, nitV

    interface
       subroutine prod(b,x,nsize)
         integer, intent (in):: nsize
         real, dimension(1:nsize), intent(in) :: x
         real, dimension(1:nsize), intent(inout) :: b
       end subroutine prod

       subroutine precond(b,x,nsize)
         integer, intent (in):: nsize
         real, dimension(1:nsize), intent(in) :: x
         real, dimension(1:nsize), intent(inout) :: b
       end subroutine precond

       subroutine prodD(b,x,nsize)
         integer, intent (in):: nsize
         real, dimension(1:nsize), intent(in) :: x
         real, dimension(1:nsize), intent(inout) :: b
       end subroutine prodD

       subroutine precondD(b,x,nsize)
         integer, intent (in):: nsize
         real, dimension(1:nsize), intent(in) :: x
         real, dimension(1:nsize), intent(inout) :: b
       end subroutine precondD

    end interface

    !local variables:
    logical :: res_indicator
    integer idump, ioption, ihist, iflag, i, j, kvec
    real dnorm, qtq, ss, ss1, rezid0,rr, bi, hi, xiB, xiB_old
    real :: cx1, cx2, by1, by2, rcx2, rby2, fac
    real :: cx1G, cx2G, by1G, by2G
    real :: estP, estD, eta, estP_old, estD_old, stop_errorP, stop_errorD
    real :: estP_F, estD_F, estP_Fold, estD_Fold
    real :: estP_Finit, estD_Finit, estP0=0., estD0=0.

    !XYZ  -removing of lines needeg for BiCG - JSC 2020 paper
    !XYZ  - not necessary for nonlinear problems

    res_indicator = .false.

    if(state%nlSolver%non_alg_stop == 'rezL2') then
      res_indicator = .true.
      stop "rez_indicator_for BiCG not tested"
    end if

    if(state%time%iter == 0 .and. state%nlSolver%TAiter == 1) then
       open( 12, file = fileName , status = 'replace', action="write" )
    else
       open( 12, file = fileName , status = 'UNKNOWN', position="append" )
    endif
    close(12)



    estP   = 0. ; estD   = 0.;  estP_old  = 0.;  estD_old  = 0. ! quantities C2
    estP_F = 0. ; estD_F = 0.;  estP_Fold = 0.;  estD_Fold = 0. ! quantities \etaA

    !call TEST_MATRICES(nsize, proD, prodD)
    !stop "matrices tested"

    ! case CR, limit values
    !xi_limit = 0.4047974550429  ! +- 5E-14  ! crossUni3500.grid, P1 (after 4 RESTART steps)
    !xi_limit = 0.4071783143507507 ! "exact" ! crossUni3500.grid, P2 (after 4 RESTART steps)
    !xi_limit = 0.4075262691478035 ! "exact" ! crossUni3500.grid, P4 (after 4 RESTART steps)
    xi_limit = 0.4076152998044911  ! "exact" ! crossAdapt2.grid, P2 (after 4 RESTART steps)
    !xi_limit = 0.4076169203362077  ! "exact" ! crossAdapt2.grid, P4 (after 4 RESTART steps)
    !xi_limit = 0.40761691481    ! +- 1E-11  ! crossAdapt3.grid, P2 (after 4 RESTART steps)
    !xi_limit = 0.4076178565940745 ! +- 5E-12  ! crossAdapt3.grid, P4 (after 4 RESTART steps)
    !xi_limit = 0.40761786368370525  ! the "EXACT" value for cross
    !xi_limit =  0.07408120962  ! the "EXACT" value for Carpio 2013, state%model%icase == 80

    ! xi
    stop_tol =  state%linSolver%tol * state%space%adapt%tol_max
    !stop_tol =  1E-02 * state%space%adapt%tol_max  ! used for ESCO18

    if(res_indicator ) stop_tol = state%linSolver%tol

    eta = sqrt( state%estim(dwrEtaI_aver,1) ) 
    if( eta > 0) then
       stop_tol =  eta * state%linSolver%tol *  state%nlSolver%tol2
    else
       stop_tol =   state%linSolver%tol *  state%nlSolver%tol2
    endif

    
    if(state%space%adapt%adapt_level == 0) then
       state%linSolver%tol_bicg = 1E-4
    else
       eta = sqrt( state%estim(dwrEtaI_aver,1) )
       state%linSolver%tol_bicg = min( state%linSolver%tol_bicg * 0.5, 2.5E-1 * eta  )
       !state%linSolver%tol_bicg = min( state%linSolver%tol_bicg * 0.5, &
       !     state%linSolver%tol * eta  )
       state%linSolver%tol_bicg = max( state%linSolver%tol_bicg , &
            state%linSolver%tol * state%space%adapt%tol_max  )
       !state%linSolver%tol_bicg = 1E-5 * state%space%adapt%tol_max
    endif


    ! (negative linSolver%tol  in *.ini file)
    if (.not. state%linSolver%tol_fixed) stop_tol = state%linSolver%tol_bicg

    stop_tolP = stop_tol
    stop_tolD = stop_tol

    ! initialization of variables
    cx1 = xi_initP  !0.  ! QUANTITIES GIVEN USING THE TRANSFORMATION OF THE SYSTEM
    cx2 = 0.             !  A x = b   ==>     A x' = b' := b - A x_0
    by1 = xi_initD  !0.
    by2 = 0.

    cx1G = xi_initP  !0. ! QUANTITIES GIVEN BY THE ORIGINAL SYSTEMS
    cx2G = 0.
    by1G = xi_initD  !0.
    by2G = 0.

    m = nsize
    xiB_update_abs = 0.

    ! vecors for BiCG: p(:, 1) = p_k, p(:, 2) = p_{k+1}, etc.
    allocate( p(1:nsize, 1:mdim), source = 0.0 ) ! direction of the primal problem
    allocate( q(1:nsize, 1:mdim), source = 0.0 ) ! direction of the dual problem
    allocate( r(1:nsize, 1:mdim), source = 0.0 ) ! residual of the primal problem
    allocate(rt(1:nsize, 1:mdim), source = 0.0 ) ! residual of the primal problem PRECOND
    allocate( s(1:nsize, 1:mdim), source = 0.0 ) ! residual for the dual problem
    allocate(st(1:nsize, 1:mdim), source = 0.0 ) ! residual for the dual problem PRECOND
    allocate(vk(1:nsize), vl(1:nsize), source = 0.0 )  ! local array
    !XYZ  allocate(x00(1:nsize), y00(1:nsize), source = 0.0 )  ! local array

    allocate(xold(1:nsize, 1:mdim), source = 0.0 ) ! old solution for the error estimates
    !XYZ  allocate(estK(1:grid%nelem) )  ! estimates of the local algebraic errors
    !allocate(ws(1:6, 1:nsize, 0:nit), source = 0.0 )  ! store of all vcetors, REMOVE LATER
    !XYZ allocate(ws(1:6, 1:nsize), source = 0.0 )  ! store of all vcetors, REMOVE LATER

    ! regularization of the initial data in case when:
    ! d * g = 0 .and.  x = 0   .and.   y = 0
    if( abs( dot_product(d(1:nsize), g(1:nsize) ) ) < 1E-30 .and. &
         norm2(x(1: nsize )) < 1E-15 .and. &
         norm2(y(1: nsize )) < 1E-15 ) then
       print*
       print*,'orthogonal RHS and vanishing x0, y0', &
             dot_product(d(1:nsize), g(1:nsize)), norm2(x(1: nsize)), norm2(y(1: nsize))
       print*

       call RegularizeVectorST_from_wST( x(1: nsize ) )

    endif

    ! INITIAL VALUE
    cx_0 = xi_initP
    by_0 = xi_initD

    ! XYZ  ! modifying of the input
    ! XYZ  if(state%time%iter == -10 .and. norm2(y(1: nsize )) < 1E-15 ) then
    ! XYZ     print*
    ! XYZ     print*, 'changing of init y !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'
    ! XYZ     print*
    ! XYZ     y(1:nsize) = 1.
    ! XYZ     call prodD(vk(1:nsize) , y(1:nsize), nsize)
    ! XYZ     fac = dot_product( g(1:nsize), vk(1:nsize) ) /dot_product(vk(1:nsize),vk(1:nsize))
    ! XYZ     y(1:nsize) = fac

    ! XYZ     xi_initP = xi_initP + dot_product( y(:), d(:) )
    ! XYZ     xi_initD = xi_initP  SOME INACCURACY

    ! XYZ     cx_0 = dot_product( x(1:nsize) , g(1:nsize)  )
    ! XYZ     by_0 = dot_product( y(1:nsize) , d(1:nsize)  )

    ! XYZ  endif

    ! XYZ  x00(1:nsize)  = x(1:nsize)
    ! XYZ  y00(1:nsize)  = y(1:nsize)

    ! INITIAL residuals and directions
    ! PRIMAL PROBLEM
    ! A x
    call prod(vk(1:nsize),  x(1:nsize) ,nsize)

    !  d -  A x the initial residual
    r(1:nsize,1) = d(1:nsize) - vk(1:nsize)

    ! P (d - Ax) the initial preconditioned residual, initial direction
    call precond(p(1:nsize,1), r(1:nsize, 1) ,nsize)
    !write(*,'(a10, 200es14.6)') 'SOL iluAx:', qd(1:nsize,2)

    rt(1:nsize,1) = p(1:nsize,1)
    rt_init = norm2(rt( 1:nsize, 1) )


    ! DUAL PROBLEM
    !  A^T x
    !write(*,'(a10, 200es14.6)') 'SOL y:', y(1:nsize)
    call prodD(vk(1:nsize) , y(1:nsize), nsize)

    !  g -  A^T y the initial dual residual
    s(1:nsize,1) = g(1:nsize) - vk(1:nsize)

    !  g -  A^T y the initial preconditional dual residual, initial direction update
    call precondD(q(1:nsize, 1),  s(1:nsize,1) ,nsize)

    st_init = norm2(q( 1:nsize, 1) )

    ! value of the target quantity
    xiB =0.
    xiB_old = 0.

    ! initial values
    estP_Finit = abs(dot_product(r(1:nsize,  1), y(1:nsize) + y0(1:nsize) ) )
    estD_Finit = abs(dot_product(s(1:nsize,  1), x(1:nsize) + x0(1:nsize) ) )

    estP_Fold = estP_Finit
    estD_Fold = estD_Finit

    !print*,'#### estP_ini, estD_ini = ', estP_Finit, estD_Finit

    ! number of iterations for the meassure of the update
    update_iter = nvec  !  set before call SolveBlockLinearBigPrimalDualProblem as restart !

    !write(*,'(a5, 2i5, 4es12.4, es22.14, es12.4)') 'Resid', 0, nit,  0., 0., &
    !     !dot_product(r(:, 1) , r(:, 1)),dot_product(s(:, 1) , s(:, 1)), &  ! residuals
    !     dot_product(rt(:, 1),rt(:, 1)),dot_product(st(:, 1),st(:, 1)), &     ! preconditioned residuals
    !     !ss, ss1
    !     xi_initP + xiB, abs(xiB  - xiB_old)

    ! MAIN BiCG  LOOP
    do iter = 1, nit
       iterates = iter

       ! P A p_k
       !write(*,'(a10, 200es14.6)') 'SOL x:', x(1:nsize)
       call prod(vk(1:nsize),  p(1:nsize, 1) ,nsize)


       ss = dot_product( q(1:nsize, 1) , vk(1:nsize) )
       if(abs(ss) <= 0.E-35) then
          print*,' too small value (1) ss = ', ss, iter
          ! k = 10
          ! do i=0,nsize-1, k
          !    if(norm2( s(i+1: min(nsize, i+k),1 ) ) > 1E-15) then
          !       write(81, *) grid%elem(i/10+1)%xc(:)
          !    endif
          !    if(norm2( r(i+1: min(nsize, i+k),1 ) ) > 1E-15) then
          !       write(82, *) grid%elem(i/10+1)%xc(:)
          !    endif
          !    if(norm2( rt(i+1: min(nsize, i+k), 1 ) ) > 1E-15) then
          !       write(83, *) grid%elem(i/10+1)%xc(:)
          !    endif
          ! enddo

          stop
       endif

       alpha = dot_product(s(1:nsize, 1), rt(1:nsize, 1) ) /ss

       x(1:nsize) = x(1:nsize) + alpha * p(1:nsize, 1)

       !REM  ... the real residuum
       !XXX
       !call prod(ws(1,1:nsize), x(1:nsize),  nsize)
       !XXX
       !ws(1,1:nsize) = ws(1,1:nsize) - d(1:nsize)
       !call precond(ws(2,1:nsize),  ws(1,1:nsize) ,nsize)    ! preconditioned residuum

       r(1:nsize, 2) = r(1:nsize, 1) - alpha * vk(1:nsize)    ! r_{k+1}
       call precond(rt(1:nsize, 2),  r(1:nsize,2) ,nsize)    ! \tilde{r}_{k+1}

       y(1:nsize) = y(1:nsize)  + alpha * q(1:nsize, 1)

       !  A^T q_k
       call prodD(vl(1:nsize), q(1:nsize, 1) ,nsize)

       s(1:nsize, 2) = s(1:nsize, 1) - alpha * vl(1:nsize)

       call precondD(st(1:nsize, 2),  s(1:nsize, 2) ,nsize)

       ss1 = dot_product( s(1:nsize, 1) , rt(1:nsize, 1) )
       if(abs(ss1) <= 0.E-35) then
          print*,' too small value (2) ss = '
          print*,' s_k . rt_k = ', ss1, iter, nsize
          print*,' s_{k+1} . rt_{k+1} = ',dot_product(s(1:nsize, 2), rt(1:nsize, 2) )
          ! k = 10
          ! do i=0,nsize-1, k
          !    write(79,'(a6, i5, 300es14.6)') 'b rhsP:',i+1, d(i+1: min(nsize, i+k) )
          !    write(78,'(a6, i5, 300es14.6)') 'c rhsD:',i+1, g(i+1: min(nsize, i+k) )

          !    write(99,'(a6, i5, 300es14.6)') ' vec s:',i+1, s(i+1: min(nsize, i+k),2 )
          !    write(98,'(a6, i5, 300es14.6)') ' vec rt:',i+1, rt(i+1: min(nsize, i+k),2 )
          !    if(norm2( s(i+1: min(nsize, i+k),2 ) ) <= 1E-15) then
          !       write(89, *) grid%elem(i/10+1)%xc(:)
          !    endif
          !    if(norm2( rt(i+1: min(nsize, i+k),2 ) ) <= 1E-15) then
          !       write(88, *) grid%elem(i/10+1)%xc(:)
          !    endif
          !    if(norm2( d(i+1: min(nsize, i+k) ) ) <= 1E-15) then
          !       write(87, *) grid%elem(i/10+1)%xc(:)
          !    endif
          !    if(norm2( g(i+1: min(nsize, i+k) ) ) <= 1E-15) then
          !       write(86, *) grid%elem(i/10+1)%xc(:)
          !    endif
          ! enddo
          stop
       endif

       beta = dot_product(s(1:nsize, 2), rt(1:nsize, 2) ) / ss1

       !print*,' s_k . rt_k = ', ss1, dot_product(s(1:nsize, 2), rt(1:nsize, 2) ), beta

       p(1:nsize, 2) = rt(1:nsize, 2) + beta * p(1:nsize, 1)
       q(1:nsize, 2) = st(1:nsize, 2) + beta * q(1:nsize, 1)

       ! update of the target quantity
       update = alpha * dot_product(s(1:nsize, 1) , rt(1:nsize, 1))
       xiB = xiB + update
       xiB_update_abs = xiB_update_abs + abs(update)

       !print*,'############################################################'
       !print*
       !print*, 'loop index iter = ', iter
       !print*

       !XYZ  J1 = cx_0 + dot_product( x(1:nsize) , g(1:nsize)  )
       !XYZ  !J1 = xi_initP + dot_product( x(1:nsize) , g(1:nsize)  )
       !XXXxxx
       !XYZ  !XYZ  J2 = by_0 + dot_product( y(1:nsize) , d(1:nsize)  )
       !XYZ  !J2 = xi_initD + dot_product( y(1:nsize) , d(1:nsize)  )

       if(mod(iter,update_iter) == 0)  then
          ! the following is not correct, the RHS is modified !!
          !cx2 = dot_product( x(1:nsize)+x0(1:nsize) , g(1:nsize)  )
          !by2 = dot_product( y(1:nsize)+y0(1:nsize) , d(1:nsize)  )

          ! approximation of J(u_h) of the first kind
          !XYZ  cx2 = J1
          !XYZ  by2 = J2
          !XYZ  cx2G = J1 - dot_product(y0(1:nsize), r(1:nsize,2))! corrections to correct values
          !XYZ  by2G = J2 - dot_product(x0(1:nsize), s(1:nsize,2))! corrections to correct values
          ! direct variant ??
          !cx2G = dot_product(y(1:nsize) + y0(1:nsize), d0(1:nsize) )
          !by2G = dot_product(x(1:nsize) + x0(1:nsize), g0(1:nsize) )

          !write(*,'(a10, 60es12.4)') 'Norm1',cx_0, xi_initP, &
          !     norm2(y),norm2(y0), norm2(d), norm2(d0)

          ! algebraic errors \etaA,  \etaAD
          estP_F = dot_product(r(1:nsize,  2), y(1:nsize) + y0(1:nsize) )
          estD_F = dot_product(s(1:nsize,  2), x(1:nsize) + x0(1:nsize) )

          ! residuals times solutions
          !XYZ estP = dot_product(r(1:nsize,  2), y(1:nsize)) ! + y0(1:nsize) )
          estD = dot_product(s(1:nsize,  2), x(1:nsize)) ! + x0(1:nsize) )

          ! lost of orthogonality, angles between solutions and residuals
          !estP0 = dot_product(r(1:nsize,  2), y(1:nsize)  - y00(1:nsize) ) !+ y0(1:nsize) )
          !estP0 = abs(estP0) / norm2(r(1:nsize,  2)) &
          !     /norm2( y(1:nsize) - y00(1:nsize) )
          !estD0 = dot_product(s(1:nsize,  2), x(1:nsize)  - x00(1:nsize) ) !+ x0(1:nsize) )
          !estD0 = abs(estD0) / norm2(s(1:nsize,  2)) &
          !     /norm2(x(1:nsize)  - x00(1:nsize) )

          if(estP_Finit <= 0 ) then
             estP_Finit = abs(estP_F)
             estP_Fold = estP_Finit
          endif

          if(estD_Finit <= 0 ) then
             estD_Finit = abs(estD_F)
             estD_Fold = estD_Finit
          end if

          ! approximation of J(u_h) of the second kind
          !XYZ  rcx2 = J1 + estP
          !xxx
          !XYZ  rby2 = J2 + estD


          ! vector : \Delta x * g  suitable for localication
          !xxx
          !xold(1:nsize, 2) =  ( x(1:nsize) - xold(1:nsize, 1)) * g(1:nsize)
          !call LocalizeBIG(nsize, xold(1:nsize, 2), estK(1:grid%nelem) )
          !estK_max = maxval( abs(estK(1:grid%nelem)) )
          !xxx

          !print*,'###:', iter, Cx2-cx1, dot_product( x(1:nsize) - xold(1:nsize, 1), g(1:nsize)  ), &
          !     Cx2 - cx1  -  dot_product(x(1:nsize) - xold(1:nsize, 1) , g(1:nsize)  ), &
          !     sum( estK(1:grid%nelem) ), sum( estK(1:grid%nelem) ) - Cx2 + cx1, estK_max

          !xxx
          !xold(1:nsize, 1) = x(1:nsize)

          !write(*, '(i5, a3, 3es14.6, a18, 4es12.4)') iter, &
          !     'J=',  xi_initP ,  rcx2, xi_initD+xiB, &
          !     'eta, etaD, zeta:',  abs(estP_F), abs(estD_F), abs(xiB  - xiB_old)


       endif
       ! if( mod(iter, update_iter) == 0) &
       !      write(*,'(a5, 2i5, 4es12.4, es22.14,8es12.4)')'Resid',iter, nit,  alpha, beta,&
       !      !dot_product(r(:, 2) , r(:, 2)),dot_product(s(:, 2) , s(:, 2)), &  ! residuals
       !      norm2(rt(:, 2)), norm2(st(:, 2)), &     ! preconditioned residuals
       !      !ss, ss1
       !      xi_initP + xiB, xiB, update,  abs(xiB  - xiB_old), &
       !      abs(cx2 - cx1), abs(by2 - by1)
       !      !abs( xi_initP + xiB - J1), abs( xi_initP + xiB - J2), abs(xiB  - xiB_old), xiB_update_abs
       nitV = nit/2
       !fac_iter = state%time%iter + 0.5 &
       !     + ( (state%nlSolver%iter-1) + 1 *(iter-1)/nitV) / (state%nlSolver%max_iter*1.1)

       fac_iter = state%nlSolver%TAiter + 1. *(iter-1)/nitV -0.5
       
       nitV = nit/5
       !fac_iter2 = state%time%iter + 0.5 &
       !     + ( (state%nlSolver%iter-1) + 1.*(iter-1)/nitV) / (state%nlSolver%max_iter*1.1)

       fac_iter2 = state%nlSolver%TAiter + 1. *(iter-1)/nitV -0.5 

       !if( mod(iter, update_iter) == 0) &
       !     print*,'###EEE:', iter, nit,  ( 1.*(state%nlSolver%iter) -1 ), &
       !     fac_iter2
       
       ! output to the file BiCG_conv
       if( mod(iter, update_iter) == 0) then

          !write(11,55) iter, &
          !     !rcx2, rcx2-xi_limit, cx2 - cx1 + estP, cx2, cx1, cx2-cx1, estP
          !     cx2-cx1, estP, cx2-cx1+ estP,  &
          !     estP_old, estP - estP_old, cx2 - cx1 + estP - estP_old,  &
          !     xiB  , xiB_old, xiB  - xiB_old

          open( 12, file = fileName , status = 'UNKNOWN', position="append" )
          if(iter == update_iter) write(12, '(x)')
55 format( i5 ' & ',  9(es12.4, '&') '\\')

          !write(12,'(5i6, 5es12.4 , 4es22.14, 20es12.4)')  &
          if( state%nlSolver%iter == 1 .and. iter ==  update_iter) then
             write(12,'(x)')
             write(12,*) "#", 1,2,3,4., 5, 6., 7., 8.,9.,10., 11., 12., 13.,14.,15., &
                  16.,17.,18.,19., &
                  20.,21., 22.,23.,24.,25.,26.,27.,28., 29., 30., 31.,32.,33., 34.,35., &
                  36., 37., 38., 39.
          endif

          if(iter .eq. 1) write(12,'(x)') 
          ! a short output
          write(12,*) &
               state%time%iter , state%nlSolver%iter, iter, (1.*nsize)**(1./3), &
               !iter + state%linSolver%iter_tot,  &   !nit, &
               fac_iter2,  &  !5
               fac_iter, &
               abs(xiB  - xiB_old), &                ! 7 estimates of the third type
               abs(estP_F), abs(estD_F), &           ! 8..9
               abs(xiB  - xiB_old) + abs(estP_F), &  ! 10
               abs(xiB  - xiB_old) + abs(estD_F), &  ! 11
               stop_tolP, stop_tolD                  ! 12.. 13
          close(12)
          ! write(12,*) &
          !      state%time%iter , state%nlSolver%iter, iter, (1.*nsize)**(1./3), &
          !      !iter + state%linSolver%iter_tot,  &   !nit, &
          !      fac_iter2,  &  !5
          !      fac_iter, alpha, beta, &   ! 6..8
          !      norm2(rt(:, 2)), norm2(st(:, 2)), &     ! ! 9..10  preconditioned residuals
          !      cx2, by2,  &       ! 11..12  approximations of J(u_h) first kind (A)
          !      rcx2, rby2,  &     ! 13..14  approximations of J(u_h) second type (B)
          !      xi_initP + xiB,  xi_initD + xiB,  & ! 15..16 , third type
          !      abs(estP), abs(estD),  &     ! 17..18  difference (A) - (B) = orthogonality
          !      abs(estP0), abs(estD0) , & ! 19..20 lost of orthogonality, cos of angle
          !      abs(cx2 - cx1), & ! + estP), & ! 21 modif estimates of first kind
          !      abs(by2 - by1), & ! + estD), & ! 22 modif estimates of first kind
          !      abs(cx2 - cx1 + estP - estP_old), & ! 23 modif estimated of 2nd kind P
          !      abs(by2 - by1 + estD - estD_old), & ! 24 modif estimated of 2nd kind D
          !      abs(xiB  - xiB_old), &            ! 25 estimates of the third type
          !      estK_max, stop_tol, stop_tol/ grid%nelem, &  !26..28
          !      state%linSolver%tol_bicg, norm2( ws(1,1:nsize) ), & ! 29..30
          !      abs(cx2-xi_limit), abs(by2-xi_limit),  &   ! 31..22  ERROR first kind
          !      abs(rcx2-xi_limit), abs(rby2-xi_limit),  & ! 33..34  ERROR second kind
          !      abs(xi_initP + xiB-xi_limit),  abs(xi_initD + xiB-xi_limit),& !35..36 ERR3rd
          !      abs(xiB  - xiB_old + estP_old), &  ! 37
          !      abs(estP_F), abs(estD_F), & ! 38..39
          !      abs(xiB  - xiB_old) + abs(estP_F), &  ! 40
          !      abs(xiB  - xiB_old) + abs(estD_F), &  ! 41
          !      dot_product( g0(1:nsize), x(1:nsize) + x0(1:nsize) ), & ! 42..42
          !      abs(cx2 - cx1 + estP - estP_old) + abs(estP_F), &  ! 43 E2 + aDWR
          !      abs(by2 - by1 + estD - estD_old) + abs(estD_F)     ! 44
          ! close(12)
       endif   !!if( mod(iter, update_iter) == 0) then



       !xxx
       !state%linSolver%residuum = max( norm2(r(:, 2)), norm2(s(:, 2)) )
       !stopping criterion from residuum
       !if(state%linSolver%residuum  < 1E-10) goto 500

       ! stopping criterion from the target quantity
       if(mod(iter,update_iter) == 0 )  then
          ! modified xi-stopping criterion
          !if( abs(xiB  - xiB_old + estD_old) < stop_tol) goto 500
          !stop_errorP = abs(xiB  - xiB_old )           ! E3
          !stop_errorD = abs(xiB  - xiB_old )

          !stop_errorP = abs(cx2 - cx1 + estP - estP_old) ! E2
          !stop_errorD =   abs(by2 - by1 + estD - estD_old)

          !stop_errorP = abs(cx2 - cx1 + estP - estP_old) + abs(estP_F)! E2 + aDWR
          !stop_errorD = abs(by2 - by1 + estD - estD_old) + abs(estD_F)

          stop_errorP = abs(xiB  - xiB_old) + abs(estP_F)  ! E3 + aDWR
          stop_errorD = abs(xiB  - xiB_old) + abs(estD_F)

          !stop_errorP = abs(estP_F)  ! including alg. error
          !stop_errorD = abs(estD_F)

          ! residuum stopping criterion
          !XYZ if(res_indicator ) then
          !XYZ   stop_errorP = norm2(rt(:, 2) ) / rt_init
          !XYZ   stop_errorD = norm2(st(:, 2) ) / st_init
          !XYZ endif

          !!XYZ ! special criterion for xiA
          ! if (.not. state%linSolver%tol_fixed .and. state%time%iter_loc > 1) then
          !    print*,' SWITCH ON :'
          !    print*, " * !if(state%time%maxiter == 43) nloops = 2  ", &
          !         " ! SIMULATION of  aDWR stopping criterion"
          !    print*, " * !call Provisorious_aDWR(finish_out )"
          !    print*,"  * for faster computation SWITCH ON"
          !    print*,"     IS SKIPPED, IT IS CALLED  in Provisorious_aDWR 3x"

          !    ! estimates primal/dual & algebraic/space
          !    etaPA = sqrt(state%estim( dwrEtaI_primalA, 1))
          !    etaP  = sqrt(state%estim( dwrEtaI_primal, 1))
          !    etaDA = sqrt(state%estim( dwrEtaI_dualA, 1))
          !    etaD  = sqrt(state%estim( dwrEtaI_dual, 1))

          !    cAparametP =  state%linSolver%tol * etaP/ max(etaPA, 1E-15)
          !    cAparametD =  state%linSolver%tol * etaD/ max(etaDA, 1E-15)

          !    stop_tolP = estP_Finit * cAparametP * 0.5  ! 0.5 security factor
          !    stop_tolD = estD_Finit * cAparametD * 0.5

          !    stop_errorP = abs(estP_F)
          !    stop_errorD = abs(estD_F)

          !    print*
          !    print*
          !    write(*,'(a8, 8es12.4)') 'ESD pr:', &
          !         etaPA, etaP, abs(estP_F), estP_Finit, stop_errorP, stop_tolP, cAparametP
          !    write(*,'(a8, 8es12.4)') 'ESD du:', &
          !         etaDA, etaD, abs(estD_F), estD_Finit, stop_errorD, stop_tolD, cAparametD
          !!XYZ endif

          ! Local c \Delta xi-stopping criterion for ESCO 18
          !if ( estK_max  < stop_tol/ grid%nelem  .and. iter > 10) goto MODIF

          ! DWR-stopping criterion
          !if(estP < stop_tol .and. estD < stop_tol) goto

          if( stop_errorP < stop_tolP .and.  stop_errorD < stop_tolD) goto 500

          ! nulifying of the update history
          xiB_old = xiB
          xiB_update_abs = 0.

          !XYZ  cx1 = cx2
          !XYZ  by1 = by2
          !XYZ  cx1G = cx2G
          !XYZ  by1G = by2G

          estP_old = estP
          estD_old = estD

          estP_Fold = estP_F
          estD_Fold = estD_F
       endif

       !if( dot_product(r(:, 2) , r(:, 2)) < 1E-15 .and. dot_product(s(:, 2) , s(:, 2))<1E-15) goto 500

       ! update
       r(1:nsize,  1) =  r(1:nsize, 2)
       rt(1:nsize, 1) = rt(1:nsize, 2)
       s(1:nsize,  1) =  s(1:nsize, 2)
       st(1:nsize, 1) = st(1:nsize, 2)
       p(1:nsize,  1) =  p(1:nsize, 2)
       q(1:nsize,  1) =  q(1:nsize, 2)

       !ws(1, :, iter) = r(:, 1)
       !ws(2, :, iter) = s(:, 1)
       !ws(3, :, iter) = p(:, 1)
       !ws(4, :, iter) = q(:, 1)

       ! print*,'ORTHOGONALITY test'

       ! do i=0, iter-1
       !    write(*,'(a10, 2i5, 4es14.6)') ' r . s:',i, iter, &
       !         dot_product(ws(1, :, iter), ws(2, :, i) ), &
       !         dot_product(ws(2, :, iter), ws(1, :, i) )
       ! enddo

       ! print*,'ORTHOGONALITY test'

       ! do i=0, iter-2
       !    write(*,'(a10, 2i5, 4es14.6)') 'q . Ap:',i, iter-1, &
       !         dot_product(ws(5, :, iter-1), ws(6, :, i) ), &
       !         dot_product(ws(6, :, iter-1), ws(5, :, i) )
       ! enddo

    end do
500 continue

    state%linSolver%residuum = max( norm2(r(:, 2)), norm2(s(:, 2)) )

    ! updates in lin_solvers.f90
    !state%linSolver%iter = state%linSolver%iter + iter
    !state%linSolver%iter_tot = state%linSolver%iter_tot + iter

    ! compute residuum directly
    !XXX call prod(vk(1:nsize),  x(1:nsize) ,nsize)
    !XXX vk(1:nsize) = d(1:nsize) - vk(1:nsize)

    ! k = 6
    ! do i=0,nsize-1, k
    !    write(21,'(a6, i5, 300es14.6)') 'rezP:',i+1, vk(i+1: min(nsize, i+k) )
    !    write(31,'(a6, i5, 300es14.6)') 'rezP:',i+1, r(i+1: min(nsize, i+k), 1 )
    !    write(35,'(a6, i5, 300es14.6)') 'solD:',i+1, y(i+1: min(nsize, i+k) ) + y0(i+1: min(nsize, i+k) ) !, d(i+1: min(nsize, i+k) )
    ! enddo
    ! do i=0,nsize-1, k
    ! !   write(*,'(a6, i5, 300es14.6)') 'rhsD:',i+1, y(i+1: min(nsize, i+k) ), g(i+1: min(nsize, i+k) )
    ! enddo

    !write(*,'(a20, es20.12, a20, es12.4))')  &
    !write(*, '(a20, 2es24.16, a12, es24.16)') &
    !     'target_quantity L = ', xi_initP+xiB, xi_initD+xiB, ',   update = ',xiB

    ss = xi_initP
    xi_initP = xi_initP + xiB
    !xi_initP = rcx2
    xi_initD = abs(xiB  - xiB_old) + abs(estP_F) 
    
    !write(*, '(a20, 2es24.16, a12, es24.16)') &
    !     'target_quantity G = ', J1, J2,', Bi J(u_h)=',  xi_initP
    !write(*, '(a20, 2es24.16, a12, es24.16)') &
    !     'differences: ', abs(J1-xi_initP) , abs(J2-xi_initP),', from limit=',  &
    !     abs(xi_initP - xi_limit)

    !write(*, '(i5,a3, 3es14.6, a18, 3es12.4)') 9999,&
    !     'J=',  ss, xi_initP ,  xi_initD+xiB, &
    !     'eta, etaD, zeta:',  abs(estP_F), abs(estD_F), abs(xiB  - xiB_old)


    deallocate( p,q,r,s,vk, vl, st, rt, xold)
    !XYZ  deallocate(ws, x00, y00, estK)

  end subroutine bicg

  subroutine TEST_MATRICES(nsize, prod, prodD)
    integer, intent(in) :: nsize
    real, dimension(:), allocatable :: x, y, z, d
    integer :: i, j
    integer :: i1, i2, dof
    interface
       subroutine prod(b,x,nsize)
         integer, intent (in):: nsize
         real, dimension(1:nsize), intent(in) :: x
         real, dimension(1:nsize), intent(inout) :: b
       end subroutine prod

       subroutine prodD(b,x,nsize)
         integer, intent (in):: nsize
         real, dimension(1:nsize), intent(in) :: x
         real, dimension(1:nsize), intent(inout) :: b
       end subroutine prodD
    end interface

    allocate(x(1:nsize), y(1:nsize), z(1:nsize), d(1:nsize) )

    print*
    print*
    print*,'TEST_MATRICES: multiplication of A and A^T'
    print*
    print*

    dof = 3

    i1 = 395 * dof
    i2 = 358 * dof

    do i=1, nsize
       x = 0.
       x(i) = 1.

       call prod(y(1:nsize),  x(1:nsize) ,nsize)
       call prodD(z(1:nsize),  x(1:nsize) ,nsize)

       do j=1, nsize
          d(j) = abs(y(j) - z(j)) / max( max(abs(z(j)), abs(y(j)) ), 1E-15 )
          !if(d(j) > 1E-5) write(*,*) i,-j, y(j),z(j), d(j)
          if( (i > i1-dof .and. i <= i1 .and. j > i1-dof .and. j <= i1) .or. &
               (i > i2-dof .and. i <= i2 .and. j > i2-dof .and. j <= i2) .or. &
               (i > i1-dof .and. i <= i1 .and. j > i2-dof .and. j <= i2) .or. &
               (j > i1-dof .and. j <= i1 .and. i > i2-dof .and. i <= i2) ) &
               write(94,*) j,i, y(j), z(j), d(j)
       enddo

       !x = 0.
       !x(i) = 1.


       !do j=1, nsize
       !   if( abs(i - 50) <= 1 ) then
       !      if(y(j) /= 0.) write(102,*) i, -j, y(j)
       !   endif
       !enddo

    enddo


  end subroutine TEST_MATRICES


  subroutine Write_matrixes_vectors(inum, nsize, prod, prodD, b, c, v, w)
    integer, intent(in) :: inum
    integer, intent(in) :: nsize
    real, dimension(:), intent(in) :: b, c, v, w
    real, dimension(:), allocatable :: x, y, z, d
    integer :: i, j, ifile
    integer :: i1, i2, dof
    character(len=50) :: data_name
    character(len=5) :: ch5
    integer :: num_size, text_size, long_text_size, file_size
    integer :: is


    interface
       subroutine prod(b,x,nsize)
         integer, intent (in):: nsize
         real, dimension(1:nsize), intent(in) :: x
         real, dimension(1:nsize), intent(inout) :: b
       end subroutine prod

       subroutine prodD(b,x,nsize)
         integer, intent (in):: nsize
         real, dimension(1:nsize), intent(in) :: x
         real, dimension(1:nsize), intent(inout) :: b
       end subroutine prodD
    end interface

    long_text_size = 10
    text_size = 4
    num_size = 5
    file_size = text_size + num_size

    data_name = 'mat-00000    '
    if(inum > 0) then
       !is = int(log(1.*inum)/log(10.)) ! SOMETIMES makes troubles due to rounding
       if(inum > 0    .and. inum <= 9)    is = 0
       if(inum > 9    .and. inum <= 99)   is = 1
       if(inum > 99   .and. inum <= 999)  is = 2
       if(inum > 999  .and. inum <= 9999) is = 3
       if(inum > 9999 .and. inum <= 99999)is = 4
    else
       is = 0
    endif

    !print*,'!!!',inum,is, num_size+text_size-is, num_size+text_size, num_size-is, num_size

    write( ch5, '(i5)' ) inum  ! change the format if num_size /= 5 !!!
    data_name(num_size+text_size-is:num_size+text_size) = ch5(num_size-is: num_size)

    ifile = 10
    open(ifile, file = data_name, status='UNKNOWN')

    allocate(x(1:nsize), y(1:nsize), z(1:nsize), d(1:nsize) )

    !print*
    !print*
    !print*,'TEST_MATRICES: multiplication of A and A^T'
    !print*
    !print*

    !dof = 3

    !i1 = 395 * dof
    !i2 = 358 * dof
    write(ifile, *) '# Matrix:  ', nsize
    do i=1, nsize
       x = 0.
       x(i) = 1.

       call prod(y(1:nsize),  x(1:nsize) ,nsize)
       !call prodD(z(1:nsize),  x(1:nsize) ,nsize)

       do j=1, nsize
          !d(j) = abs(y(j) - z(j)) / max( max(abs(z(j)), abs(y(j)) ), 1E-15 )
          !if(d(j) > 1E-5) write(*,*) i,-j, y(j),z(j), d(j)
          !if( (i > i1-dof .and. i <= i1 .and. j > i1-dof .and. j <= i1) .or. &
          !     (i > i2-dof .and. i <= i2 .and. j > i2-dof .and. j <= i2) .or. &
          !     (i > i1-dof .and. i <= i1 .and. j > i2-dof .and. j <= i2) .or. &
          !     (j > i1-dof .and. j <= i1 .and. i > i2-dof .and. i <= i2) ) &
          if(y(j) /= 0.)  write(ifile, *) j,i, y(j) !, z(j)   !, z(j), d(j)
       enddo

       !x = 0.
       !x(i) = 1.


       !do j=1, nsize
       !   if( abs(i - 50) <= 1 ) then
       !      if(y(j) /= 0.) write(102,*) i, -j, y(j)
       !   endif
       !enddo

    enddo

    write(ifile, '(x)')
    write(ifile, *) '# RHS:'
    do j=1, nsize
       write(ifile, *) j, b(j), c(j)
    enddo

    write(ifile, '(x)')
    write(ifile, *) '# solution:'
    do j=1, nsize
       write(ifile, *) j, v(j), w(j)
    enddo

    close(ifile)

  end subroutine Write_matrixes_vectors


  subroutine Write_matrixes_vectors_fast(inum, eta, nsize,  b, c, v, w, ilu)
    integer, intent(in) :: inum
    real, intent(in) :: eta
    integer, intent(in) :: nsize
    real, dimension(:), intent(in) :: b, c, v, w
    logical, intent(in) :: ilu
    real, dimension(:), allocatable :: x, y, z, d
    integer :: i, j, k, i1, j1, k1, ifile
    integer :: jj, ie, ie1, m, m1
    class(element), pointer :: elem , elem1 ! one element
    character(len=50) :: data_name
    character(len=5) :: ch5
    integer :: num_size, text_size, long_text_size, file_size
    integer :: is,iss, is1, dof, dof1, ndof, ndof1, Tdof, Tdof1, dofAll1
    integer :: p_mod, q_mod,bigNSize, max_dof, max_Tdof, dofAll
    real :: val


    long_text_size = 10
    text_size = 4
    num_size = 5
    file_size = text_size + num_size

    data_name = 'Mat-00000    '
    if(inum > 0) then
       !is = int(log(1.*inum)/log(10.)) ! SOMETIMES makes troubles due to rounding
       if(inum > 0    .and. inum <= 9)    is = 0
       if(inum > 9    .and. inum <= 99)   is = 1
       if(inum > 99   .and. inum <= 999)  is = 2
       if(inum > 999  .and. inum <= 9999) is = 3
       if(inum > 9999 .and. inum <= 99999)is = 4
    else
       is = 0
    endif

    !print*,'!!!',inum,is, num_size+text_size-is, num_size+text_size, num_size-is, num_size

    write( ch5, '(i5)' ) inum  ! change the format if num_size /= 5 !!!
    data_name(num_size+text_size-is:num_size+text_size) = ch5(num_size-is: num_size)

    ifile = 10
    open(ifile, file = data_name, status='UNKNOWN')

    write(ifile, *) '# Matrix:  ', nsize

    p_mod = state%getP_mod()
    q_mod = state%getQ_mod()

    if (nsize /= state%bigNSize( p_mod , q_mod )) &
         stop 'wrong size of vectors in  Write_matrixes_vectors_fast!'


    bigNSize = state%bigNSize(p_mod, q_mod)

    max_dof = DOFtriang( DegFromDofTriang(state%space%max_dof) + p_mod )
    max_Tdof = state%time%max_Tdof + q_mod

    associate( time => state%time )
    select type( time )
    type is ( TimeTDG_t )

       do ie=1,grid%nelem

          !print*, "b in the beginning = ", norm2(b)
          elem => grid%elem(ie)

          dof = DOFtriang( elem%deg + p_mod )
          Tdof = elem%Tdof + q_mod
          ndof = dof * ndim
          dofAll = ndof * Tdof

          iss = elem%bigNcv(p_mod, q_mod) -1

          if (eta /= 0. .and. q_mod > 0 ) then
             stop "bMVprodBIG not implemented for q_mod > 0 ??  VERIFY"
          endif

          do i = 1,Tdof
             do j = 1, ndim
                m = (i-1)*dof*ndim + (j-1)*dof
                do k = 1, dof

                   do i1 = 1,Tdof
                      do j1 = 1, ndim
                         m1 = (i1-1)* ndof + (j1-1)*dof
                         do k1 = 1, dof

                            if(ilu) then
                               val = elem%ILU(0)%Mb(m+k, m1+k1)
                            else
                               val = elem%bigBlock(0)%Mb(k, k1, j, j1, i, i1)
                            endif

                            if(eta /= 0. .and. j == j1) &
                                 val = val +  eta * time%refTimeMatrix%Mb(i,i1) &
                                 * elem%Mass%Mb(k, k1)

                            write(ifile, *) iss+m+k, iss + m1+k1 , val

                         end do
                      end do
                   end do
                end do ! k
             end do ! j
          end do !i


          !! off-diagonal blocks
          do jj=1,elem%flen
             ie1 = grid%elem(ie)%face(neigh,jj)

             if(ie1 > 0) then
                elem1 => grid%elem(ie1)
                dof1 = DOFtriang( elem1%deg + p_mod )
                Tdof1 = elem1%Tdof + q_mod
                ndof1 = dof1 * ndim

                is1 = elem1%bigNcv(p_mod, q_mod) - 1
                dofAll1 = dof1 * Tdof1 * ndim

                do i = 1,Tdof
                   do j = 1, ndim
                      m = (i-1)*dof*ndim + (j-1)*dof
                      do k = 1, dof

                         do i1 = 1,Tdof1
                            do j1 = 1, ndim
                               m1 = (i1-1)* ndof1 + (j1-1)*dof1
                               do k1 = 1, dof1

                                  if(ilu) then
                                     val = elem%ILU(jj)%Mb(m+k, m1+k1)
                                  else
                                     val = elem%bigBlock(jj)%Mb(k, k1, j, j1, i, i1)
                                  endif

                                  write(ifile, *) iss+m+k, is1 + m1+k1 , val

                               end do
                            end do
                         end do
                      end do ! k
                   end do ! j
                end do !i

             endif
          enddo !jj

       enddo !ie

       class default
       stop 'bMVprodST only for STDG method'

    end select
  end associate

  if(.not. ilu) then
     write(ifile, '(x)')
     write(ifile, *) '# RHS:'
     do j=1, nsize
        write(ifile, *) j, b(j), c(j)
     enddo

     write(ifile, '(x)')
     write(ifile, *) '# solution:'
     do j=1, nsize
        write(ifile, *) j, v(j), w(j)
     enddo
  endif

  close(ifile)

end subroutine Write_matrixes_vectors_fast


end module bicg_solver



