!> general error estimation subroutines
module estimates
  use ama_L2interpol
  use dwr_mod
  use dwr_res
  use dual_problem_mod
  use main_data  ! contains type(mesh) :: grid for computation
  use euler_problem
  use terms_mod
  use apost_estimation
  use project_estimation
  use solution_mod
  use eval_jumps
  use eval_sol
  use elemental_mod
  use higher_order_local

  implicit none

  public :: computeDWRAnisotropicEstimates
  public :: computeDWRestimates
  public :: computeNonlinDWRestimates
  public :: controlIfEtaIisLessThanEtaII
  public :: DualDWRrezidErrorEstimates
  public :: PrimalDWRrezidErrorEstimates
  public :: FluxVectorDifference
  public :: moveDWREstimatesEtaI
  public :: PWpolynomialReconstComputeTerms
  !public :: reconstructSolution
  public :: ReconstrPrimalDualSolutions
  public :: ReconstrPrimalSTSolutions
  public :: RezidErrorEstimates
  public :: SolutionDifference
  public :: SetVectorsFields
  public :: Set_Elem_Regul_Estim_Decay
  !public :: RitzReconstruction
  public :: RitzReconstr_PrimalDual_big
  public :: IterativeRitzReconstr_PrimalDual_big
  public :: IterativeRitzReconstr_PrimalST_big
  public :: writeIntoEtaFile
  public :: EstimateNewton_DWR

contains

  !> clear the arrays elem%estim_locL
  subroutine Clear_Elem_Estim_locL( )
    class(element), pointer :: elem
    integer :: i

    do i = 1, grid%nelem
       elem => grid%elem(i)
       elem%estim_locL = 0.
    enddo
  end subroutine Clear_Elem_Estim_locL

  !> perform the error estimates using the dual norm
  !> including (non-)linear algebraic error
  subroutine RezidErrorEstimates( onlyAS, Ttime_updated )
    logical, intent(in) :: onlyAS   ! only space and algebraic estimates
    logical, intent(in) :: Ttime_updated ! Ttime was already updated by %tau(1)
    class(element), pointer :: elem, elem1
    real, dimension(:), allocatable :: L_estim
    real, dimension(:,:), allocatable :: wi
    real, dimension(:,:,:), allocatable :: Dwi
    real :: machine_tol, rmax, t0, t1, t2, ttime, val, val1, weight
    integer :: i, j, k, ndof, ndofP, itest, imax, ipoc, ndimD
    logical :: loc_implicitly


    !print*,'####  RezidErrorEstimates  start',onlyAS, Ttime_updated
    !!call cpu_time(t0)

    !itest = 360
    itest = -480

    ttime = state%time%ttime
    if(Ttime_updated) state%time%ttime = state%time%ttime - state%time%tau(1)

    allocate(L_estim(1:max_eta) )
    loc_implicitly = state%nlSolver%implicitly

    state%nlSolver%implicitly = .false.
    grid%elem(:)%deg_plus = .true.


    ! setting of fields elem%vec(rhs,*), elem%vec(rhsT,*) for error estimate
    call SetVectorsFields( onlyAS )

    call cpu_time(t0)


    state%estim(max_resT_S,:) = 0.
    state%estim(min_resT_S,:) =  1E+50
    state%estim(min_resT_S_loc,:) =  1E+50
    state%estim(resA_ST,:) = 1E+50

    rmax = 0.

    call cpu_time(t1)

    L_estim(:) = 0.   ! total value of the residuum

    do i=1, grid%nelem
       elem => grid%elem(i)
       ! NOT MULTIPLIED,1/tau in project.f90 removed
       !elem%vec(rhs,:) = elem%vec(rhs,:) * state%time%tau(1)

       ! the following should be performed for the STDGM approach
       !elem%vec(rhsT,:) = elem%vec(rhsT,:) / state%time%tau(1)

       !!!call EnergyReziduumElemEstimate(elem)  ! element residuum
       !do j=1,4


       if( state%time%disc_time /= 'STDG') then
          call DualElemEstimate(elem, 3, onlyAS)  ! 3 => element residuum in X- norm
       else
          if( state%modelName == 'porous') then
             call ST_DualElemEstimate_Var2(elem,3)  ! CORRECT SCALING OF PARAMETERS
             !call ST_DualElemEstimate(elem, 3 )  ! 3 => element residuum in X- norm

             !call ST_DualElemEstimate(elem, 2 )  ! 2 => element residuum in the H^1-norm (for inviscid??)
          else   ! Navier-Stokes, Eulerian, ... TRY IT!
             !if (i == 1) then
             !   print*, "ST_DualElemEstimate called for resS computation?"
             !end if
             call ST_DualElemEstimate(elem, 3 )  ! 3 => element residuum in X- norm
             !call ST_DualElemEstimate_Var2(elem,3)  ! CORRECT SCALING OF PARAMETERS
          endif
       endif

       !if(elem%i < 10 .or. elem%i == 1234) &
       !     write(*,'(a10, 2i5, 300es12.4)') 'etas:',elem%i, elem%dof,  elem%eta(1:5,:)

       ! limitation along the shock waves
       val = 1.
       !if(state%type_IC == 8) val = 2*elem%area/elem%diam  ! .or. abs(elem%xc(1) - 1.) > 2E-2) then

       ! POROUS MEDIA FLOW, detection of the zero intersection
       !call Elem_detect_intersect(elem)
       !if(elem%intersect_zero )  val = 0.1

       !if (val < 0.9) write(*,'(a8, i5, 30es12.4)') 'intersect', elem%i, val, elem%eta(resA:resST, 1)


       !if(val > 1E-2) then
       if(state%type_IC /= 8  .or. abs(elem%xc(1) - 1.) > 5E-2) then
          ndimD = 1
          !ndimD = ndim

          L_estim( resA) = L_estim( resA) + val * sum(elem%eta(resA,  1:ndimD)**2 )
          L_estim( resS) = L_estim( resS) + val * sum(elem%eta(resS,  1:ndimD)**2 )
          L_estim( resT) = L_estim( resT) + val * sum(elem%eta(resT,  1:ndimD)**2 )
          L_estim(resST) = L_estim(resST) + val * sum(elem%eta(resST, 1:ndimD)**2 )
          L_estim(resSr) = L_estim(resSr) + val * sum(elem%eta(resSr, 1:ndimD)**2 )

       endif

       ! Verfurth approach
       !print*,'$$$', elem%eta(resT, 1), elem%eta(resS, 1)
       if(elem%eta(resS, 1) > 0.) &
            state%estim(min_resT_S_loc,1) = min(state%estim(min_resT_S_loc,1),  &
            elem%eta(resT, 1) /  elem%eta(resS, 1) )

       !if(elem%eta(resT, 1) > rmax) then
       !   rmax = elem%eta(resT, 1)
       !   imax = i
       !endif

       state%estim(max_resT_S,1) = max(state%estim(max_resT_S,1),  &
            elem%eta(resT, 1) / max(1E-15, elem%eta(resS, 1)) )
       !!state%estim(resA_ST,:) = 1E+50

    enddo

    !print*,'L_estim:', L_estim(resA:resSr)
    !print*, 'stopped in estimates.f90 after ST_DualElemEstimate', 1./L_estim(1)
    ! stop


    state%L_estim(1:max_eta) = max(sqrt(L_estim(1:max_eta)), 1E-30)
    !!!state%L_estim( resS) = 0.003
    !print*,'    state%L_estim( resS):', state%L_estim(resS), resS

    machine_tol = 1.E-01
    state%estim(resA_ST_loc,1) = 0.

    ! local algebraic criterion
    do i=1,grid%nelem
       elem => grid%elem(i)

       if( elem%eta(resST, 1) >  machine_tol * state%L_estim(resST) / grid%nelem**0.5 ) then

          state%estim(resA_ST_loc,1) = max(state%estim(resA_ST_loc,1), &
               elem%eta(resA, 1)/ elem%eta(resST, 1))

          !write(198,*) elem%xc(:), elem%eta(resA, 1), elem%eta(resST, 1) ,elem%eta(resA, 1)/ elem%eta(resST, 1), elem%i
       endif

    enddo


    ! steady-state approach
    !print*, 'here' , state%L_estim(:)  !, state%L_estim(resS)
    if (state%L_estim(resS) == 0.0 ) then
      print*, 'FR: Problem in RezidErrorEstimates, state%L_estim(resS) = ' , state%L_estim(resS)
      print*, 'Other L_estim:' , state%L_estim(:)
      stop
    endif

    state%estim(resA_ST,1) = state%L_estim(resA) /max(1E-15, state%L_estim(resS))

    ! STDG approach
    state%estim(min_resT_S, 1) = state%L_estim(resT)/ max(1E-15, state%L_estim(resS))
    !print*,'#### WERTY',state%estim(min_resT_S, 1) , state%estim(min_resT_S_loc,1), state%estim(max_resT_S,1)

    grid%elem(:)%deg_plus = .false.

    state%nlSolver%implicitly  = loc_implicitly

    !write(*,'(a10,i5, 20es12.4)') '##RDE342ed', state%nlSolver%iter, state%estim(1:9, 1)
    !write(*,'(a10,i5, 20es12.4)') '##RDE342ed', state%nlSolver%iter, state%L_estim(1:4)


    call cpu_time(t2)
    state%CPU_constaint = state%CPU_constaint + t2 - t1

    !write(*,'(a10,i5, 20es12.4)') '##RDE342ed', state%nlSolver%iter, state%estim(1:9, 1)
    !write(*,'(a10,i5, 20es12.4)') '##RDE342ed', state%nlSolver%iter, state%L_estim(1:4)

    ! computing of quantities for the mesh refinement
    if(.not. onlyAS) then

       !call JumpsEvaluation( )

       !if(.not. grid%ElemSupports) & ! create the list of elements sharing at least a vertex with elem
       !     call SeekElemSupports(grid)

       ! averaging of the estimate
       ! setting of 'elem%estim_loc' including elem%eta(resST, 1) of neighbours elements
       ! elem%estim_loc**2 = sum_{K\in N(K)} elem%eta(resST, 1)**2
       do i=1,grid%nelem
          elem => grid%elem(i)
          if( state%time%disc_time /= 'STDG') then
             elem%estim_loc = elem%eta(resS, 1)**2    ! Verfurth approach
          else
             if(state%time_dependent) then  !!!  VERIFY ????
                elem%estim_loc = elem%eta(resST, 1)**2    ! STDGM approach
             else
                elem%estim_loc = elem%eta(resST, 1)**2 /state%time%tau(1)   ! STDGM approach
             endif

          endif

          ipoc = 0

          !if(i == 1) write(*,'(a8,20es12.4)') '#@#@@#@',elem%estim_loc

          weight = 0.0
          if(weight > 0.) then
             ! only neighbouring elements
             ! FOR HP_STAEDY
             do j=1,elem%flen
                k = elem%face(neigh,j)

                ! all elements sharing at least a vertex
                !  FOR ST_ESTIMS
                !do j=1,elem%isupp
                !   k = elem%supp(j,1)

                if(k > 0) then
                   elem1 => grid%elem(k)
                   if( state%time%disc_time /= 'STDG') then
                      elem%estim_loc = elem%estim_loc + weight*elem1%eta(resS, 1)**2  ! Verfurth
                      !elem%estim_loc = max(elem%estim_loc , elem1%eta(resS, 1)**2)   ! Verfurth
                   else
                      elem%estim_loc = elem%estim_loc + weight*elem1%eta(resST, 1)**2   ! STDGM approach
                   endif

                   ipoc = ipoc + 1
                endif
             enddo
             ! !!  elem%estim_loc = elem%estim_loc**0.5  ! we store the square
             elem%estim_loc = elem%estim_loc / (1. + weight*ipoc)
          endif !if(weight > 0.) then

          !if(i == 1) write(*,'(a8,20es12.4)') '#@#@@#@2',elem%estim_loc,elem%estim_locL

          !!write(*,'(a6,i5,6es12.4)')'EST:xd',elem%i, elem%eta(resS, 1), elem%estim_loc, 1.*ipoc, (1. + weight*ipoc)
          elem%estim_loc = sqrt( elem%estim_loc)  ! + elem%jumpsJh)  !!elem%rezid)

          ! if(elem%i <= 5 .or. elem%i >= grid%nelem -3) then
          !    val = 0.
          !    do j=1,grid%nelem
          !       val = val + grid%elem(j)%eta(resST, 1)**2
          !    enddo

          !    write(*,'(a8, i5, 30es12.4)') &
          !      '#E#E#:', elem%i, elem%estim_loc, elem%eta(resST, 1), state%L_estim(resST), &
          !      sum( grid%elem(:)%estim_loc * grid%elem(:)%estim_loc ) , val, sqrt( L_estim(resST))
          ! endif


          ! storing of several time levs - IF USED MUST BE DONE IN DIFFERENT WAY
          ! OUTSIDE OF  RezidErrorEstimates
          ! ALREADY DONE IN COMPUTEad.F90, BUT NOT TESTED !!!!
          !elem%estim_locL = sqrt( elem%estim_locL**2 + elem%estim_loc**2)
          !elem%estim_loc = elem%estim_locL  ! used for adaptivity

          !if(i == 1) write(*,'(a8,20es12.4)') '#@#@@#@3',elem%estim_loc

       enddo ! do i=1,grid%nelem


       !val = 0.; val1 = 0.
       !do i = 1, grid%nelem
       !  elem => grid%elem(i)
       !  val  = val  + elem%estim_loc**2
       !  val1 = val1 + elem%eta(resST,1)**2
       !enddo


       ! will be deallocated at the end of grid  of arrays allocated in SeekElemSupports
       !do i=1,grid%nelem
       !   elem => grid%elem(i)
       !   deallocate(elem%supp)
       !enddo

    endif

    state%time%Ttime = ttime

    !print*,'####  RezidErrorEstimates  END'

    !if( state%time%cn ) then
    !   do i=1,grid%nelem
    !      elem => grid%elem(i)
    !   enddo
    !endif

    deallocate(L_estim)

    !write(*,'(a40, 2l6, f12.4)') &
    !      '#CPU#  RezidErrorEstimates  ends',onlyAS, Ttime_updated, t2 - t0

    !do k=1, 5 ! grid%nelem
    !   write(*,'(a8, 2i5,30es12.4)') 'est_Loc:', grid%elem(k)%i,grid%elem(k)%dof, &
    !        grid%elem(k)%estim_loc, grid%elem(k)%eta(1:4, 1)
    !enddo

  end subroutine RezidErrorEstimates

  !> setting of field elem%vec(rhs,*), elem%vec(rhsT,*) for error estimate
  subroutine SetVectorsFields(onlyAS )
    logical, intent(in) :: onlyAS   ! only space and algebraic estimates
    integer :: ityp, ityp1, itest, i
    class(element), pointer :: elem, elem1
    logical :: deg_plus

    deg_plus = .true.
    !itest = 25

    !ityp = 1  ! backward Euler, STDGM approach
    !ityp = 2  ! Crank-Nicolson, STDGM approach
    !ityp = 3  ! backward Euler, Verfurth approach
    !ityp = 4 ! Crank-Nicolson, Verfurth approach

    ityp1 = 0    ! pw polynomial  approximation in time
    !ityp1 = 1   !STDGM approach
    !ityp1 = 2   !Verfurth approach

    if( state%time%cn ) then
       ityp = 2* ityp1
    else
       ityp = 2* ityp1 - 1
    endif

    !state%num_flux = .false.      ! use physical fluxes instead of numerical ones


    !print*,'SetVectorFields, ityp == ',ityp
    if( state%time%disc_time == 'STDG') then

       !if(state%time%iter <= 1) print*,'####, ATTENTION in estimates.f90, ttime was updated?', state%time%ttime
!         print*, 'SetVectorsFields calling ComputeSTDGM_Terms with implicitly = ', state%nlSolver%implicitly

       !print *,'Already DONE'
       !call ComputeST_Terms( .true. )

       !call ComputeSTDGM_Terms( deg_plus )  -- OLD varinat

       ! do i=1, 5 !grid%nelem
       !    elem => grid%elem(i)
       !    write(*,'(a10, 2i5, 300es12.4)') 'b_sol:',elem%i, elem%dof,  elem%wST(:,1:elem%dof,:)
       ! enddo


    elseif(ityp == -1) then  ! backward Euler, piecewise linear time reconstruction
       ! PREDELAT !!!! VD

       !itest = 25
       !write(*,'(a6,i5,16es12.4)') 'w0', itest, 0., grid%elem(itest)%w(0, 1:2), &
       !     grid%elem(itest)%w(1, 1:2),grid%elem(itest)%w(2, 1:2), grid%elem(itest)%w(3, 1:2)
       !print*,'-----------, vector'

       call PWpolynomialReconstComputeTerms(onlyAS ) !flux vector:  %vec(rhs, * ) = \int_{I_m} c_h(w_h, * )

    elseif(ityp == 1) then  ! backward Euler, STDGM approach
       call ComputeTerms(deg_plus )     ! flux vector:  %vec(rhs, * ) = c_h(w_h, * )

       call TimeDerivativeVector( ) ! BDF term: %vec(rhs,*) += \sum_n \alpha_n (w^{k-n},*)/ tau

       call SolutionDifference( ) ! for estimT:  %vec(rhsT, * ) = (w^{k} - w^{k-1}, *)

    elseif( ityp==0 .or. ityp == 2 ) then ! Crank-Nicolson

       call CrankNicolsonVectorFlux( ) ! %vec(rhs, * ) = ( c_h(w_h^k, * ) + c_h(w_h^{k-1}, * ))/2

       call TimeDerivativeVector( ) ! BDF term: %vec(rhs,*) += \sum_n \alpha_n (w^{k-n},*)/ tau

       call SolutionDifference( ) ! for estimT:  %vec(rhsT, * ) = (w^{k} - w^{k-1}, *)


    elseif(ityp == 3) then ! backward Euler, Verfurth approach
       !elem => grid%elem(itest)
       !!call PWpolynomialReconstComputeTerms( ) !flux vector:  %vec(rhs, * ) = \int_{I_m} c_h(w_h, * )

       !write(197,150) 'PW rhsT:',itest, elem%vec(rhsT, :)
       !write(197,150) 'PW rhs :',itest, elem%vec(rhs , :)
       !!call DualElemEstimate(elem, 3)
       !write(197,'(a6,20es12.4)') 'etas:',elem%eta(resA, 1), elem%eta(resS, 1), elem%eta(resT, 1), elem%eta(resST, 1)
       !write(197,*) '--------------------'


       ! vec(rhsT, * ) = 1/\tau_k \int_k < F(w_h) - F(\tilde{w}_h), \phi_i > dt
       ! vec(rhs, * ) = 1//\tau_m (w_h^k - w_h^{k-1}, \phi) + < F(w_h) , \phi_i >

       !approach based on Verfurth, MUST BE CALLED BEFORE SPACE ESTIMATE !!!
       call FluxVectorDifference( ) ! for estimT:  %vec(rhsT, * ) = c_h(w_h, * ) - c_h(W_h, * )

       ! vec(rhs, * ) already evaluated in FluxVectorDifference( )
       !!!call ComputeTerms( )     ! flux vector:  %vec(rhs, * ) = c_h(w_h, * )

       call TimeDerivativeVector( ) ! BDF term: %vec(rhs,*) += \sum_n \alpha_n (w^{k-n},*)/ tau

       !write(197,150) 'Ver rhsT:',itest, elem%vec(rhsT, :)
       !write(197,150) 'Ver rhs :',itest, elem%vec(rhs , :)
       !call DualElemEstimate(elem, 3)
       !write(197,'(a6,20es12.4)') 'etas:',elem%eta(resA, 1), elem%eta(resS, 1), elem%eta(resT, 1), elem%eta(resST, 1)
       !write(197,*) '--------------------'

150 format(a10,i5, 4(3es12.4 ' |',3es12.4,' ## ') )

    elseif(ityp == 4) then ! Crank-Nicolson, Verfurth approach
       ! vec(rhsT, * ) = 1/\tau_k \int_k < F(w_h) - F(\tilde{w}_h), \phi_i > dt
       ! vec(rhs, * ) = 1//\tau_m (w_h^k - w_h^{k-1}, \phi) + < F(w_h) , \phi_i >

       !approach based on Verfurth, MUST BE CALLED BEFORE SPACE ESTIMATE !!!
       call FluxVectorDifference( ) ! for estimT:  %vec(rhsT, * ) = c_h(w_h, * ) - c_h(W_h, * )

       call CrankNicolsonVectorFlux( ) ! %vec(rhs, * ) = ( c_h(w_h^k, * ) + c_h(w_h^{k-1}, * ))/2

       call TimeDerivativeVector( ) ! BDF term: %vec(rhs,*) += \sum_n \alpha_n (w^{k-n},*)/ tau

    else
       print*,'Unknown ityp in subroutine SetVectorsFields, estimated.f90'
       stop

    endif

    !do i=1,1 !grid%nelem
    !!do i=18,18
    !   elem => grid%elem(i)
    !   !write(*,'(a6,i5, 80es12.4)') 'rhsT:',i, elem%vec(rhsT, :)
    !   write(*,'(a6,i5, 80es12.4)') 'rhs :',i, elem%vec(rhs , :)
    !enddo



  end subroutine SetVectorsFields

  !> reconstruct piecewise linear in time solution
  !> \f$\tilde{w}(t) = w^{k-1} + \frac{t - t_{k-1}}{\tau_k} (w^k - w^{k-1})\f$
  !> \f$ \%vec(rhs, \varphi ) = \int_{I_k} c_h(\tilde{w}(t), \varphi ) d t\f$,
  !> \f$ \varphi \in S_{hp}^{+}\f$
  subroutine PWpolynomialReconstComputeTerms(onlyAS )
    logical, intent(in) :: onlyAS   ! only space and algebraic estimates
    class(element), pointer :: elem
    type(Gauss_rule), pointer :: G_rule
    class(Time_rule), pointer :: T_rule
    real, dimension(:,:), allocatable :: Lag_coef
    integer :: time_deg, Tdeg
    integer :: Gnum, Gdof , j, i, l, l1, ndof, ndofP, itest
    real :: t, ctime_store
    logical ::  deg_plus

    deg_plus = .true.
!!! TWO CHANGES, 1) time_deg = Tdeg , 2)  Gnum = time_deg  !!!!!!!!!

    if(.not. onlyAS) then
       ctime_store = state%time%ctime

       Tdeg = state%time%deg_actual  ! degree of actual BDF rule
       allocate(Lag_coef(0:1, 0:Tdeg) )  ! index1 = 0 -> Lagr functiom, =1 -> its derivative


       !time_deg = 3   ! maximal degree of test functions in time
       time_deg = Tdeg   ! maximal degree of test functions in time  ????


       !itest = 360
       itest = 25

       ! rule of integration in time
       !!Gnum = time_deg  + 1  ! ???
       Gnum = time_deg   ! ???

       !TODO why are we using G_rule - may not work if T_rule is Radau type
       G_rule => state%space%G_rule(Gnum)
       T_rule => state%time%T_rule(Gnum)



       Gdof = G_rule%Qdof

       ! preparing of arrays
       do i=1,grid%nelem
          elem => grid%elem(i)
          ndof = elem%dof * ndim
          ndofP = elem%dof_plus * ndim
          ! indexes of elem%wS: i= -Tdeg:0 ... storing of w^k, w^{k-1}, ..., w^{k-Tdeg},
          ! i=1..time_deg  computing of c(w_h, \vp_*) \phi_i
          allocate(elem%wS( -Tdeg:time_deg, 1:ndofP ) )

          ! storring of the actual solution
          do j=0, Tdeg
             elem%wS(-j, 1:ndof) = elem%w(j, 1:ndof)
          end do

          elem%wS( 1:time_deg, 1:ndofP) = 0. ! arrays for computing of c(w_h, \vp_*) \phi_i
          elem%vec(rhs, 1:ndofP) = 0.
       enddo

       ! integration over the time interval
       do j=1, Gdof
          ! actual time
          t = G_rule%lambda(j)
          state%time%ctime = state%time%ttime + state%time%tau(1) * t   ! %ttime is not yet updated

          call SetLagrCoeffs(Tdeg,  t, Lag_coef(0:1, 0:Tdeg) )

          ! pw polynomial reconstruction of w and dw/dt at t
          do i=1,grid%nelem
             elem => grid%elem(i)
             ndof = elem%dof * ndim

             ! evaluation of w and dw/dt at t using the Lagrangian interpolation
             elem%w(0,1:ndof) = elem%wS(-Tdeg , 1:ndof) * Lag_coef(0, 0)  ! w
             elem%w(1,1:ndof) = elem%wS(-Tdeg , 1:ndof) * Lag_coef(1, 0)  ! dw/dt
             do l=1, Tdeg
                elem%w(0,1:ndof) = elem%w(0,1:ndof) + elem%wS(-Tdeg + l, 1:ndof) * Lag_coef(0, l)
                elem%w(1,1:ndof) = elem%w(1,1:ndof) + elem%wS(-Tdeg + l, 1:ndof) * Lag_coef(1, l)
             enddo

          enddo

          !elem => grid%elem(itest)
          !write(*,*) state%time%ttime+state%time%tau(1), elem%vec(rhs,1:6)

          !print*,'########## bef ComuteTerms', state%time%iter
          !print*,'CHECK ComputeTerms(deg_plus )'
          call ComputeTerms(deg_plus )      ! flux vector:  %vec(rhs, * ) = c_h(w_h, * )
          !print*,'########## aft ComuteTerms', state%time%iter

          !elem%vec(rhsT,:) = elem%vec(rhs,1:6)

          !! REMOVE
          !do i=1,grid%nelem
          !   elem => grid%elem(i)
          !   elem%vec(rhs,:) = 0.
          !enddo

          call AddTimeDerivativeVector( ) ! %vec(rhs, * ) = %vec(rhs, * ) + (D_t w, *)

          !elem => grid%elem(itest)
          !write(283,*) state%time%ttime+state%time%tau(1), elem%vec(rhs,1:6)
          !write(284,*) state%time%ttime+state%time%tau(1), -(elem%vec(rhs,1:6)-elem%vec(rhsT,1:6))


          ! summning over time integ. nodes for each element
          do i=1,grid%nelem
             elem => grid%elem(i)
             ndofP = elem%dof_plus * ndim


             !if(i == itest) then
             !   write(791,'(a6,30es14.6)') 'new', elem%vec(rhs, 1:ndofP)
             !   write(791,'(a6,30es14.6)') 'DwO', (elem%wS(0, 1:) - elem%wS(-1, 1:) )/state%time%tau(1)
             !   write(791,'(a6,30es14.6)') 'DwN', elem%w(1, 1:)
             !   write(791,*) '--------------------------------'
             !endif

             !! adding of the BDF term: %vec(rhs,*) += \sum_n \alpha_n (w^{k-n},*)/ tau
             !elem%vec(rhs, 1:ndofP) = elem%vec(rhs, 1:ndofP) + elem%vec(rhsT, 1:ndofP)

             do l = 1, time_deg   ! c_h(w_h, \varphi_{1:ndofP}) \phi_l  in integ node "j"
                elem%ws(l, 1:ndofP) =  elem%ws(l, 1:ndofP) &
                     + G_rule%weights(j) * elem%vec(rhs, 1:ndofP) * T_rule%phi(l, j)
             enddo

             !if(i == itest) then
             !   l = 6
             !   write(186,'(a8,3i5, 20es14.6)') 'wk..',state%time%iter, 0,0, elem%ws(0,1:6)
             !   write(186,'(a8,3i5, 20es14.6)') 'wk-1',state%time%iter, 0,0, elem%ws(-1,1:6)
             !   write(186,'(a8,3i5, 20es14.6)') 'wi  :', state%time%iter, itest, 0, elem%w(0,1:l)
             !   write(186,'(a8,3i5, 20es14.6)') 'wi_t:', state%time%iter, 0,  0, elem%w(1,1:l)
             !   do l1=1,time_deg
             !      write(186,'(a8,3i5, 20es14.6)') 'ws i:', state%time%iter, j, l1, elem%ws(l1, 1:l)
             !   end do
             !   write(186,*) '--------------------'
             !   write(187,*) state%time%iter + G_rule%lambda(j), elem%ws(1, 1:6)
             !   write(188,*) state%time%ttime+state%time%tau(1), elem%ws(0,1:6)
             !   write(189,*) state%time%ttime, elem%ws(-1,1:6)
             !   write(190,*) state%time%ttime+state%time%tau(1)* G_rule%lambda(j), elem%w(0,1:6)

             !endif

          enddo ! i=1,grid%nelem
       enddo ! j=1,Gdof
       !do not mupltiply with the size of the time step, done in the totat summing in compute.f90


       !write(187,*) '#####    '

       !elem => grid%elem(itest)
       !write(381,*) state%time%ttime+state%time%tau(1), elem%ws(1,1:6)
       !write(382,*) state%time%ttime+state%time%tau(1), elem%ws(2,1:6)
       !write(383,*) state%time%ttime+state%time%tau(1), elem%ws(3,1:6)


       ! refreshing of the actual solution
       ! time derivative in elem%vec(rhsT,:) need not be stored any more
       do i=1,grid%nelem
          elem => grid%elem(i)
          ndof   = elem%dof * ndim
          ndofP = elem%dof_plus * ndim

          elem%w(0, 1:ndof) = elem%wS( 0, 1:ndof)  ! restoring of the solution from last 2 levels
          elem%w(1, 1:ndof) = elem%wS(-1, 1:ndof)

          ! summing of squares of T_rule%phi(l,:), phi_l(t) are L^2 orthogonal
          elem%vec(rhsT, 1:ndofP) =  0.
          do l=1, time_deg
             elem%vec(rhsT, 1:ndofP) = elem%vec(rhsT, 1:ndofP) + elem%wS(l, 1:ndofP)**2

             !if(i == itest) write(19,'(a3,i5,30es14.6)') '!!!', l, &
             !     VectorNorm( elem%vec(rhsT, 1:ndofP) ), &
             !     VectorNorm( elem%wS(l, 1:ndofP) ), elem%wS(l, 1:ndofP)

          enddo

          elem%vec(rhsT, 1:ndofP) = elem%vec(rhsT, 1:ndofP)**0.5

          !!if(i == itest) write(*,'(a3,30es14.6)') '!!!',elem%vec(rhsT, 1:6) - elem%vec(rhs, 1:6)
          !if(i == itest) write(19,*)'-------------------------------'

          deallocate(elem%wS)
       enddo


       deallocate(Lag_coef)
       state%time%ctime = ctime_store
    endif  ! if (.not. onlyAS)

    ! space  estimate
    call ComputeTerms(deg_plus )     ! flux vector:  %vec(rhs, * ) = c_h(w_h, * )

    call TimeDerivativeVector( ) ! BDF term: %vec(rhs,*) += \sum_n \alpha_n (w^{k-n},*)/ tau

    itest = -981
    if(itest > 0 .and. itest <= grid%nelem) then
       elem => grid%elem(itest)

       !write(*,'(a4,i5,8es12.4)') &
       !     'PWS',elem%i, elem%w(0:3, 1), elem%w(0:3, 2)

       write(*,'(a4,i5, 60es12.4)') &
            'PWS',elem%i, elem%vec(rhs,:)

       write(*,'(a4,i5, 60es12.4)') &
            'PWT',elem%i, elem%vec(rhsT,:)
    endif




    !elem => grid%elem(itest)
    !write(63,'(a5,200es12.4)') 'rhs',elem%vec(rhs, :)
    !write(63,'(a5,200es12.4)') 'rhsT',elem%vec(rhsT, :)
    !write(63,*) '/////////////////'
    !stop
  end subroutine PWpolynomialReconstComputeTerms

  !> evaluate   elem%vec(rhs, i ) =
  !> \f$ \frac12 \left(c_h(w_h^k, \varphi_i ) + c_h(w_h^k, \varphi_i ) \right)\f$
  !> for \f$\varphi_i \in S_{hp}\f$
  subroutine CrankNicolsonVectorFlux( )
    class(element), pointer :: elem
    integer ::  i, j, k, ndof, ndofP
    logical :: deg_plus

    deg_plus = .true.

    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof  = elem%dof * ndim
       ndofP = elem%dof_plus * ndim

       allocate(elem%wS(1:1,1:elem%dof * ndim) )

       elem%wS(1,1:elem%dof * ndim)  = elem%w(0,1:ndof)

       elem%w(0,1:elem%dof * ndim) = elem%w(1, 1:ndof)
    enddo

    call ComputeTerms(deg_plus )     ! flux vector:  %vec(rhs, * ) = c_h(w_h^{k-1}, * )
    do i=1,grid%nelem
       elem => grid%elem(i)
       elem%vec(rhsOLD,1:ndofP)  = elem%vec(rhs,1:ndofP)

       elem%w(0,1:ndof) = elem%wS(1,1:ndof)
       deallocate(elem%wS )
    enddo

    call ComputeTerms(deg_plus )     ! flux vector:  %vec(rhs, * ) = c_h(w_h^{k }, * )

    do i=1,grid%nelem        ! Crank-Nicolson (F(w^k) + F(w^{k-1} ) /2
       elem => grid%elem(i)
       elem%vec(rhs,1:ndofP)  = (elem%vec(rhs,1:ndofP) + elem%vec(rhsOLD,1:ndofP) )/2.
    enddo
  end subroutine CrankNicolsonVectorFlux



  !> compute the vector: elem\%vec(i) =
  !> \f$ \frac{1}{\tau_m} \int_{I_m}
  !> ( F({\bf w}_h) - F(\tilde{{\bf w}}_h), \varphi_i)_{L^2(K)} \, {\rm d}t\f$,
  !> \f$i=1,\dots, DOF^+_K,\  K \f$ = elem \f$ \in T_h\f$,
  !> \f$ {\bf w} \f$ is piecewise constant in time
  !> \f$ \tilde{{\bf w}} \f$ is piecewise affine in time
  subroutine FluxVectorDifference( )
    class(element), pointer :: elem      ! elem = element
    type(Gauss_rule), pointer :: G_rule
    real, dimension(:,:), allocatable :: wk_wk1
    integer :: i, j, dof, ndof, ndofP, Qdof, Gnum, Gdof
    real :: t
    integer :: itest
    logical :: deg_plus

    deg_plus = .true.

    itest = -360


    ! saving of elem%w(0,*)
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof  = elem%dof * ndim

       allocate(elem%wS(1:1,1:elem%dof * ndim) )
       elem%wS(1, 1:ndof )  = elem%w(0,1:ndof)

       ndofP = elem%dof_plus * ndim
       elem%vec(rhsT, 1:ndofP) = 0.
    enddo

    ! integration in time
    Gnum = 2   ! 15 is the maximal one !!!!!!!!!
    G_rule => state%space%G_rule(Gnum)
    Gdof = G_rule%Qdof

    do j=1, Gdof
       ! actual time
       t = G_rule%lambda(j)
       !!!!t = 1.

       state%time%ctime = state%time%ttime + state%time%tau(1) * t   ! %ttime is not yet updated

       do i=1,grid%nelem
          elem => grid%elem(i)
          ndof  = elem%dof * ndim
          ! piecewise affine reconstructin in time
          elem%w(0,1:ndof) = (1. - t) * elem%w(1,1:ndof) + t * elem%wS(1, 1:ndof)

       enddo

       call ComputeTerms( deg_plus)     ! flux vector:  %vec(rhs, * ) = c_h(w_h^{k-1}, * )

       do i=1,grid%nelem
          elem => grid%elem(i)
          ndofP = elem%dof_plus * ndim

           if(i == itest)  write(*,'(i5,200es14.6)') j-1,elem%vec(rhsT, 13:15),elem%vec(rhs, 13:15)!,elem%w(0,1:ndof)

          elem%vec(rhsT, 1:ndofP) = elem%vec(rhsT, 1:ndofP) &   ! weighted average
               + G_rule%weights(j) * elem%vec(rhs, 1:ndofP)
           if(i == itest)  write(*,'(i5,200es14.6)') j,elem%vec(rhsT, 13:15),elem%vec(rhs, 13:15)!,elem%w(0,1:ndof)
       enddo

    enddo

    ! backward setting of the actual solution to elem%w
    do i=1,grid%nelem
       elem => grid%elem(i)
       ndof  = elem%dof * ndim
       elem%w(0,1:ndof) = elem%wS(1,1:ndof)
       deallocate(elem%wS )

    enddo


    ! for t= t^k
    call ComputeTerms(deg_plus )     ! flux vector:  %vec(rhs, * ) = c_h(w_h^{k-1}, * )

    do i=1,grid%nelem
       elem => grid%elem(i)
       ndofP = elem%dof_plus * ndim

       elem%vec(rhsT, 1:ndofP) = elem%vec(rhsT, 1:ndofP)  - elem%vec(rhs, 1:ndofP)
       if(i == itest)  write(*,'(i5,200es14.6)') 88,elem%vec(rhs, 13:15)
       if(i == itest)  write(*,'(i5,200es14.6)') 99,elem%vec(rhsT, 13:15)

       !write(*,'(a6,i5, 20es12.4)') 'rhsT:',i, elem%vec(rhsT, 1:ndofP)
       !write(*,'(a6,i5, 20es12.4)') 'rhsT:',i, elem%vec(rhs , 1:ndofP)
       !print*
    enddo
    !!print*,'^^^^^^^^^^^^^^^^^^^^ Ver'

  end subroutine FluxVectorDifference

 !> compute the vector: elem\%vec(i) =
 !> \f$ ({\bf w}_h^k - {\bf w}_h^{k-1}, \varphi_i)_{L^2(K)} \f$
 !> \f$i=1,\dots, DOF^+_K,\  K \f$ = elem \f$ \in T_h\f$
  subroutine SolutionDifference( )
    class(element), pointer :: elem      ! elem = element
    real, dimension(:,:), allocatable :: wk_wk1
    integer :: i, dof, dofA, Qdof

    do i=1,grid%nelem
       elem => grid%elem(i)

       dofA = dof
       if(elem%deg_plus) dofA = elem%dof_plus
       Qdof = elem%Qdof

       elem%vec(rhsT,:) = 0.
       allocate( wk_wk1(1:Qdof, 1:ndim) )

       call Eval_w_w_Elem(elem, wk_wk1)
       call EvalVectorB(elem, wk_wk1, dofA, elem%vec(rhsT,1:dofA*ndim) )
       ! NOT : "normalization:, in order to be in agreement with elem%vec(rhs, 1:dofA*ndim)
       !!elem%vec(rhsT,1:dofA*ndim) = elem%vec(rhsT,1:dofA*ndim) /state%time%tau(1)

       !if( i <= 2) write(*,'(a6,i5, 25es12.4)') 'w-w:',i, &
       !     wk_wk1(:,1), 999999., elem%vec(rhsT,1:dofA*ndim)

       deallocate(wk_wk1)
    enddo
    !print*,'_________________________________________________________'

  end subroutine SolutionDifference

!  !> NOT USED
!  !> reconstruct the solution wST/zST_plus from the space P^{p+1}
!  !> using the LeastSquareInterpolationWeighted, it uses the array elem%wS
!  !> primal == .true. -> wST , .false. -> zST
!  !> R_type = -1 -> H1 interpolation, 0 -> L2 interpolation, 2 -> Ritz reconstruct
!  subroutine reconstructSolution( grid, primal, R_type )
!    class( mesh ), intent(inout) :: grid
!    logical, intent(in) :: primal
!    integer, intent(in) :: R_type
!    integer :: nelem, degP, dofP, Qnum, i
!    class( element ), pointer :: elem
!    real, allocatable, dimension(:,:,:,:) :: Dw ! for the least square reconstruction
!    logical :: flag ! problem flag
!
!    flag = .true.
!
!    if(.not. grid%ElemSupports) & ! create the list of elements sharing at least a vertex with elem
!         call SeekElemSupports(grid)
!
!    nelem = grid%nelem
!
!    if (R_type == 2) then
!      !need Aplus, allocated zST(1:dof_plus) , res(vp+) forall vp+ \in Shpp
!      !update matrix and rhsvector
!      ! primal - rhs = res(u_h)(vp+) vp+ \in Shpp
!      ! dual   - rhs = res*(z_h)(vp+) vp+ \in Shpp
!      ! saved to wSTplus or zSTplus the reconstruction for all elements
!      call RitzReconstruction( grid, primal, flag, DWR )
!
!
!    else
!
!       if (state%time%deg > 0) &
!         stop 'reconstructSolution works only for stationary problems - q = 0'
!
!       if (primal) then
!         do i = 1, nelem
!            elem => grid%elem(i)
!            ! rewrites - need no allocation check
!            elem%wS = Transfer_funST_to_funS( elem%wST( 1:ndim, 1:elem%dof, 1:elem%Tdof ), &
!                      elem%dof, elem%Tdof, 0, elem%TQnum )
!          enddo !i
!       else
!         do i = 1, nelem
!            elem => grid%elem(i)
!
!            elem%wS = Transfer_funST_to_funS( elem%zST( 1:ndim, 1:elem%dof, 1:elem%Tdof ), &
!                      elem%dof, elem%Tdof, 0, elem%TQnum )
!          enddo !i
!       endif
!
!       !primal solution
!       if (primal) then
!       ! reconstruct the solution I_h(w)
!          do i = 1, nelem
!            elem => grid%elem(i)
!            degP = elem%deg + state%p_mod_max ! ?
!            dofP = DOFtriang(degP) ! ?
!            Qnum = elem%Qnum
!
!            ! only 1:ndim,0,0,1:dofP is used - the others are for the derivatives
!            allocate( Dw(1:ndim, 0:degP, 0:degP, 1:dofP ) , source = 0.0 )
!            call LeastSquareInterpolationWeighted(elem, ndim, .false., Qnum, degP, dofP, Dw, R_type  )
!
!            if ( associated( elem%wSTplus ) ) then
!               deallocate( elem%wSTplus )
!            endif
!            allocate( elem%wSTplus( 1:ndim, 1:dofP, 1:elem%Tdof ) )
!            elem%wSTplus( 1:ndim, 1:dofP, 1 ) = Dw(1:ndim, 0, 0, 1:dofP)
!
!            !call PlotElemFunction3D(121, elem, dofP, elem%wSTplus( 1, 1:dofP, 1 ) )
!
!
!            !CONTROL - not zero somewhere
!            if ( norm2(Dw(1, 0, 0, elem%dof + 1 : dofP) ) > 1E-9 ) then
!               flag = .false.
!            endif
!
!            deallocate( Dw )
!          enddo !i
!
!       !dual
!       else
!          ! reconstruct the solutions I_h(z)
!          do i = 1, nelem
!            elem => grid%elem(i)
!            degP = elem%deg + state%p_mod_max  ! ?
!            dofP = DOFtriang(degP) ! ?
!            Qnum = elem%Qnum
!
!            ! only 1:ndim,0,0,1:dofP is used - the others are for the derivatives
!            allocate( Dw(1:ndim, 0:degP, 0:degP, 1:dofP ) , source = 0.0 )
!            call LeastSquareInterpolationWeighted(elem, ndim, .false., Qnum, degP, dofP, Dw, R_type  )
!
!            if ( associated( elem%zSTplus ) ) then
!               deallocate( elem%zSTplus )
!            endif
!            allocate( elem%zSTplus( 1:ndim, 1:dofP, 1:elem%Tdof ) )
!            elem%zSTplus( 1:ndim, 1:dofP, 1 ) = Dw(1:ndim, 0, 0, 1:dofP)
!
!            !call PlotElemFunction3D(131, elem, dofP, elem%zSTplus( 1, 1:dofP, 1 ) )
!
!            !CONTROL - not zero somewhere
!            if ( norm2(Dw(1, 0, 0, elem%dof + 1 : dofP) ) > 1E-9 ) then
!               flag = .false.
!            endif
!
!   !         if (i==1) then
!   !
!   !            print*, 'zST: ', elem%zST, 'size: ', size(elem%zST)
!   !            print*, 'zST_plus:' , elem%zSTplus , 'size: ',size(elem%zSTplus)
!   !
!   !         end if
!
!            deallocate( Dw )
!          enddo !i
!
!       endif   ! primal / dual
!
!       !deallocate WS
!       do i = 1, nelem
!         elem => grid%elem(i)
!         deallocate( elem%wS )
!       enddo !i
!
!    endif ! R_type
!
!    if ( flag ) then
!      print*, 'primal:', primal, 'R_type(L2/H1/H-1:', R_type
!      print*,  'Problem in reconstructSolution - the reconstructed solution is almost (1E-9) zero at ALL triangles.'
!    endif
!
!  end subroutine reconstructSolution

  !> reconstruct the solution wST/zST_plus from the space P^{p+1}
  !> using the LeastSquareInterpolationWeighted, it uses the array elem%wS
  !> R_type = -1 -> H1 interpolation, 0 -> L2 interpolation, 2 -> Ritz reconstruct
  subroutine ReconstrPrimalDualSolutions( grid, R_type )
    class( mesh ), intent(inout) :: grid
    integer, intent(in) :: R_type
    integer :: nelem, degP, dofP, i, j, degPP, dofPP
    class( element ), pointer :: elem
    real, allocatable, dimension(:,:,:) :: Dw ! for the least square reconstruction
    integer :: dof, Tdof, TQnum, ifile
    !logical :: flag ! problem flag
    logical :: loc_implicitly

    print*,' subroutine ReconstrPrimalDualSolutions (2) R_ type =', R_type  !  -10

    call state%cpuTime%startHOrecTime()
    
    ifile = 100 + 10 * state%space%adapt%adapt_level

    if (state%time%deg > 0) &
            stop 'reconstructSolution works only for stationary problems - q = 0'

    if(.not. grid%ElemSupports) & ! create the list of elements sharing at least a vertex with elem
         call SeekElemSupports(grid)

    nelem = grid%nelem

    ! allocate arrays
    do i = 1, nelem
      elem => grid%elem(i)
      ! arrays for reconstructed solutions
      degP = elem%deg + state%p_mod_max  ! ?
      degPP = elem%deg + 2

      dofP  = DOFtriang(degP)  !
      dofPP = DOFtriang(degPP) ! we need reconstruction of degree p_K + 2 for wST_LS

      ! FR_DEGPLUS
      !if ( associated( elem%wSTplus ) ) deallocate( elem%wSTplus )
      !if ( associated( elem%zSTplus ) ) deallocate( elem%zSTplus )
      ! HERE new array size ( 1:ndim, 1:dofP, 0:2)
      !allocate( elem%wSTplus( 1:ndim, 1:dofP, -1:2), source = 0.0 )   !!1:elem%Tdof ) )
      ! FR_DEGPLUS
      !allocate( elem%zSTplus( 1:ndim, 1:dofP, -1:2), source = 0.0 )   !!1:elem%Tdof ) )

      if( .not. state%state_of_terms%is_DWR_etaI_actual( ) ) then  ! reconstructions are actual
         if ( associated( elem%wST_Ritz ) ) deallocate( elem%wST_Ritz )
         if ( associated( elem%zST_Ritz ) ) deallocate( elem%zST_Ritz )
         allocate( elem%wST_Ritz( 1:ndim, 1:dofP, 1:1), source = 0.0 )   !!1:elem%Tdof ) )
         allocate( elem%zST_Ritz( 1:ndim, 1:dofP, 1:1), source = 0.0 )   !!1:elem%Tdof ) )
      endif

      if ( associated( elem%wST_LS ) ) deallocate( elem%wST_LS )
      if ( associated( elem%zST_LS ) ) deallocate( elem%zST_LS )
      ! HERE new array size ( 1:ndim, 1:dofP, 0:2)
      allocate( elem%wST_LS( 1:ndim, 1:dofPP, 0:2), source = 0.0 )   !!1:elem%Tdof ) )
      allocate( elem%zST_LS( 1:ndim, 1:dofPP, 0:2), source = 0.0 )   !!1:elem%Tdof ) )

    enddo !i

    if (R_type == -1 .or. R_type == 0 .or. R_type == -10) then ! LS reconstruction

        ! copying of the primal and dual solutions into auxiliary array elem%ws(1:2*ndim, :)
        do i = 1, nelem
          elem => grid%elem(i)
          dof = elem%dof
          Tdof = elem%Tdof
          TQnum = elem%TQnum

          if( allocated(elem%wS)) deallocate(elem%wS)
          allocate( elem%wS(1: 2*ndim, 1:dof) )

          !primal solution
          elem%wS( 1 : ndim, 1:dof)  = Transfer_funST_to_funS(  &
               elem%wST( 1:ndim, 1:dof, 1:Tdof ), dof, Tdof, 0, TQnum )

          !dual solution
          elem%wS(ndim+1 : 2*ndim, 1:dof)  = Transfer_funST_to_funS(  &
               elem%zST( 1:ndim, 1:dof, 1:Tdof ), dof, Tdof, 0, TQnum )
        end do !i

        ! reconstruct the primal solution I_h(w) and the dual solution I_h(z)
        do i = 1, nelem
          elem => grid%elem(i)
          dof = elem%dof

          degPP = elem%deg + 2   ! state%p_mod_max
          ! FR_dofP
          dofPP = DOFtriang(degPP) ! we need reconstruction of degree p_K + 2

          ! only 1:ndim,0,0,1:dofP is used - the others are for the derivatives
          allocate( Dw(1:2*ndim, 0:2, 1:dofPP ) , source = 0.0 )

          ! higher order reconstruction on elem using its neighbours from grid
          call WENO_LS_Interpolation(grid, elem, 2*ndim, dofPP, Dw(1:2*ndim, 0:2, 1:dofPP), R_type  )

          do j=0,2
             elem%wST_LS( 1:ndim, 1:dofPP, j ) = Dw(    1  :   ndim, j, 1:dofPP)
             elem%zST_LS( 1:ndim, 1:dofPP, j ) = Dw(ndim+1 : 2*ndim, j, 1:dofPP)

          enddo
          !elem%wST_LS( 1:ndim, :, 0 ) = 0.
          !elem%zST_LS( 1:ndim, :, 0 ) = 0.
          
          !elem%wST_LS( 1:ndim, 1:dof, 0 ) =  elem%wS( 1 : ndim, 1:dof)
          !elem%zST_LS( 1:ndim, 1:dof, 0 ) =  elem%wS(ndim+1 : 2*ndim, 1:dof)
          
          !CONTROL - not zero somewhere
          !if ( norm2(Dw(1, 1, elem%dof + 1 : dofPP) ) > 1E-9 ) flag = .false.
          !if ( norm2(Dw(2, 1, elem%dof + 1 : dofPP) ) > 1E-9 ) flag = .false.

!          ! UNCOMMENT THE FOLLOWING FOR VERIFICATION
!          if(i .ne. 76 .and. i .ne. 82) then
!             elem%wST_LS = 0.
!             elem%zST_LS = 0.
!          else
!             print*,'t37yu3:', elem%i, elem%xc(:)
!          endif
          !dofP = DOFtriang( elem%deg)
          !call PlotElemFunction3D(ifile+1, elem, dofP, elem%wST_LS( 1:ndim, 1:dofP, 0) )
          !call PlotElemFunction3D(ifile+2, elem, dofP, elem%zST_LS( 1:ndim, 1:dofP, 0) )

          !dofP = DOFtriang( elem%deg+1)
          !call PlotElemFunction3D(ifile+3, elem, dofP, elem%wST_LS( 1:ndim, 1:dofP, 1) )
          !call PlotElemFunction3D(ifile+4, elem, dofP, elem%zST_LS( 1:ndim, 1:dofP, 1) )

          !dofP = DOFtriang( elem%deg+2)
          !call PlotElemFunction3D(ifile+5, elem, dofP, elem%wST_LS( 1:ndim, 1:dofP, 2) )
          !call PlotElemFunction3D(ifile+6, elem, dofP, elem%zST_LS( 1:ndim, 1:dofP, 2) )

          !dofP = DOFtriang( elem%HO_deg)
          !dofP = DOFtriang( elem%deg+1)
          !call PlotElemFunction3D(ifile+3, elem, dofP, elem%wSTplus( 1:ndim, 1:dofP, -1) )
          !call PlotElemFunction3D(ifile+4, elem, dofP, elem%zSTplus( 1:ndim, 1:dofP, -1) )

          deallocate( Dw )
        enddo !i

        do i = 1, nelem
          elem => grid%elem(i)
          deallocate( elem%wS )
        enddo !i

    else if ( R_type == 2) then !do nothing here
    else
       stop 'Unknown type of reconstruction in ReconstrPrimalDualSolutions'
    endif   ! LS reconstruction

    ! compute Ritz too, but save it into wSTplus(:,:,-1)
    if( (R_type == -10 .or. R_type == 2) .and. &
         .not. state%state_of_terms%is_DWR_etaI_actual( ) ) then
       ! state%state_of_terms%is_DWR_etaI_actual( ) == TRUE  not necessary ro updae
       ! reconstruction using the solution of the local problems - Ritz reconstruction
       !      print*, "COMMENTED iterative Ritz in ReconstrPrimalDualSolutions"

       ! the following works only for linear problem
       !call  RitzReconstr_PrimalDual_big( grid )! works only for linear problem with "tau=Inf"
       !print*, "Calling iterative Ritz in ReconstrPrimalDualSolutions"

       ! for nonlinear problems, for linear works too, only one step is performed (ONLY ONE test)
       !if(state%space%adapt%adapt_level <= 5) then
       !   print*, "RitzReconstr_PrimalDual_big called instead of ITERATIVE", &
       !        ',  adapt_level = ',state%space%adapt%adapt_level
       !call  RitzReconstr_PrimalDual_big( grid )
       !else
       call IterativeRitzReconstr_PrimalDual_big( grid )
       !endif

    endif

    ! replacement of Ritz by LS
!    do i = 1, nelem
!       elem => grid%elem(i)
!       dofP = DOFtriang(elem%deg + 1)
!       elem%wST_Ritz( 1:ndim, 1:dofP, 1) = elem%wST_LS( 1:ndim, 1:dofP, 1 )
!       elem%zST_Ritz( 1:ndim, 1:dofP, 1) = elem%zST_LS( 1:ndim, 1:dofP, 1 )
!       if(elem%i == 1) print*,'NO RITZ !!!', R_type
!    enddo


!    else
!      ! copy wSTplus(:,:,1) , zSTplus(:,:,1) -> wSTplus(:,:,-1) , zSTplus(:,:,-1)
!      do i = 1, nelem
!        elem => grid%elem(i)
!
!        degP = elem%deg + state%p_mod_max
!        dofP = DOFtriang(degP)
!
!        elem%wSTplus(:,1:dofP,1) = elem%wST_LS(:,1:dofP,1)
!        elem%zSTplus(:,1:dofP,1) = elem%zST_LS(:,1:dofP,1)
!      end do
!    endif

!    do i = 1, nelem
!          elem => grid%elem(i)
!          print*, 'wwST = ', norm2(elem%wST)
!          print*, 'zzST = ', norm2(elem%zST)
!!          print*, 'wwSTplus = ', norm2(elem%wSTplus)
!!          print*, 'zzSTplus = ', norm2(elem%zSTplus)
!
!      end do

    !write(*,'(a12, 200es12.4)')  "%wsTPlus X", grid%elem(1)%wSTplus( 1, :, -1 )
    !write(*,'(a12, 200es12.4)')  "%wsTPlus X", grid%elem(1)%wST_LS( 1, :, 1 )
    !write(*,'(a12, 200es12.4)')  "%zsTPlus X", grid%elem(1)%zSTplus( 1, :, -1 )
    !write(*,'(a12, 200es12.4)')  "%wsTPlus X", grid%elem(1)%zST_LS( 1, :, 1 )
    !print*,'------------------------------------------------', R_type

      ! vertex based continuous reconstruction: inaccurate + very time consuming !
       !call Vertex_based_PrimalDual(grid)

!       print*, 'Warning: The reconstructions were nullified!'
!       print*, '***'


!    if ( flag ) then
!       print*, 'primal:', primal, 'R_type(L2/H1/H-1):', R_type
!       print*,  'Problem in reconstructSolution - the reconstructed solution is almost (1E-9) zero at ALL triangles.'
!    endif

    !stop "end subroutine ReconstrPrimalDualSolutions"

    call state%cpuTime%addHOrecTime()

  end subroutine ReconstrPrimalDualSolutions

  !> reconstruct the solution wST_plus from the space P^{p+1}
  !> using the LeastSquareInterpolationWeighted, it uses the array elem%wS
  !> R_type = -1 -> H1 interpolation, 0 -> L2 interpolation, 2 -> Ritz reconstruct
  subroutine ReconstrPrimalSTSolutions( grid, R_type )
    class( mesh ), intent(inout) :: grid
    integer, intent(in) :: R_type
    class( element ), pointer :: elem
    class(Time_rule), pointer :: T_rule
    integer :: nelem, degP, dofP, TdofP, i, j, k, l, degPP, dofPP
    real, allocatable, dimension(:,:,:) :: Dw, Dwi, ws ! for the least square reconstruction
    real, allocatable, dimension(:,:) :: wi, Fx ! for the least square reconstruction
    real, allocatable, dimension(:) :: valH1, valL2, sumH1, sumL2 ! storing array
    real, allocatable, dimension(:,:) :: errL8   ! storing array
    real, allocatable, dimension(:) :: errL8_loc ! storing array
    real :: weightT
    integer :: dof, Tdof, TQnum, ifile, Gdof, Qdof, TdofG
    !logical :: flag ! problem flag
    logical :: loc_implicitly

    !print*,' subroutine ReconstrPrimaSTSolutions =', R_type  !  -10

    !call state%cpuTime%startHOrecTime()
    
    ifile = 100 + 10 * state%space%adapt%adapt_level

    if(.not. grid%ElemSupports) & ! create the list of elements sharing at least a vertex with elem
         call SeekElemSupports(grid)

    nelem = grid%nelem

    ! allocate arrays
    do i = 1, nelem
      elem => grid%elem(i)
      ! arrays for reconstructed solutions
      degP = elem%deg + state%p_mod_max  ! ?
      degPP = elem%deg + 2

      dofP  = DOFtriang(degP)  !
      dofPP = DOFtriang(degPP) ! we need reconstruction of degree p_K + 2 for wST_LS

      Tdof = elem%Tdof + state%q_mod_max  ! ?
      
      ! FR_DEGPLUS
      !if ( associated( elem%wSTplus ) ) deallocate( elem%wSTplus )
      !if ( associated( elem%zSTplus ) ) deallocate( elem%zSTplus )
      ! HERE new array size ( 1:ndim, 1:dofP, 0:2)
      !allocate( elem%wSTplus( 1:ndim, 1:dofP, -1:2), source = 0.0 )   !!1:elem%Tdof ) )
      ! FR_DEGPLUS
      !allocate( elem%zSTplus( 1:ndim, 1:dofP, -1:2), source = 0.0 )   !!1:elem%Tdof ) )

      if( .not. state%state_of_terms%is_DWR_etaI_actual( ) ) then  ! reconstructions are actual
         if ( associated( elem%wST_Ritz ) ) deallocate( elem%wST_Ritz )
         !if ( associated( elem%zST_Ritz ) ) deallocate( elem%zST_Ritz )
         allocate( elem%wST_Ritz( 1:ndim, 1:dofP, 1:Tdof), source = 0.0 )   !!1:elem%Tdof ) )
         !allocate( elem%zST_Ritz( 1:ndim, 1:dofP, 1:Tdof), source = 0.0 )   !!1:elem%Tdof ) )
      endif

      !if ( associated( elem%wST_LS ) ) deallocate( elem%wST_LS )
      !if ( associated( elem%zST_LS ) ) deallocate( elem%zST_LS )
      !! HERE new array size ( 1:ndim, 1:dofP, 0:2)
      !allocate( elem%wST_LS( 1:ndim, 1:dofPP, 0:2), source = 0.0 )   !!1:elem%Tdof ) )
      !allocate( elem%zST_LS( 1:ndim, 1:dofPP, 0:2), source = 0.0 )   !!1:elem%Tdof ) )

    enddo !i

    !if (R_type == -1 .or. R_type == 0 .or. R_type == -10) then ! LS reconstruction
    if (R_type == -1 .and. R_type == 0 .and. R_type == -10) then ! LS reconstruction
       stop "! NOT USED AT THIS MOMENT"
        ! copying of the primal and dual solutions into auxiliary array elem%ws(1:2*ndim, :)
        do i = 1, nelem
          elem => grid%elem(i)
          dof = elem%dof
          Tdof = elem%Tdof
          TQnum = elem%TQnum

          if( allocated(elem%wS)) deallocate(elem%wS)
          allocate( elem%wS(1: 2*ndim, 1:dof) )

          !primal solution
          elem%wS( 1 : ndim, 1:dof)  = Transfer_funST_to_funS(  &
               elem%wST( 1:ndim, 1:dof, 1:Tdof ), dof, Tdof, 0, TQnum )

          !dual solution
          elem%wS(ndim+1 : 2*ndim, 1:dof)  = Transfer_funST_to_funS(  &
               elem%zST( 1:ndim, 1:dof, 1:Tdof ), dof, Tdof, 0, TQnum )
        end do !i

        ! reconstruct the primal solution I_h(w) and the dual solution I_h(z)
        do i = 1, nelem
          elem => grid%elem(i)
          dof = elem%dof

          degPP = elem%deg + 2   ! state%p_mod_max
          ! FR_dofP
          dofPP = DOFtriang(degPP) ! we need reconstruction of degree p_K + 2

          ! only 1:ndim,0,0,1:dofP is used - the others are for the derivatives
          allocate( Dw(1:2*ndim, 0:2, 1:dofPP ) , source = 0.0 )

          ! higher order reconstruction on elem using its neighbours from grid
          call WENO_LS_Interpolation(grid, elem, 2*ndim, dofPP, Dw(1:2*ndim, 0:2, 1:dofPP), R_type  )

          do j=0,2
             elem%wST_LS( 1:ndim, 1:dofPP, j ) = Dw(    1  :   ndim, j, 1:dofPP)
             elem%zST_LS( 1:ndim, 1:dofPP, j ) = Dw(ndim+1 : 2*ndim, j, 1:dofPP)

          enddo
          !elem%wST_LS( 1:ndim, :, 0 ) = 0.
          !elem%zST_LS( 1:ndim, :, 0 ) = 0.
          
          !elem%wST_LS( 1:ndim, 1:dof, 0 ) =  elem%wS( 1 : ndim, 1:dof)
          !elem%zST_LS( 1:ndim, 1:dof, 0 ) =  elem%wS(ndim+1 : 2*ndim, 1:dof)
          
          !CONTROL - not zero somewhere
          !if ( norm2(Dw(1, 1, elem%dof + 1 : dofPP) ) > 1E-9 ) flag = .false.
          !if ( norm2(Dw(2, 1, elem%dof + 1 : dofPP) ) > 1E-9 ) flag = .false.

!          ! UNCOMMENT THE FOLLOWING FOR VERIFICATION
!          if(i .ne. 76 .and. i .ne. 82) then
!             elem%wST_LS = 0.
!             elem%zST_LS = 0.
!          else
!             print*,'t37yu3:', elem%i, elem%xc(:)
!          endif
          !dofP = DOFtriang( elem%deg)
          !call PlotElemFunction3D(ifile+1, elem, dofP, elem%wST_LS( 1:ndim, 1:dofP, 0) )
          !call PlotElemFunction3D(ifile+2, elem, dofP, elem%zST_LS( 1:ndim, 1:dofP, 0) )

          !dofP = DOFtriang( elem%deg+1)
          !call PlotElemFunction3D(ifile+3, elem, dofP, elem%wST_LS( 1:ndim, 1:dofP, 1) )
          !call PlotElemFunction3D(ifile+4, elem, dofP, elem%zST_LS( 1:ndim, 1:dofP, 1) )

          !dofP = DOFtriang( elem%deg+2)
          !call PlotElemFunction3D(ifile+5, elem, dofP, elem%wST_LS( 1:ndim, 1:dofP, 2) )
          !call PlotElemFunction3D(ifile+6, elem, dofP, elem%zST_LS( 1:ndim, 1:dofP, 2) )

          !dofP = DOFtriang( elem%HO_deg)
          !dofP = DOFtriang( elem%deg+1)
          !call PlotElemFunction3D(ifile+3, elem, dofP, elem%wSTplus( 1:ndim, 1:dofP, -1) )
          !call PlotElemFunction3D(ifile+4, elem, dofP, elem%zSTplus( 1:ndim, 1:dofP, -1) )

          deallocate( Dw )
        enddo !i

        do i = 1, nelem
          elem => grid%elem(i)
          deallocate( elem%wS )
        enddo !i

    else if ( R_type == 2) then !do nothing here
    else
       stop 'Unknown type of reconstruction in ReconstrPrimalDualSolutions'
    endif   ! LS reconstruction

    ! compute Ritz too, but save it into wSTplus(:,:,-1)
    if( (R_type == -10 .or. R_type == 2) ) then   !.and. &
    !     .not. state%state_of_terms%is_DWR_etaI_actual( ) ) then
       ! state%state_of_terms%is_DWR_etaI_actual( ) == TRUE  not necessary ro updae
       ! reconstruction using the solution of the local problems - Ritz reconstruction
       !      print*, "COMMENTED iterative Ritz in ReconstrPrimalDualSolutions"

       ! the following works only for linear problem
       !call  RitzReconstr_PrimalDual_big( grid )! works only for linear problem with "tau=Inf"
       !print*, "Calling iterative Ritz in ReconstrPrimalDualSolutions"

       ! for nonlinear problems, for linear works too, only one step is performed (ONLY ONE test)
       !if(state%space%adapt%adapt_level <= 5) then
       !   print*, "RitzReconstr_PrimalDual_big called instead of ITERATIVE", &
       !        ',  adapt_level = ',state%space%adapt%adapt_level
       !call  RitzReconstr_PrimalDual_big( grid )
       !else
       call IterativeRitzReconstr_PrimalST_big( grid )
       !endif

    endif

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ! error from the Ritz reconstruction
    allocate(valL2(1:ndim), valH1(1:ndim), sumL2(1:ndim), sumH1(1:ndim) )

    TdofG = grid%elem(1)%TQnum
    allocate( errL8(-1:TdofG, 1:ndim), errL8_loc(1:ndim) )

    T_rule => state%time%T_rule( grid%elem(1)%TQnum)

    errL8 = 0.
    !!state%estim(RinterL2L2, :) = 0.  NOT, RinterL2L2 accumulates from t=0.
    !!state%estim(RinterL2H1, :) = 0.
    sumL2 = 0.
    sumH1 = 0.
    
    do i = 1, grid%nelem
       elem => grid%elem(i)
       dof = elem%dof
       Tdof = elem%Tdof
       Qdof = elem%Qdof
       Gdof = elem%TQnum
       dofP  = DOFtriang( elem%deg + state%p_mod_max)  !
       TdofP = elem%Tdof + state%q_mod_max  !
      
       allocate( wi(1:Qdof, 1:2*ndim), Dwi(1:Qdof, 1:2*ndim, 1:nbDim) )

       allocate(ws(1:ndim,1:dofP,1:TdofP) )
       ws(1:ndim,1:dofP,1:TdofP) =  elem%wST_Ritz(1:ndim,1:dofP,1:TdofP)

       !projection error
       elem%wST_Ritz(1:ndim,1:dof,1:Tdof) = elem%wST_Ritz(1:ndim,1:dof,1:Tdof) &
            - elem%wST(1:ndim,1:dof,1:Tdof)
       
       ! L^2 projection   
       !elem%wST_Ritz(1:ndim,1:dof,1:Tdof) = 0.

       elem%eta(RinterL2L2, 1:ndim) = 0.
       elem%eta(RinterL2H1, 1:ndim) = 0.
       
       if(TdofG /= T_rule%Qdeg) then
          print*,'different elem%Tdof', TdofG, T_rule%Qdeg, elem%TQnum, &
               state%time%Qnum, state%time%deg
          stop
       endif
       
       errL8_loc = 0.
       do k = -1, elem%TQnum
          ! setting of the space-time polynomial of whST and D whST in time integ nodes
          call Eval_whST_Ritz_Elem(elem, Gdof, k, wi(1:Qdof,1:ndim), Dwi(1:Qdof,1:ndim,1:nbDim)  )

          !call Eval_whST_Ritz_Elem(elem, Gdof,-1, wi(1:Qdof,ndim+1:2*ndim), Dwi(1:Qdof,ndim+1:2*ndim,1:nbDim)  )

          !L^2-norm
          call IntegrateSquareVectorFunction2(elem,  wi(1:Qdof,1:ndim), valL2(1:ndim) )
          !elem%eta(RinterLq, 1:ndim) = sqrt(valL2(1:ndim))

          !! H^1-seminorm
          wi(1:Qdof,1:ndim) = Dwi(1:Qdof,1:ndim,1)**2 +  Dwi(1:Qdof,1:ndim,2)**2 
          call IntegrateVectorFunction2(elem,  wi(1:Qdof,1:ndim), valH1(1:ndim) )
          !elem%eta(RinterH1, 1:ndim) = sqrt(valH1(1:ndim))

          !write(*,'(a8, 2i5, 30es12.4)') 'ESRT',elem%i, k, valL2(:), errL8_loc(:), errL8(k, 1:ndim)

          ! L2L2-norm, L2H1-seminorm
          if(k > 0) then
             weightT = T_rule%weights(k) * state%time%tau(1)
             elem%eta(RinterL2L2, 1:ndim) = elem%eta(RinterL2L2, 1:ndim) + weightT * valL2(1:ndim)
             elem%eta(RinterL2H1, 1:ndim) = elem%eta(RinterL2H1, 1:ndim) + weightT * valH1(1:ndim)
             !stop "missing tau 9ri494o"
          endif

          ! L8L2-norm (element, global)
          errL8(k, 1:ndim) = errL8(k, 1:ndim) + valL2(1:ndim)
          valL2(1:ndim) = sqrt(valL2(1:ndim) )
          do l=1, ndim
             errL8_loc(l) = max( errL8_loc(l), valL2(l) )
          end do


       enddo
       elem%eta(RinterL8L2, 1:ndim) = errL8_loc(1:ndim)

       sumL2(:) = sumL2(:) + elem%eta(RinterL2L2, :)
       sumH1(:) = sumH1(:) + elem%eta(RinterL2H1, :)

       elem%eta(RinterL2L2, :) = sqrt(elem%eta(RinterL2L2, :))
       elem%eta(RinterL2H1, :) = sqrt(elem%eta(RinterL2H1, :))

       ! setting back projection
       elem%wST_Ritz(1:ndim,1:dofP,1:TdofP) = ws(1:ndim,1:dofP,1:TdofP)

       !if(valL2(1) == 0.) then
       !   write(*,'(a10, 2i5, 300es12.4)') 'W ritz', ndim, dofP,  elem%wST_Ritz(1,1:dofP,2)
       !   write(*,'(a10, 300es12.4)') 'Wi',wi(1:Qdof,1:ndim)
       !   print*
       !endif
       
       !ifile = 3010+10*state%time%iter + 1
       !allocate(Fx(1:Qdof, 1:2) )
       !call ComputeF(elem, Qdof, state%space%V_rule(elem%Qnum)%lambda(1:Qdof, 1:2), Fx(1:Qdof, 1:2) )
        !do k=1, Qdof
        !   write(ifile, *) Fx(k, 1:2), wi(k, :)
        !enddo
        !deallocate (Fx)

       deallocate(wi, Dwi, ws)
    end do

    ! total L^\infty( I_m, L^2(\Omega)) - norm, stored in square
    do l=1, ndim
       state%estim(RinterL8L2, l) = maxval( errL8(-1:TdofG, l) ) 
    end do

    ! L^2(L^2) and L^2(H^1) - norms
    state%estim(RinterL2L2, :) = state%estim(RinterL2L2, :) + sumL2(:)
    state%estim(RinterL2H1, :) = state%estim(RinterL2H1, :) + sumH1(:)

    state%estim(RinterL2L2_loc, :) = state%estim(RinterL2L2_loc, :) + sumL2(:)
    state%estim(RinterL2H1_loc, :) = state%estim(RinterL2H1_loc, :) + sumH1(:)

    !write(*,'(a10, 300es12.4)') 'EWW L8L2', &
    !     sqrt(state%estim(RinterL8L2, :)) , \sqrt(state%estim(RinterL2L2, :)), sumL2(:)

    
    !write(*,'(a8, a5, 40es12.4)') "EEEWWEE", 'sum',  &
    !     sqrt(state%estim(RinterL8L2, 1)), sqrt( errL8(-1:TdofG, 1) )
    
    deallocate(valL2, valH1, errL8, errL8_loc, sumL2, sumH1)

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!



    
       ! call Eval_whST_Ritz_Elem(elem, Gdof, 0, wi(1:Qdof,1:ndim), Dwi(1:Qdof,1:ndim,1:nbDim)  )
    
    ! replacement of Ritz by LS
    !do i = 1, nelem
       !elem => grid%elem(i)
       !       dofP = DOFtriang(elem%deg + 1)
!       elem%wST_Ritz( 1:ndim, 1:dofP, 1) = elem%wST_LS( 1:ndim, 1:dofP, 1 )
!       elem%zST_Ritz( 1:ndim, 1:dofP, 1) = elem%zST_LS( 1:ndim, 1:dofP, 1 )
!       if(elem%i == 1) print*,'NO RITZ !!!', R_type
!    enddo


!    else
!      ! copy wSTplus(:,:,1) , zSTplus(:,:,1) -> wSTplus(:,:,-1) , zSTplus(:,:,-1)
!      do i = 1, nelem
!        elem => grid%elem(i)
!
!        degP = elem%deg + state%p_mod_max
!        dofP = DOFtriang(degP)
!
!        elem%wSTplus(:,1:dofP,1) = elem%wST_LS(:,1:dofP,1)
!        elem%zSTplus(:,1:dofP,1) = elem%zST_LS(:,1:dofP,1)
!      end do
!    endif


    !stop "end subroutine ReconstrPrimalDualSolutions"

    !call state%cpuTime%addHOrecTime()

  end subroutine ReconstrPrimalSTSolutions

!  !> reconstruction using the solution of the local problems - Ritz reconstruction
!  subroutine RitzReconstr_PrimalDual( grid, additional )
!    class( mesh ), intent(inout) :: grid
!    logical, intent(in), optional :: additional
!    class( element ), pointer :: elem
!    real, dimension(:), allocatable :: b
!    logical :: loc_implicitly, deg_plus
!    integer :: i, j, dofP, nsize, dof, dofM, Tdof, ifile
!    integer :: tIndex
!
!    if (present(additional)) then
!      tIndex = -1
!    else
!      tIndex = 1
!    endif
!
!    !print*, 'RitzReconstr_PrimalDual called!'
!    ifile = 200 + 10 * state%space%adapt%adapt_level
!    !print*, 'ifile = = ', ifile
!
!    call Eval_Larger_systems( grid )
!
!    ! subroutine RitzReconstruction can not be used, different arrays
!    ! we use its modified copy
!
!    deg_plus = .true.
!
!    if (.not. allocated( grid%elem(1)%blockPlus ) ) &
!      stop 'blockPlus is not allocated in RitzReconstruction'
!
!    !nsize = size( grid%elem(1)%blockPlus%Mb(:,1) )
!    !do i=1,nsize
!    !   write(*,'(a8,i5, 300es12.4)') 'MB:;', i, grid%elem(1)%blockPlus%Mb(i,1:nsize)
!    !enddo
!
!
!    ! primal problem
!
!    ! fill the RHS
!    loc_implicitly = state%nlSolver%implicitly
!    state%nlSolver%implicitly = .false.
!
!    call ComputeSTDGM_Terms( deg_plus )
!
!    state%nlSolver%implicitly = loc_implicitly
!
!    do i = 1, grid%nelem
!       elem => grid%elem(i)
!       dofM = elem%dof
!       dof = elem%dof_plus
!       Tdof = elem%Tdof
!       if(Tdof /= 1) print*,'Trouble in RitzReconstr_PrimalDual (1) '
!       nsize = size( grid%elem(i)%blockPlus%Mb(:,1) )
!       allocate( b(1:nsize), source = 0.0 )
!
!       !if(i == 1) &
!       !     write(*,'(a10, 18i5)') 'SIZES:', &
!       !     i, elem%deg , elem%dof, elem%dof_plus, dof, dofM,  Tdof, ndim, nsize
!
!       b(1:nsize) = copy3Darrayto1Darray( elem%rhsST( 1:ndim,1:dof,1:Tdof), nsize )
!       !print*, 'b 1st = ' , norm2(b(1:nsize))
!
!
!       call SolveLocalMatrixProblem(nsize, elem%blockPlus%Mb(1:nsize,1:nsize), 1,b(1:nsize))
!
!       !rhs -> wSTplus
!       if ( .not. associated( elem%wSTplus ) ) stop "array elem%wSTplus not allocated)!!"
!
!       ! original solution
!
!
!       ! p+1 update
!       ! FR - put it into wSTplus(,,1) CHANGED !!!
!!       elem%wSTplus(1:ndim, 1:dof, -1:-1) = copy1DarrayTo3Darray( &
!!                                       b(1:nsize), ndim, dof, Tdof)
!        elem%wSTplus(1:ndim, 1:dof, tIndex:tIndex ) = copy1DarrayTo3Darray( &
!                                       b(1:nsize), ndim, dof, Tdof)
!        elem%wSTplus(1:ndim, 1:dofM, tIndex:tIndex) = elem%wSTplus(1:ndim, 1:dofM, tIndex:tIndex) + &
!                                            elem%wST(1:ndim, 1:dofM, 1:1)
!       !CONTROL - the reconstruction is not zero somewhere
!        !if ( norm2(b(:)) < 1E-9 ) then
!        !  print*,'norm2(b(:)) < 1E-9 ', norm2(b)
!        !endif
!    !enddo
!
!
!    ! dual !will be never called for DWR_P
!    !if ( .not. present(DWR) ) &
!    !stop 'RitzReconstruction with primal=.false. cannot be called without DWR, needed for the rhs!'
!
!    !do i = 1, grid%nelem
!       !elem => grid%elem(i)
!       !dof = elem%dof_plus
!       !Tdof = elem%Tdof
!       !nsize = size( grid%elem(i)%blockPlus%Mb(:,1) )
!       !allocate( b(1:nsize), source = 0.0 )
!
!       !write(*,'(a10, 8i5)') 'sizes:', i, elem%deg , dof, Tdof, ndim, nsize!, &
!
!       b(1:nsize) = DWR%dualRes(i)%copyTo1Darray3_1_2( nsize )
!       call SolveLocalTrasposedMatrixProblem(nsize, elem%blockPlus%Mb(1:nsize,1:nsize), 1, b(1:nsize) )
!
!       !rhs -> zSTplus
!       if ( .not. associated( elem%zSTplus ) ) stop "array elem%zSTplus not allocated)!!"
!
!       ! original solution
!       !elem%zSTplus(1:ndim, 1:dofM, -1:-1) = elem%zST(1:ndim, 1:dofM, 1:1)
!
!       ! p+1 update
!       ! FR - put it into zSTplus(,,1) CHANGED !!!
!!       elem%zSTplus(1:ndim, 1:dof,-1:-1) = copy1DarrayTo3Darray(b(1:nsize), ndim, dof, Tdof)
!       elem%zSTplus(1:ndim, 1:dof,tIndex:tIndex) = copy1DarrayTo3Darray(b(1:nsize), ndim, dof, Tdof)
!       ! FR: ????
!       elem%zSTplus(1:ndim, 1:dofM, tIndex:tIndex) = elem%zST(1:ndim, 1:dofM, 1:1) + &
!                                            elem%zSTplus(1:ndim, 1:dofM, tIndex:tIndex)
!
!       !call PlotElemFunction3D(ifile+3, elem, dof, elem%wSTplus( 1:ndim, 1:dof, tIndex) )
!       !call PlotElemFunction3D(ifile+4, elem, dof, elem%zSTplus( 1:ndim, 1:dof, tIndex) )
!
!       ! call PlotElemFunction3D(ifile+5, elem, dofM, elem%wST( 1:ndim, 1:dofM, 1) )
!       ! call PlotElemFunction3D(ifile+6, elem, dofM, elem%zST( 1:ndim, 1:dofM, 1) )
!
!       ! call PlotElemFunction3D(ifile+7, elem, dof, elem%wSTplus( 1:ndim, 1:dof, 1) )
!       ! call PlotElemFunction3D(ifile+8, elem, dof, elem%zSTplus( 1:ndim, 1:dof, 1) )
!
!
!       !CONTROL - the reconstruction is not zero somewhere
!       !if ( norm2(b(:)) < 1E-9 ) then
!       !   print*,'norm2(b(:)) < 1E-9 ', norm2(b)
!       !endif
!       deallocate( b )
!    enddo
!
!    !do i = 1, grid%nelem
!    !   elem => grid%elem(i)
!    !
!    !   dofP = elem%dof_plus
!    !   write(*,'(a10, 2i5, 300es12.4)') 'primal:',i,dofP, elem%wSTplus( 1:ndim, 1:dofP, -1)
!    !   write(*,'(a10, 2i5, 300es12.4)') ' dual :',i,dofP, elem%zSTplus( 1:ndim, 1:dofP, -1)
!    !enddo
!
!  end subroutine RitzReconstr_PrimalDual

!  !> Evaluation of larger systems
!  subroutine Eval_Larger_systems( grid )
!    class( mesh ), intent(inout) :: grid
!    logical :: deg_plus,  loc_implicitly
!    class( element ), pointer :: elem
!    real, allocatable, dimension(:) :: x,b
!    integer :: i, mdof, dof, Tdof, ndof, kvec, ivec, l, k , nsize, plusDeg
!
!    plusDeg = state%p_mod_max
!
!    ! we need the larger matrix
!    print*, 'FR updateProblemPlus called in Eval_Larger_systems'
!    call updateProblemPlus( grid, .true. )
!
!    loc_implicitly = state%nlSolver%implicitly
!    state%nlSolver%implicitly = .true.
!
!    ! = .false. since the degree was already increased in updateProblemPlus( grid, .true.)
!    deg_plus = .false.
!    ! evaluation of the larger (p+1) blockPlus
!    call ComputeSTDGM_Terms( deg_plus )
!
!    call CopyBlocksSTtoBlockPlus()
!
!
!
!    nsize = sum( grid%elem(:)%dof * ndim * grid%elem(:)%Tdof )
!
!    allocate( x(1:nsize), source = 0.0 )
!    allocate( b(1:nsize), source = 0.0 )
!
!
!    ! fill zST to x
!    ! ALGEB - use DWR%x -> x, just enlarge the size
!    ivec = 0
!    do i=1,grid%nelem
!       elem => grid%elem(i)
!       !dof_plus = elem%dof_plus
!       ! lower dof ~ p
!
!       mdof = DOFtriang( elem%deg - plusDeg )
!       ! ~ p+1
!       dof = elem%dof
!       Tdof = elem%Tdof
!       ndof = dof * Tdof * ndim
!       kvec = ivec
!
!       do l = 1, Tdof
!          do k = 1, ndim
!             x(kvec+1:kvec + mdof) =  elem%zST(k,1:mdof, l)
!             x(kvec+1+mdof:kvec+dof) = 0.0
!             kvec = kvec + dof
!          enddo !k
!       enddo !l
!
!       ivec = ivec + ndof
!    end do !i
!
!    ! multiply zST^T * C -> b
!    ! p has to be increased , otherwise we multiply only with a part from the matrix
!    call bMVprodST_Dual( b, x, nsize )
!    !      print*, 'x :' norm2(x)
!    !      print*, 'b :', norm2(b)
!    ! control b - the first columns should be almost zero
!
!    ! DECREASE THE POLYNOMIAL DEGREE NOW
!    !      print*, 'DECREASE THE POLYNOMIAL DEGREE NOW '
!    !      print*, 'WATCH: We may not allocate everything, since mesh adaptation is coming now.'
!    !      UPDATE IN FUTURE: We may not allocate everything, since mesh adaptation is coming now.
!    call updateProblemPlus( grid, .false. )
!
!    ! instead of wSS , use DWR%dualRes(:)%x(:,:,:) - used to Ritz reconstruction
!    call DWR%fillDualResVector( grid, nsize, b(1:nsize) )
!    deallocate( x, b )
!
!
!    print*, '!!! ComputeSTDGM_Terms impl=True was computed only for the bigger system and not back for the smaller one!'
!    !      call ComputeSTDGM_Terms( )
!
!    state%nlSolver%implicitly = loc_implicitly
!
!  end subroutine Eval_Larger_systems



  !> reconstruction using the solution of the local problems - Ritz reconstruction
  !> compute the Ritz reconstruction based on the local problem
  !> \f$ a(uPlus, \varphi) = res(\varphi) \forall \varphi \in S_{hp}\f$
  !> primal / dual solution is used
  !> new version using bigBlock instead of blockST
  subroutine RitzReconstr_PrimalDual_big( grid )
    class( mesh ), intent(inout) :: grid
    class( element ), pointer :: elem
    real, dimension(:), allocatable :: b
    !logical :: loc_implicitly
    integer :: i, j, dofP, nsize, dof, Tdof, ifile1, ifile2
    integer :: tIndex, p_mod, q_mod
    integer, dimension(1:6) :: dimensions
    logical :: transposed, loc_implicitly

    !print*, "RitzReconstr_PrimalDual_big called!", tIndex

    ! test if bigBlock matrix is ready


    if (.not. state%state_of_terms%is_matrix_ready_warning(.true., .false.)) then
      ! set implicitly
      loc_implicitly = state%nlSolver%implicitly
      state%nlSolver%implicitly = .true.

      call ComputeST_Terms( .true. )
      state%nlSolver%implicitly = loc_implicitly
      print*, "matrix was not ready we need to compute it"
    end if

    ! test if rhs vector is ready
    if (.not. state%state_of_terms%is_res_ready_warning(.true., .false.) ) then
      ! set implicitly
      loc_implicitly = state%nlSolver%implicitly
      state%nlSolver%implicitly = .false.
      call ComputeST_Terms( .true. )
      state%nlSolver%implicitly = loc_implicitly
      print*,  "res vec was not ready we need to compute it"
    end if



    p_mod = state%getP_mod( )
    q_mod = state%getQ_mod( )

    call state%setP_mod( 1 ) ! space deg + 1
    call state%setQ_mod( 0 ) ! time + 0

    ifile1 = 1000 + 100 * state%space%adapt%adapt_level + state%time%iter_loc
    ifile2 = 1000 + 100 * state%space%adapt%adapt_level + state%time%iter_loc + 50


    if (.not. allocated( grid%elem(1)%bigBlock(0)%Mb ) ) &
      stop 'bigBlock is not allocated in RitzReconstruction'


    !!! primal problem !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


!    state%nlSolver%implicitly = loc_implicitly

    do i = 1, grid%nelem
      elem => grid%elem(i)

      !rhs -> wSTplus
      if ( .not. associated( elem%wST_Ritz ) ) stop "array elem%wST_Ritz not allocated)!!"
      !rhs -> zSTplus
      if ( .not. associated( elem%zST_Ritz ) ) stop "array elem%zST_Ritz not allocated)!!"

      dof = DOFtriang( elem%deg + state%getP_mod() )
      Tdof = elem%Tdof + state%getQ_mod()
      if(Tdof /= 1) print*,'Trouble in RitzReconstr_PrimalDual_big (3)'
      nsize = dof * ndim * Tdof !size

      allocate( b(1:nsize), source = 0.0 )

!      print*, "rhsST:", elem%rhsST( 1,1:dof,1:Tdof)




      ! watch for the ordering
      b(1:nsize) = copy3Darrayto1Darray( elem%rhsST( 1:ndim,1:dof,1:Tdof), nsize )
      dimensions = (/ dof, dof, ndim, ndim, Tdof, Tdof /)

      transposed = .false.
!      print*, "Transposed = ", transposed, i

      !!if(i == 199)  call WriteBigBlock(dimensions, elem%bigBlock(0) )

      call SolveLocalBigMatrixProblem(nsize, dimensions, elem%bigBlock(0), 1, b(1:nsize), transposed )

      elem%wST_Ritz(1:ndim, 1:dof, 1:1 ) = 0.0 ! is it needed?

      elem%wST_Ritz(1:ndim, 1:dof, 1:1 ) = copy1DarrayTo3Darray(b(1:nsize), ndim, dof, Tdof)

      ! b contains only the update of wST, not the whole wSTplus
      elem%wST_Ritz(1:ndim, 1:elem%dof, 1:1) = elem%wST_Ritz(1:ndim, 1:elem%dof,1:1)  &
           + elem%wST(1:ndim, 1:elem%dof, 1:1)


      ! FIRST of all we have to compute the dual residual to the plus dimenstion
      !    call DWR%fillDualResVector( grid, nsize, b(1:nsize) )

!      print*, 'copyTo1Darray3_1_2 called HERE'
!      print*, 'cdsda' , DWR%dualRes(i)%getSize()
      !b(1:nsize) = DWR%dualRes(i)%copyTo1Darray3_1_2( nsize )
      b(1:nsize) = DWR%dualRes(i)%copyTo1Darray3_1_2( (/ ndim, dof, Tdof /) )

      transposed = .true.
      call SolveLocalBigMatrixProblem(nsize, dimensions, elem%bigBlock(0), 1, b(1:nsize), transposed )

      elem%zST_Ritz(1:ndim, 1:dof,1:1) = copy1DarrayTo3Darray(b(1:nsize), ndim, dof, Tdof)
      ! b contains only the update
      elem%zST_Ritz(1:ndim, 1:elem%dof,1:1) = elem%zST(1:ndim, 1:elem%dof, 1:1)  &
           + elem%zST_Ritz(1:ndim, 1:elem%dof,1:1)

      !call PlotElemFunction3D(ifile+1, elem, dof, elem%wST_Ritz( 1:ndim, 1:dof, 1) )

      !call SetOneElementIC_PLUS(elem, dof, elem%wST_Ritz( 1:ndim, 1:dof, 1) )
      !call SetOneElementIC_PLUS(elem, dof, elem%zST_Ritz( 1:ndim, 1:dof, 1) )
      !call PlotElemFunction3D(ifile1, elem, elem%dof, elem%wST( 1, 1:elem%dof, 1) )
      !call PlotElemFunction3D(ifile2, elem, dof, elem%wST_Ritz( 1, 1:dof, 1) )

      !call PlotElemFunction3D(ifile+2, elem, dof, elem%zST_Ritz( 1:ndim, 1:dof, 1) )

      deallocate( b )
    enddo

    ! original setting
    call state%setP_mod( p_mod )
    call state%setQ_mod( q_mod )

  end subroutine RitzReconstr_PrimalDual_big


  !> reconstruction using the solution of the local problems - Ritz reconstruction
  !> compute the Ritz reconstruction based on the local problem
  !> \f$ a(uPlus, \varphi) = res(\varphi) \forall \varphi \in S_{hp}\f$
  !> primal / dual solution is used
  !> new version using bigBlock instead of blockST
  subroutine IterativeRitzReconstr_PrimalDual_big( grid )
    class( mesh ), intent(inout) :: grid
    class( element ), pointer :: elem
    real, dimension(:), allocatable :: b
    real, dimension(:,:,:), allocatable :: updateW, testRitz
    logical :: loc_implicitly
    integer :: i, j, dofP, nsize, dof, Tdof, ifile1, ifile2, kk, num_nonconv, num_nonconv1
    integer :: tIndex, p_mod, q_mod, max_iterations
    integer, dimension(1:6) :: dimensions
    logical :: transposed, deg_plus, wSTRitz
    real :: residual, residualOld, tolerance, normUpdate, condNumber
    real :: lambda, lambda_old
    integer :: ll , max_iterations_damping
    type( Mblock) :: locDiagBlock
    ! for testing the condition number
    real :: min_cond_number, max_cond_number, aver_cond_number
    integer :: min_cond_number_i, max_cond_number_i, aver_cond_number_i
    logical :: iprint = .false.



    min_cond_number = 0.0
    max_cond_number = 0.0
    aver_cond_number = 0.0

    !print*, "IterativeRitzReconstr_PrimalDual_big called!"
    num_nonconv = 0
    num_nonconv1 = 0

    p_mod = state%getP_mod( )
    q_mod = state%getQ_mod( )

    call state%setP_mod( 1 ) ! space deg + 1
    call state%setQ_mod( 0 ) ! time + 0

    ! parameters for calling ComputeTerms
    deg_plus = .true.
    wSTRitz = .true.

    ifile1 = 1000 + 100 * state%space%adapt%adapt_level + state%time%iter_loc
    ifile2 = 1000 + 100 * state%space%adapt%adapt_level + state%time%iter_loc + 50

    !do i = 1, grid%nelem
    !   elem => grid%elem(i)
    !   call PlotElemFunction3D(ifile1, elem, elem%dof, elem%wST( 1, 1:elem%dof, 1) )
    !enddo


    if (.not. allocated( grid%elem(1)%bigBlock(0)%Mb ) ) &
         stop 'bigBlock is not allocated in RitzReconstruction'


!!! primal problem !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    loc_implicitly = state%nlSolver%implicitly

    tolerance = 1.E-8!1.E-10

    max_iterations = 10
    !print*, "Tolerance of the iterative Ritz reconstruction:", tolerance, max_iterations

    do i = 1, grid%nelem
       elem => grid%elem(i)

       max_iterations = 10
       if(elem%rezid > 1E-03) then
          !print*,'elem%rezid = ', elem%rezid, elem%i, elem%xc
          max_iterations = 1
          num_nonconv1 = num_nonconv1 + 1
       endif

       iprint = .false.
       !if(elem%i == 279) iprint = .true.

       !rhs -> wST_Ritz
       if ( .not. associated( elem%wST_Ritz ) ) stop "array elem%wST_Ritz not allocated)!!"
       !rhs -> zSTplus
       if ( .not. associated( elem%zST_Ritz ) ) stop "array elem%zST_Ritz not allocated)!!"

       dof = elem%getActualDof()
       Tdof = elem%getActualTDof()
       if(Tdof /= 1) print*,'Trouble in RitzReconstr_PrimalDual_big (4)'
       nsize = dof * ndim * Tdof !size
       allocate( b(1:nsize), source = 0.0 )
       allocate( testRitz(1:ndim, 1:dof, 1:Tdof) )
       dimensions = (/ dof, dof, ndim, ndim, Tdof, Tdof /)

!!! DUAL RECONSTRUCTION - linear -> no iteration is needed
       b(1:nsize) = DWR%dualRes(i)%copyTo1Darray3_1_2( (/ ndim, dof, Tdof /) )
       transposed = .true.

       ! from the 6-dimensional to 2-dim
       !YYY call InitMblock( locDiagBlock, nsize, nsize )
       !YYY call elem%bigBlock(0)%copyBigBlockToMBlock(locDiagBlock, elem%bigBlock(0)%dimensions() )

       ! TODO HERE
       !YYY condNumber = MatrixConditionNumber( nsize , locDiagBlock%Mb(1:nsize,1:nsize))

       !YYY call DeleteMblock( locDiagBlock )

       !print*, "--- condition number ---, elem = ", elem%i , "cond: ", condNumber

       !YYY if (condNumber > max_cond_number) then
       !YYY   max_cond_number = condNumber
       !YYY   max_cond_number_i = i
       !YYY end if

       !YYY if (condNumber < min_cond_number .or. i == 1) then
       !YYY   min_cond_number  = condNumber
       !YYY   min_cond_number_i = i
       !YYY end if

       aver_cond_number = aver_cond_number + condNumber

       !if(i == 174)  call WriteBigBlock(dimensions, elem%bigBlock(0) )

       call SolveLocalBigMatrixProblem(nsize, dimensions, elem%bigBlock(0), 1, b(1:nsize), transposed )

       elem%zST_Ritz(1:ndim, 1:dof,1:1) = copy1DarrayTo3Darray(b(1:nsize), ndim, dof, Tdof)
       ! b contains only the update
       elem%zST_Ritz(1:ndim, 1:elem%dof,1:1) = elem%zST(1:ndim, 1:elem%dof, 1:1)  &
            + elem%zST_Ritz(1:ndim, 1:elem%dof,1:1)


!!! PRIMAL RECONSTRUCTION !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       allocate( updateW(1:ndim,1:dof, 1:Tdof), source = 0.0 )


       ! init - wST_Ritz = wST
       elem%wST_Ritz(1:ndim, 1:dof, 1:1 ) = 0.0
       elem%wST_Ritz(1:ndim, 1:elem%dof, 1:1 ) = elem%wST(1:ndim, 1:elem%dof, 1:1)


       state%nlSolver%implicitly = .false.
       !print*, "RHS norm before iterations: ", norm2( elem%rhsST )
       call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)
       residual = norm2( elem%rhsST(1:ndim,1:dof,1:Tdof) )
       residualOld = residual

       kk = 1

       do while (residual > tolerance .and. kk <= max_iterations)

          ! MATRIX update
          state%nlSolver%implicitly = .true.
          call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)


          ! rhsST -> to long vector
          b(1:nsize) = copy3Darrayto1Darray( elem%rhsST( 1:ndim,1:dof,1:Tdof), nsize )


          ! Step of the Newton methos - compute update
          transposed = .false.
          call SolveLocalBigMatrixProblem(nsize, dimensions, elem%bigBlock(0), 1, b(1:nsize), transposed )

          updateW(1:ndim, 1:dof, 1:Tdof ) = copy1DarrayTo3Darray(b(1:nsize), ndim, dof, Tdof)


          ! NEWTON WITH DAMPING
          lambda = 1.0
          lambda_old = 1.0
          ! not necessary to compute, simplification
          ! call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)
          !residualOld = norm2( elem%rhsST(1:ndim,1:dof,1:Tdof) )
          !residualOld = residual

          ll = 1
          max_iterations_damping = 8

          ! b contains only the update of wST, not the whole wSTplus
          elem%wST_Ritz(1:ndim, 1:dof, 1:1) = elem%wST_Ritz(1:ndim, 1:dof,1:1)  &
               + updateW(1:ndim, 1:dof, 1:1)

          !print*, "RHS norm after ", kk ,"-th iteration: ", norm2( elem%rhsST )
          state%nlSolver%implicitly = .false.
          call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)
          residual = norm2( elem%rhsST(1:ndim,1:dof,1:Tdof) )

          ! DAMPING IS NEEDED
          do while ( residual >= residualOld .and. ll <= max_iterations_damping )

             lambda = lambda / 2.0
             ! remove part of the update and adding the new one
             elem%wST_Ritz(1:ndim, 1:dof, 1:1) = elem%wST_Ritz(1:ndim, 1:dof,1:1) - &
                  + (lambda -lambda_old) * updateW(1:ndim, 1:dof, 1:1)

             call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)
             residual = norm2( elem%rhsST(1:ndim,1:dof,1:Tdof) )

             lambda_old = lambda

             ll = ll + 1
          end do

          if ( residual > residualOld) then
             !   !if(kk == 1) print*
             !if(iprint) &
             !     write(*,'(a30,i6,a4,i5,a13,es12.4,a3,2es12.4)') &
             !     "Resid in Iterative Ritz! el= ", elem%i, ", it=", kk, &
             !     ", new > old: " , residual , " > ", residualOld, lambda
             !exit
          end if

          residualOld = residual

          !print*,'# iters finished:', elem%i, residual, kk, lambda

          kk = kk + 1
       end do !!  while (residual > tolerance .and. kk <= max_iterations)


       if (residual > tolerance) then
          num_nonconv = num_nonconv + 1
          if(num_nonconv <=10) then
          !   write(*,'(a10,i6,a20,i5, a14, es12.4, a8,es12.4 )')  &
          !        "Ritz rec: i=", elem%i , &
          !        " did not converge in ", max_iterations, &
          !        " under  TOL = ", tolerance, ", rez = ", residual
             !iprint = .true.
          endif

          !stop "Stopping now!"
       end if


       if (residual > 1.E+1) then
          !print*,"The iterative Ritz reconstruction diverged!"
          ! stop
       end if

       !      if (norm2(testRitz-elem%wST_Ritz) > 1.E-3) then
       !        print*, "elem i = ", elem%i, norm2(testRitz-elem%wST_Ritz)
       !        print*, "wST_Ritz =" , elem%wST_Ritz
       !        print*, "first it =" , testRitz
       !      end if

       deallocate( b, updateW , testRitz)

       ! In the end we need to fill matrix and RHS with the original data ~ wST
       state%nlSolver%implicitly = .true.
       call ComputeSTDiag_Terms_loc(elem, deg_plus, .false.)
       state%nlSolver%implicitly = .false.
       call ComputeSTDiag_Terms_loc(elem, deg_plus, .false.)

       !      print*, "ERROR - for mesh only!"
       !      elem%eta(:,:) = elem%i


       !if(iprint) then
       !   call PlotElemFunction3D(81, elem, dof, elem%zST_Ritz(1, 1:dof, 1:1) )
       !   call PlotElemFunction3D(91, elem, dof, elem%wST_Ritz(1, 1:dof, 1:1) )
       !   call PlotElemFunction3D(ifile2, elem, dof, elem%wST_Ritz( 1, 1:dof, 1) )
       !endif

       !call PlotElemFunction3D(61, elem, elem%dof, elem%zST(1, 1:elem%dof, 1:1) )
       !call PlotElemFunction3D(71, elem, elem%dof, elem%wST(1, 1:elem%dof, 1:1) )

       !call PlotElemFunction3D(ifile2, elem, dof, elem%wST_Ritz( 1, 1:dof, 1) )

    enddo ! do i=1, grid%nelem

    !stop "ur39ur39j3oje"

    ! TEST CONDITION NUMBER
    !aver_cond_number = aver_cond_number / grid%nelem
    !print*, "CONDITION NUMBERS OF THE DIAG BLOCKS:"
    !print*, "MIN COND: elem = " , min_cond_number_i , "val = ", min_cond_number
    !print*, "MAX COND: elem = " , max_cond_number_i , "val = ", max_cond_number
    !print*, "AVER COND: val = ", aver_cond_number
    !print*, "*** ***"


    if(num_nonconv > 0 .or. num_nonconv1 > 0) &
       write(*,'(a15, i6,a1,i6, a33)') &
            'IterativeRitz:', num_nonconv,'(', num_nonconv1, ') elems without "convergence" ! '


    state%nlSolver%implicitly = loc_implicitly
    ! original setting
    call state%setP_mod( p_mod )
    call state%setQ_mod( q_mod )

    !stop "IterativeRitzReconstr_PrimalDual_big END"

  end subroutine IterativeRitzReconstr_PrimalDual_big



  !> reconstruction using the solution of the local problems - Ritz reconstruction
  !> compute the Ritz reconstruction based on the local problem
  !> \f$ a(uPlus, \varphi) = res(\varphi) \forall \varphi \in S_{hp}\f$
  !> primal / dual solution is used
  !> new version using bigBlock instead of blockST
  subroutine IterativeRitzReconstr_PrimalST_big( grid )
    class( mesh ), intent(inout) :: grid
    class( element ), pointer :: elem
    real, dimension(:), allocatable :: b, a, c
    real, dimension(:,:,:), allocatable :: updateW, testRitz
    logical :: loc_implicitly
    integer :: i, j, dofP, nsize, dof, Tdof, ifile1, ifile2, kk, num_nonconv, num_nonconv1
    integer :: tIndex, p_mod, q_mod, max_iterations
    integer, dimension(1:6) :: dimensions
    logical :: transposed, deg_plus, wSTRitz
    real :: residual, residualOld, tolerance, normUpdate, condNumber
    real :: lambda, lambda_old
    integer :: ll , max_iterations_damping
    type( Mblock) :: locDiagBlock
    ! for testing the condition number
    real :: min_cond_number, max_cond_number, aver_cond_number, max_resid
    integer :: min_cond_number_i, max_cond_number_i, aver_cond_number_i
    logical :: iprint = .false.


    if(state%time%iter == 1 )then
       print*,' call ComputeSTDiag_Terms_loc must be called after ttime+=tau', &
            'key word "CTYW" in terms.f90' 
    endif


    min_cond_number = 0.0
    max_cond_number = 0.0
    aver_cond_number = 0.0

    !print*, "IterativeRitzReconstr_PrimalDual_big called!"
    num_nonconv = 0
    num_nonconv1 = 0

    p_mod = state%getP_mod( )
    q_mod = state%getQ_mod( )

    call state%setP_mod( 1 ) ! space deg + 1
    call state%setQ_mod( 1 ) ! time + 1
    !print*,'GERE call state%setQ_mod( 1 ) ! time + 0'
    
    ! parameters for calling ComputeTerms
    deg_plus = .true.
    wSTRitz = .true.

    ifile1 = 1000 + 100 * state%space%adapt%adapt_level + state%time%iter_loc
    ifile2 = 1000 + 100 * state%space%adapt%adapt_level + state%time%iter_loc + 50

    !do i = 1, grid%nelem
    !   elem => grid%elem(i)
    !   call PlotElemFunction3D(ifile1, elem, elem%dof, elem%wST( 1, 1:elem%dof, 1) )
    !enddo


    if (.not. allocated( grid%elem(1)%bigBlock(0)%Mb ) ) &
         stop 'bigBlock is not allocated in RitzReconstruction'

!!! primal problem !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    loc_implicitly = state%nlSolver%implicitly

    tolerance = 1.E-8!1.E-10
    max_resid = 0.
    max_iterations = 12
    !print*, "Tolerance of the iterative Ritz reconstruction:", tolerance, max_iterations

    do i = 1, grid%nelem
       elem => grid%elem(i)

       !max_iterations = 12
       !if(elem%rezid > 1E-03) then
       !   !print*,'elem%rezid = ', elem%rezid, elem%i, elem%xc
       !   max_iterations = 1
       !   num_nonconv1 = num_nonconv1 + 1
       !endif

       !iprint = .false.
       !if(elem%i == 2) iprint = .true.

       !rhs -> wST_Ritz
       if ( .not. associated( elem%wST_Ritz ) ) stop "array elem%wST_Ritz not allocated)!!"
       !rhs -> zSTplus
       !if ( .not. associated( elem%zST_Ritz ) ) stop "array elem%zST_Ritz not allocated)!!"

       dof = elem%getActualDof()
       Tdof = elem%getActualTDof()
       !if(Tdof /= 1) print*,'Trouble in RitzReconstr_PrimalDual_big (5)'
       nsize = dof * ndim * Tdof !size
       allocate( b(1:nsize), source = 0.0 )
       allocate( a(1:nsize), source = 0.0 )
       allocate( c(1:nsize), source = 0.0 )
       !allocate( testRitz(1:ndim, 1:dof, 1:Tdof) )
       dimensions = (/ dof, dof, ndim, ndim, Tdof, Tdof /)

!!! DUAL RECONSTRUCTION - linear -> no iteration is needed
       ! b(1:nsize) = DWR%dualRes(i)%copyTo1Darray3_1_2( (/ ndim, dof, Tdof /) )
       ! transposed = .true.

       ! ! from the 6-dimensional to 2-dim
       ! !YYY call InitMblock( locDiagBlock, nsize, nsize )
       ! !YYY call elem%bigBlock(0)%copyBigBlockToMBlock(locDiagBlock, elem%bigBlock(0)%dimensions() )

       ! ! TODO HERE
       ! !YYY condNumber = MatrixConditionNumber( nsize , locDiagBlock%Mb(1:nsize,1:nsize))

       ! !YYY call DeleteMblock( locDiagBlock )

       ! !print*, "--- condition number ---, elem = ", elem%i , "cond: ", condNumber

       ! !YYY if (condNumber > max_cond_number) then
       ! !YYY   max_cond_number = condNumber
       ! !YYY   max_cond_number_i = i
       ! !YYY end if

       ! !YYY if (condNumber < min_cond_number .or. i == 1) then
       ! !YYY   min_cond_number  = condNumber
       ! !YYY   min_cond_number_i = i
       ! !YYY end if

       ! aver_cond_number = aver_cond_number + condNumber

       ! !if(i == 174)  call WriteBigBlock(dimensions, elem%bigBlock(0) )

       ! call SolveLocalBigMatrixProblem(nsize, dimensions, elem%bigBlock(0), 1, b(1:nsize), transposed )

       ! elem%zST_Ritz(1:ndim, 1:dof,1:1) = copy1DarrayTo3Darray(b(1:nsize), ndim, dof, Tdof)
       ! ! b contains only the update
       ! elem%zST_Ritz(1:ndim, 1:elem%dof,1:1) = elem%zST(1:ndim, 1:elem%dof, 1:1)  &
       !      + elem%zST_Ritz(1:ndim, 1:elem%dof,1:1)



!!! PRIMAL RECONSTRUCTION !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
       allocate( updateW(1:ndim,1:dof, 1:Tdof), source = 0.0 )


       ! init - wST_Ritz = wST
       elem%wST_Ritz(1:ndim, 1:dof, 1:Tdof ) = 0.0
       elem%wST_Ritz(1:ndim, 1:elem%dof, 1:elem%Tdof ) = elem%wST(1:ndim, 1:elem%dof, 1:elem%Tdof)


       state%nlSolver%implicitly = .false.
       !if(iprint) print*, "RHS norm before iterations 1: ", elem%dof, dof, norm2( elem%rhsST )
       !elem%rhsST = 0.

       call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)
       residual = norm2( elem%rhsST(1:ndim,1:dof,1:Tdof) )
       residualOld = residual
       
       !if(iprint) print*, "RHS norm before iterations 2: ", elem%i, norm2( elem%rhsST )

       kk = 1

       do while ( kk == 1 .or. (residual > tolerance .and. kk <= max_iterations))
          
          ! MATRIX update
          state%nlSolver%implicitly = .true.
          call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)


          !if(iprint) then
          !
          !   write(*,'(a10,2i5,30es12.4)') 'RHS bf', kk, nsize, elem%rhsST(1:ndim,1:dof,1:Tdof)
          !endif

          ! rhsST -> to long vector
          b(1:nsize) = copy3Darrayto1Darray( elem%rhsST( 1:ndim,1:dof,1:Tdof), nsize )
          a(1:nsize) = b(1:nsize)
          
          !if(iprint) then
          !   write(*,*)
          !   !call WriteBigBlock(dimensions, elem%bigBlock(0))
          !   write(*,'(a10, 8i5)') 'dimensions=',dimensions
          !   write(*,'(a10,2i5,30es12.4)') 'b_rhs', kk, nsize, b(1:nsize)
          !endif
          
          ! Step of the Newton methos - compute update
          transposed = .false.
          call SolveLocalBigMatrixProblem(nsize, dimensions, elem%bigBlock(0), 1, b(1:nsize), transposed )

          updateW(1:ndim, 1:dof, 1:Tdof ) = copy1DarrayTo3Darray(b(1:nsize), ndim, dof, Tdof)

          call vectorMulBigBlockMask(elem%bigBlock(0), dimensions, b, nsize,  c, nsize)
          
          !if(iprint) then
          !   !call WriteBigBlock(dimensions, elem%bigBlock(0))
          !   !write(*,*)
          !   write(*,'(a10,2i5,30es12.4)') 'b_sol', kk, nsize, b(1:nsize)
          !   write(*,'(a10,2i5,30es12.4)') 'residual', kk, nsize, norm2(a(1:nsize) - c(1:nsize))
          !endif
          


          ! NEWTON WITH DAMPING
          lambda = 1.0
          lambda_old = 1.0
          ! not necessary to compute, simplification
          ! call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)
          !residualOld = norm2( elem%rhsST(1:ndim,1:dof,1:Tdof) )
          !residualOld = residual

          ll = 1
          max_iterations_damping = 8

          ! b contains only the update of wST, not the whole wSTplus
          elem%wST_Ritz(1:ndim, 1:dof, 1:Tdof) = elem%wST_Ritz(1:ndim, 1:dof,1:Tdof)  &
               + updateW(1:ndim, 1:dof, 1:Tdof)

          state%nlSolver%implicitly = .false.
          call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)
          residual = norm2( elem%rhsST(1:ndim,1:dof,1:Tdof) )

          !if(iprint) then
          !    write(*,'(a10,2i5,30es12.4)') 'RHS af', kk, nsize, elem%rhsST(1:ndim,1:dof,1:Tdof)
          !   print*,  "RHS norm after ", kk ,"-th iteration: ", &
          !          residual, residualOld, ll
          !endif
          
          !if(iprint) then
          !   write(*,'(a10,2i5,30es12.4)') 'RHS af1', kk, nsize, elem%rhsST(1:ndim,1:dof,1:Tdof)
          !endif

          ! DAMPING IS NEEDED
          do while ( residual >= residualOld .and. ll <= max_iterations_damping )

             lambda = lambda / 2.0
             ! remove part of the update and adding the new one
             elem%wST_Ritz(1:ndim, 1:dof, 1:1) = elem%wST_Ritz(1:ndim, 1:dof,1:1) - &
                  + (lambda -lambda_old) * updateW(1:ndim, 1:dof, 1:1)

             call ComputeSTDiag_Terms_loc(elem, deg_plus, wSTRitz)
             residual = norm2( elem%rhsST(1:ndim,1:dof,1:Tdof) )

             lambda_old = lambda

             ll = ll + 1
             !if(iprint) print*,'##@#', ll, lambda

             !if(iprint) then
             !write(*,'(a10,2i5,60es12.4)') 'RHS af2', kk, ll, residual, residualOld, elem%rhsST(1:ndim,1:dof,1:Tdof)
             !endif

             
          end do

          !if ( residual > residualOld) then
             !   !if(kk == 1) print*
             !if(iprint) &
             !     write(*,'(a30,i6,a4,i5,a13,es12.4,a3,2es12.4)') &
             !     "Resid in Iterative Ritz! el= ", elem%i, ", it=", kk, &
             !     ", new > old: " , residual , " > ", residualOld, lambda
             !exit
          !end if

          residualOld = residual
          
          !if(iprint) print*,'# iters finished:', elem%i, residual, kk, lambda
          
          kk = kk + 1
       end do !!  while (residual > tolerance .and. kk <= max_iterations)


       if (residual > tolerance) then
          num_nonconv = num_nonconv + 1
          max_resid = max(max_resid, residual)
          !if(num_nonconv <=10) then
          !   write(*,'(a10,i6,a20,i5, a14, es12.4, a8,es12.4 )')  &
          !        "Ritz rec: i=", elem%i , &
          !        " did not converge in ", max_iterations, &
          !        " under  TOL = ", tolerance, ", rez = ", residual
             !iprint = .true.
          !endif

          !stop "Stopping now!"
       end if


       !if (residual > 1.E+1) then
       !   !print*,"The iterative Ritz reconstruction diverged!"
       !   ! stop
       !end if

       !      if (norm2(testRitz-elem%wST_Ritz) > 1.E-3) then
       !        print*, "elem i = ", elem%i, norm2(testRitz-elem%wST_Ritz)
       !        print*, "wST_Ritz =" , elem%wST_Ritz
       !        print*, "first it =" , testRitz
       !      end if

       deallocate( b, a, c, updateW )
       !deallocate( testRitz)

       ! In the end we need to fill matrix and RHS with the original data ~ wST
       state%nlSolver%implicitly = .true.
       call ComputeSTDiag_Terms_loc(elem, deg_plus, .false.)
       state%nlSolver%implicitly = .false.
       call ComputeSTDiag_Terms_loc(elem, deg_plus, .false.)

       !      print*, "ERROR - for mesh only!"
       !      elem%eta(:,:) = elem%i


       !if(iprint) then
       !   call PlotElemFunction3D(81, elem, dof, elem%zST_Ritz(1, 1:dof, 1:1) )
       !   call PlotElemFunction3D(91, elem, dof, elem%wST_Ritz(1, 1:dof, 1:1) )
       !call PlotElemFunction3D(90, elem, elem%dof, elem%wST( 1, 1:elem%dof, 1) )
       !call PlotElemFunction3D(91, elem, dof, elem%wST_Ritz(1, 1:dof, 1:1) )
       !   call PlotElemFunction3D(ifile2, elem, dof, elem%wST_Ritz( 1, 1:dof, 1) )
       !endif

       !call PlotElemFunction3D(61, elem, elem%dof, elem%zST(1, 1:elem%dof, 1:1) )

       !call PlotElemFunction3D(ifile1, elem, elem%dof, elem%wST(1, 1:elem%dof, 1:1) )
       !call PlotElemFunction3D(ifile2, elem, dof, elem%wST_Ritz( 1, 1:dof, 1) )

       !if(iprint) then
       !write(*,'(a10,2i5, 300es12.4)') 'wST:',elem%i, elem%dof, elem%wST( 1, 1:elem%dof, 1)
       !   write(*,'(a10,2i5, 300es12.4)') 'wSTR:',elem%i, dof, elem%wST_Ritz( 1, 1:dof, 1)
       !   write(*,*),'______'
       !stop
       !endif
       
    enddo ! do i=1, grid%nelem

    !stop "ur39ur39j3oje"

    ! TEST CONDITION NUMBER
    !aver_cond_number = aver_cond_number / grid%nelem
    !print*, "CONDITION NUMBERS OF THE DIAG BLOCKS:"
    !print*, "MIN COND: elem = " , min_cond_number_i , "val = ", min_cond_number
    !print*, "MAX COND: elem = " , max_cond_number_i , "val = ", max_cond_number
    !print*, "AVER COND: val = ", aver_cond_number
    !print*, "*** ***"


    if(num_nonconv > 0 .or. num_nonconv1 > 0) &
       write(*,'(a15, i6,a1,i6, a16,2es12.4)') &
            'Ritz nonconv:', num_nonconv,'(', num_nonconv1, '), max_resid = ', max_resid

    !stop
    
    state%nlSolver%implicitly = loc_implicitly
    ! original setting
    call state%setP_mod( p_mod )
    call state%setQ_mod( q_mod )

    !stop "IterativeRitzReconstr_PrimalDual_big END"

  end subroutine IterativeRitzReconstr_PrimalST_big


  
  !> vertex based continuous reconstruction using subroutines from ho-local.f90
  !> with itype == 3
  !> input elem%wS(1:2*ndim 1:dof)  primal & dual solutions
  !> output primal:  elem%wSTplus(1:ndim, 1:dof_plus, -1)
  !> output dual  :  elem%zSTplus(1:ndim, 1:dof_plus, -1)
  subroutine Vertex_based_PrimalDual(grid)
    class( mesh ), intent(inout) :: grid
    logical, dimension(:), allocatable :: inner ! inner vertex
    integer, dimension(:), allocatable :: N  ! number of elements sharing a vertex
    integer, dimension(:,:,:), allocatable :: supp !list of corresponding elements
    integer :: i, ndimL, itype
    integer :: maxdeg = 30

    itype = 3  ! case for primal dual solution, L2-norm
    !itype = 4  ! case for primal dual solution, H1-semi-norm

    ndimL = 2*ndim

    allocate( inner(1:grid%npoin), N(1:grid%npoin))
    allocate( supp(1:grid%npoin, 1:maxdeg, 1:2) )

    call SeekVertexSupports(grid, maxdeg, inner(1:grid%npoin), N(1:grid%npoin), &
         supp(1:grid%npoin, 1:maxdeg,1:2) )

    ! we go over vertex supports
    do i=1,grid%npoin
       !do i= 21, 21
       !do i= 1, 1
       call HO_LocalVertexProblem(ndimL, 1, i, inner(i), N(i), supp(i, 1:N(i),1:2), itype)
    enddo

    deallocate( inner, N, supp)

  end subroutine Vertex_based_PrimalDual

  !> computes the nonlinear algebraic estimate for DWR method
  !> output: DWR%estimNL = res(u_h)(z_h) = F(u_h) * z_h
  subroutine computeNonlinDWRestimates( DWR, grid)
    type( DWR_t ), intent(inout) :: DWR
    class( mesh ), intent(inout) :: grid
    class( element ), pointer :: elem
    integer :: i
    real :: loc_eta, alg_estim
    logical :: loc_implicitly, ready

    loc_implicitly = state%nlSolver%implicitly
    state%nlSolver%implicitly = .false.
    ! fill elem%vec(rhs,:)  - residual

    ready = state%state_of_terms%is_res_ready( .false., .false. )

    if (.not. ready) then
       print*, "! computeNonlinDWRestimates is_res_ready: ", ready
       call ComputeST_Terms( .false. )
    endif

    state%nlSolver%implicitly = loc_implicitly

    alg_estim = 0.0
    do i=1, grid%nelem
       elem => grid%elem(i)
       !compute the algebraic error
       loc_eta = DWRElemEstim_alg(elem)
       ! abs -vals?
       alg_estim = alg_estim + loc_eta
       elem%eta(dwrA,1)  = abs( loc_eta )
    enddo

    DWR%estimNL = abs( alg_estim )

    if ( state%nlSolver%non_alg_stop == 'aDWR' ) then
       ! C_Safe should not be here (safety parameter is used directly when computing)
       DWR%aDWR%linTol = DWR%estimNL * DWR%aDWR%C_A
       ! NO !!! TRY:
       !DWR%aDWR%linTol = DWR%aDWR%nlTol * DWR%aDWR%C_A
       !print*, 'DWR%aDWR%linTol updated to ' , DWR%aDWR%linTol
    endif

  end subroutine computeNonlinDWRestimates



  !> compute the LS reconstruction from primal and dual solution
  !> compute the weighted residual estimates:
  !> 1. global estimate ~ J(u) - J(u_h)
  !> 2. local indicators for the mesh refinement
  !> discretization error estimates
  ! STATIONARY PROBLEMS ONLY
  subroutine computeDWRestimates( DWR, grid )
    type( DWR_t ), intent(inout) :: DWR
    class( mesh ), intent(inout) :: grid
    class( element ), pointer :: elem
    integer :: i, j, kk, k, elemDof, dof, Qnum, nelem, mdof
    integer :: iPlot, ifile, dofP
    character(len=50) :: plot_file
    real :: val, val1

    call state%cpuTime%startEstimTime()

    call DWR%J%computeJu(grid)

    nelem = grid%nelem

    ! DualDWRrezidErrorEstimates is expensive - may be useful to not call it sometimes !
    ! compute the Dual estimator
    ! depending on the dual residual r*(u_h,z_h)( I_h^{p+1 }u_h  - u_h )
    ! multiply b = ( J(phi) - zST^T * C^+ ) )
    ! multiply b * (u_plus - u)
    call DualDWRrezidErrorEstimates( grid, DWR )

    !do i = 1, grid%nelem
    !   elem => grid%elem(i)
    !   ifile = 200 + 10 * state%space%adapt%adapt_level
    !   dofP = size (elem%zSTplus, 2)
    !   call PlotElemFunction3D(ifile+2, elem, dofP, elem%zSTplus( 1, 1:dofP, -1) )
    !end do


    ! TRUE -> use standard RES error indicators for mesh adaptation
    if (DWR%RESindicators) then
       ! rhsST has to be filled from primal problem
       !call ComputeSTDGM_Terms( .true. )
       print*, 'ComputeST_Terms for dWR%RESindicators!, degplus=True, impl = ', state%nlSolver%implicitly

       call ComputeST_Terms( .true. )
       call RezidErrorEstimates( .false. , .true. )

       state%estim( dwrS, 1:ndim ) = state%estim( max_resT_S , 1:ndim )

       ! the total RES estimate
       state%estim( dwr_etaS, 1:ndim ) = state%L_estim(resS)**2 /  state%time%tau(1)

       ! fill the elem%eta(resST, 1) array
       do i = 1, nelem
          ! copy the estimate to eta_S
          ! FR how do we use the POWERS of estim_loc **2 or **1 ???
          grid%elem(i)%eta(dwrS, 1 ) = grid%elem(i)%estim_loc
       enddo

    ! standard way of DWR
    else
       ! primal rezidual DWR estimate
       ! reconstruct or project the solution u ~ deg_plus
       ! computed from the PRIMAL residual
       call PrimalDWRrezidErrorEstimates( grid, DWR )

       val = 0.
       val1 = 0.
       do i = 1, nelem
            elem => grid%elem(i)

            ! FR PROBLEM: here we set eta_aver = |eta| + |eta*|
            ! due to the theory , it may be better to set eta_aver = |eta + eta*|
            elem%eta( dwr_aver, 1) = &
               0.5 * abs( elem%eta(dwr_sign, 1) * elem%eta(dwrS, 1) &
               + elem%eta(dwr_dual_sign, 1) * elem%eta(dwr_dualS, 1) )

            val = val + elem%eta( dwr_aver, 1)
            val1 = val1 +  0.5 * ( elem%eta(dwr_sign,1) * elem%eta(dwrS, 1) &
               + elem%eta(dwr_dual_sign, 1) * elem%eta(dwr_dualS,1) )

            ! global p+1 problem computed only for the dual solution  MI6
            if (state%space%estim_space == 'DWR' .and. DWR%deg_plus) then
               elem%eta( dwr_aver, 1) =  &
                    abs( elem%eta(dwr_sign, 1) * elem%eta(dwrS, 1) )
            endif

            ! absolute value is needed for MarkTopElements,
            elem%estim_loc = abs( elem%eta( DWR%eta_index ,1) )
            write(99,*) elem%xc(:), elem%estim_loc
         enddo
    endif

    state%estim( dwrE ,:) = 0.0
    state%estim( dwr_aver,1) = ( 0.5*( sqrt( state%estim(dwrS,1) ) &
         + sqrt( state%estim(dwr_dualS,1) ) ) )**2.0

    ! global p+1 problem computed only for the dual solution MI6
    if (state%space%estim_space == 'DWR' .and. DWR%deg_plus) then
       state%estim( dwr_aver,1) =  state%estim(dwrS,1)
    endif

    state%estim( dwr_aver_abs,1 ) = ( 0.5*( sqrt( state%estim(dwrS_abs,1) ) &
               + sqrt( state%estim(dwr_dualS_abs,1) ) ) )**2.0

    ! for DWR%ANIindicators = .false. can be Skipped,
    ! it is compute again in  computeDWRAnisotropicEstimates
    if ( .not.  DWR%ANIindicators ) then

       call DWR%J%computeJu( grid )

       state%estim( dwr_Juh, 1 ) =  DWR%J%Ju**2.0 ! it is sqrt in WriteOutputError
       call DWR%J%computeJu_exact( grid )

       ! Ju_exact is not right if the exact solution is unknown
       state%estim( dwrE, 1 ) = abs( DWR%J%Ju - DWR%J%Ju_exact )**2.0 ! its squarerooted in WriteOutputEstims

       ! stopping criterion in aDWR
       DWR%estimDiscr = 0.5*( sqrt( state%estim(dwrS,1) ) + sqrt( state%estim(dwr_dualS,1)))
       !print*, 'estim DISCR: ' , DWR%estimDiscr
       call WriteDWRErrorsScreen( DWR )

       ! write the errors into the file "aDWR_errors" (DWR%aDWR%file_error_name)
       ! this file is used to write tables and graphs of the DWR errors
       if (state%nlSolver%non_alg_stop == 'aDWR') then
          call WriteAdwrErrorsScreen(DWR)
          ! C_L should NOT be in this tolerance
          DWR%aDWR%nlTol = DWR%estimDiscr * DWR%aDWR%C_N ! * DWR%aDWR%C_Safe
       endif

       call DWR%writeDWRerrorsFile(grid%nelem, state%nsize)


       if (state%nlSolver%non_alg_stop == 'aDWR') then
          if (DWR%aDWR%fixedIter) then
             call writeIntoEtaFile( DWR%aDWR%file_etaS, grid, dwrS )
             call writeIntoEtaFile( DWR%aDWR%file_etaD, grid, dwr_dualS )
          endif
       endif

       if (ndim>1) then
         print*, "plot eta and etaD is commented for Ndim>1!"
       else
         iPlot = 93
         plot_file = 'plot_eta.gnu' !'../DWR/plot_eta.gnu'
         ! PLOT estimate
         ! splot 'plot_sol.gnu' with lines notitle
         open( iPlot, file = plot_file, action="write", status="replace" )
         do i = 1, grid%nelem
            elem => grid%elem(i)
            !         print*, 'dwrS eta: ' , elem%eta(dwrS, 1)
            call PlotElemFunction3D( iPlot, elem,  1, elem%eta(dwrS, 1))
         end do
         print*, 'Space error indicator saved to plot_eta.gnu'
         close(iPlot)

         ! PLOT dual estimator
         plot_file = 'plot_dual_eta.gnu' !'../DWR/plot_eta.gnu'
         open( iPlot, file = plot_file, action="write", status="replace" )
         do i = 1, grid%nelem
            elem => grid%elem(i)
            call PlotElemFunction3D( iPlot, elem,  1, elem%eta(dwr_dualS, 1))
         end do
         print*, 'Dual space error indicator saved to plot_dual_eta.gnu'
         close(iPlot)
       end if



    endif  ! if (.not. DWR%ANIindicators )

    if (state%nlSolver%non_alg_stop /= 'aDWR') then
       DWR%estimDiscr = sqrt(state%estim( dwr_aver,1))
       DWR%estimNL = sqrt(state%estim( dwrA, 1))
       DWR%estimLD = sqrt( state%estim( dwr_dualA, 1) )
       !DWR%estimLD = 10.0
       DWR%estimLP = 0.0
       DWR%aDWR%iter_lin_primal = state%linSolver%iter !, sqrt(state%estim( dwrS,1))
       DWR%aDWR%iter = state%nlSolver%iter !, sqrt(state%estim( dwrA, 1))
       DWR%aDWR%iter_lin_dual = DWR%linSolver_iter !, sqrt(state%estim( dwr_aver, 1))

       call DWR%writeNlErrorFile( grid%nelem)
       ! only aDWR
    else
       !OLD: call writeIntoEtaFile( DWR%aDWR%file_eta_name, grid, dwrS )
    end if


    call state%cpuTime%addEstimTime()

  end subroutine computeDWRestimates


  !> compute the primal DWR error estimates for the projected solution used for hp adaptation
  subroutine PrimalDWRrezidErrorEstimates_hp( grid, DWR )
    type( DWR_t ), intent(inout) :: DWR
    class( mesh ), intent(inout) :: grid
    class( element ), pointer :: elem
    integer :: i, j, kk, k, elemDof, dof, Qnum, nelem, mdof
    integer :: iPlot, ifile, dofP, dofM, Tdof, dofA
    character(len=50) :: plot_file
    real :: val, val1
    real, dimension(:), allocatable  :: Au
    real, dimension(:,:), allocatable  :: A, A1, u
    real, allocatable, dimension(:,:) :: weightFun

    ! element projections
    do i=1, grid%nelem
       elem => grid%elem(i)

       dof = elem%dof
       Tdof = elem%Tdof
       dofM = DOFtriang( elem%deg -1)
       dofP = DOFtriang( elem%deg ) !!+1)

       if(dofP /= size(elem%wST, 2) ) then
          print*,' dimension trouble: ', elem%deg,elem%dof, dofP, size(elem%wST, 2)
          stop "37d3hj3"
       endif

       ! the weights - dual
       dofA = elem%dof_plus

       allocate( weightFun(1:ndim, 1:dofA) )
       weightFun(1:ndim,1:dofA) = elem%zST_Ritz(1:ndim, 1:dofA,1)
       weightFun(1:ndim,1:dofM) = 0.0

       ! the primal residuals

       ! A projection - simplified for scalar problem
       allocate( u(1:dofP, 1:2), Au(1:dofP), A(1:dofP, 1:dofP), A1(1:dofP, 1:dofP) )
       do k=1, ndim
          A(1:dofP, 1:dofP) = elem%bigBlock(0)%Mb( 1:dofP, 1:dofP, k, k, 1, 1 )

          u(1:dofP,1) = elem%wST( k, 1 :  dofP, 1)
          Au(1:dofP) = matmul(A(1:dofP, 1:dofP), u (1:dofP,1) )

          !write(*,'(a12, i5, 200es12.4)')  " u ", i, u(1:dofP,1) !elem%wST( :, :, : )
          !write(*,'(a12, i5, 200es12.4)')  "Au ", i, Au(1:dofP) !elem%wST( :, :, : )

          A1(1:dofM, 1:dofM) = A(1:dofM, 1:dofM)
          !call MblockInverse(dofM, A1(1:dofM, 1:dofM) )

          u = 0.
          u(1:dofM,1) = Au(1:dofM)
          call SolveLocalMatrixProblem(dofM, A1(1:dofM, 1:dofM), 1, u(1:dofM,1) )

          !print*
          !write(*,'(a12, i5, 200es12.4)')  " uL ", i, u(1:dofP,1)
          !write(*,'(a12, i5, 200es12.4)') " AL uL ", i, matmul(A(1:dofM, 1:dofM), u(1:dofM,1))
          !write(*,'(a12, i5, 200es12.4)') " Au =BL ", i, Au(1:dofM)
          !print*


          u(1:dofP, 2) = matmul(A(1:dofP, 1:dofP), u(1:dofP, 1) )

          u(1:dofP,2) = u(1:dofP,2) - Au(1:dofP)
          !elem%wST( k, 1 : dofP, 1) = u(1:dofP)


          ! error estimate for the projection
          !do k=1, ndim
          elem%eta(dwrEtaI_PM, k) = abs( dot_product( weightFun(k,1:dofP), u(1:dofP, 2)  ))

       enddo  ! k=1,ndim

       deallocate(u, A, Au, A1)

       deallocate(  weightFun )

       !print*, 'DED#???',elem%i,elem%eta(dwrEtaI_PM, 1)

    enddo  ! do i=1, grid%nelem


  end subroutine PrimalDWRrezidErrorEstimates_hp


  !> DWR error computation similar to rezid error estimates
  !> the maximum is substituted by the dual solution and reconstruction
  !> perform the error estimates using the dual norm
  !> including (non-)linear algebraic error
  !> ONLY STATIONARY CASE - q=0 in STDG
  subroutine PrimalDWRrezidErrorEstimates(grid, DWR )
    class( mesh ), intent(inout) :: grid
    type( DWR_t ), intent(inout) :: DWR
    class(element), pointer :: elem, elem1
    real :: alg_estim, space_estim, space_estimABS, loc_eta
    integer :: i, iPlot
    character(len=50) :: plot_file
    logical :: loc_implicitly
    real :: t1, t2, ttime
    integer :: kk = 0, k
    logical :: deg_plus

    !write(*,'(a12, 200es12.4)')  "%zsTPlus C1", grid%elem(1)%zSTplus( 1, :, -1 )
    !write(*,'(a12, 200es12.4)')  "%zsTPlus C1", grid%elem(1)%zSTplus( 1, :, 1 )

    deg_plus = .true.

    state%space%adapt%stop_adaptation=0

    ttime = state%time%ttime

    loc_implicitly = state%nlSolver%implicitly
    state%nlSolver%implicitly = .false.
    grid%elem(:)%deg_plus = .true.

    ! setting of fields elem%vec(rhs,*), elem%vec(rhsT,*) for error estimate
    ! deg_plus = .true.
    !call ComputeSTDGM_Terms( .true. )
!    print*, 'ComputeST_Terms in PrimalDWRrezidErrorEstimates degplus= true, impl =', state%nlSolver%implicitly
!    call ComputeST_Terms( .true. )

    call cpu_time(t1)

    if( state%time%disc_time /= 'STDG') &
          stop 'DWRrezidErrorEstimates only for STDGM'

    if ( state%time_dependent ) then
      stop 'DWR method is not implemented for time-dependent problems YET!'

    !STATIONARY
    else
      alg_estim= 0.0
      space_estim = 0.0 ! sum
      space_estimABS = 0.0 ! sqrt of **2.

      if (state%time%deg > 0 ) &
         stop 'DWRElemEstim are implemented only for q == 0!'

      if (DWR%PU)  stop 'DWR%PU not tested for new Adgfem'

      if (DWR%PU) then
         !print*, 'DWR%PU we need p+2 long residual, dont forget to put it back'
         print*, "UpdateProblemPlus is no longer used! COMMENTED"
         stop 'PU method should be updated with the new bigBlock structure!'

!         call updateProblemPlus( grid, .true. )
!         ! we need the larger matrix
!         state%nlSolver%implicitly = .false.
!         !grid%elem(:)%deg_plus = .true.
!         call ComputeSTDGM_Terms(deg_plus )
!         call updateProblemPlus( grid, .false. )
!         !grid%elem(:)%deg_plus = .true.
!         call ComputeSTDGM_Terms(deg_plus )
      endif

      ! compute the vertex oriented estimates using PU
      if (DWR%PU) then

         allocate( DWR%pu_estim(1: grid%nelem) )

         do i = 1,grid%nelem
            call DWR%pu_estim(i)%init(3)
            elem => grid%elem(i)
            DWR%pu_estim(i)%x(1:3) = DWRElemEstim_PU(elem)
         enddo
         call DWR%distributePuEstimToVertices( grid )

         iPlot = 56
         plot_file = "vertex_estims.gnu"
         open( iPlot, file = plot_file, action="write", status="replace" )
         call grid%plotVertexFunction3D( iPlot, DWR%vertex_estim(1:grid%npoin) )
         print*, 'Vertex estims saved to vertex_estims.gnu'
         close(iPlot)
      endif

      !write(*,'(a12, 200es12.4)')  "%zsTPlus C2", grid%elem(1)%zSTplus( 1, :, -1 )
      !write(*,'(a12, 200es12.4)')  "%zsTPlus C2", grid%elem(1)%zSTplus( 1, :, 1 )

      !write(*,'(a12, 200es12.4)')  "%elem%vec ", grid%elem(1)%vec( rhs,  : )
      !write(*,'(a12, 200es12.4)')  "%elem%vec ", grid%elem(1)%w( 0,  : )
      !write(*,'(a12, 200es12.4)')  "%elem%wST ", grid%elem(1)%wST( :,:,  : )

      ! auxiliarly error estimate for the HGhp variant
      if( state%space%adapt%adapt_type == 'HG' ) &
           call PrimalDWRrezidErrorEstimates_hp(grid, DWR )

      do i=1, grid%nelem
         elem => grid%elem(i)

         ! we use elem%vec( rhs, :) where the vector a(u,\phi_i) from 1 to ndof_plus is saved

         ! compute the space error
         if (DWR%PU) then
            loc_eta = DWR%EtaFromVertices( elem ) ! distribute estims from vertices to elements
         else
            loc_eta = DWRElemEstim_space(elem)
         endif

         space_estim = space_estim + loc_eta
         space_estimABS = space_estimABS + abs( loc_eta )

         elem%eta(dwrS,1) = abs( loc_eta )
         ! signum of eta( dwrS ), can be used when we try to improve the localization
         elem%eta(dwr_sign,1) = sign( 1.0, loc_eta )

         !compute the algebraic error
         loc_eta = DWRElemEstim_alg(elem)
         !          alg_estim(1:ndim) = alg_estim(1:ndim) + loc_eta(1:ndim)

         ! algebraic DWR error, absolute value of NOT, dwrA
         !alg_estim = alg_estim + abs( loc_eta )
         alg_estim = alg_estim + ( loc_eta )

         elem%eta(dwrA,1)  = abs( loc_eta )
         !          if ( minval(loc_eta(1:ndim)) < 0.0 ) then
         !            print*, '(PROBLEM?) The local algebraic error indicator is negative on element(', &
         !               elem%i , ') and equals:', loc_eta(1:ndim)
         !          endif

         !print*,'edes3s22',i,  elem%eta(dwrS,1:ndim), elem%eta(dwrEtaI_PM,1:ndim)

      enddo

       state%estim( dwrS, 1 ) = space_estim**2.0 ! has to be sqrt in future - its done in WriteOutputError
       state%estim( dwrS_abs, 1) = space_estimABS**2.0 ! has to be sqrt in future - its done in WriteOutputError
       state%estim( dwrA ,1 ) = alg_estim**2.0 ! has to be sqrt in future - its done in WriteOutputError

    endif !stationary

    state%time%ttime = ttime
    state%nlSolver%implicitly  = loc_implicitly
    grid%elem(:)%deg_plus = .false.

    call cpu_time(t2)
    state%CPU_constaint = state%CPU_constaint + t2 - t1

  end subroutine PrimalDWRrezidErrorEstimates


  !> including (non-)linear algebraic error
  !> ONLY STATIONARY CASE - q=0 in STDG
  !> compute the DUAL estimator depending on the dual residual r*(u_h,z_h)( I_h^{p+1 }u_h  - u_h )
  subroutine DualDWRrezidErrorEstimates( grid, DWR )
    class( mesh ), intent(inout) :: grid
    type( DWR_t ), intent(inout) :: DWR
    class(element), pointer :: elem, elem1
    integer :: i, iPlot
    character(len=50) :: plot_file
    logical :: loc_implicitly
    real :: t1, t2, ttime
    real, allocatable, dimension(:) :: x,b, xx ,bb
    integer :: ivec, kvec, ndof, dof, dof_plus, mdof, plusDeg, Tdof, k, l, nsize, dd
    real :: loc_eta, loc_etaA
    real :: space_estim, space_estimABS, alg_estim
    integer :: j, dofA, bigNsize, smallNsize
    !real, allocatable, dimension(:) :: wi
    real, allocatable, dimension(:,:) :: weightFun
    real, pointer, dimension(:,:,:) :: temp
    logical :: impl
    integer :: kk = 0

    !write(*,'(a12, 200es12.4)')  "%wsTPlus A1", grid%elem(1)%wSTplus( 1, :, -1 )
    !write(*,'(a12, 200es12.4)')  "%wsTPlus A1", grid%elem(1)%wSTplus( 1, :, 1 )

    if (state%time%deg > 0 ) &
      stop 'DualDWRElemEstim are implemented only for q == 0!'

!    state%space%adapt%stop_adaptation = 0
    ! for ComputeTerms
    impl = state%nlSolver%implicitly
    state%nlSolver%implicitly = .true.

!    call cpu_time(t1)

    if( state%time%disc_time /= 'STDG') &
          stop 'DWRrezidErrorEstimates only for STDGM'

    if ( state%time_dependent ) then
      stop 'DWR method is not implemented for time-dependent problems YET!'

    !STATIONARY
    else
      plusDeg = state%p_mod_max

      if (state%nlSolver%non_alg_stop == 'aDWR' .and. DWR%PU) &
            stop 'aDWR is not implemented with PU!'

!
      grid%elem(:)%deg_plus = .true.

      alg_estim = 0.0
      space_estim = 0.0 ! sum
      space_estimABS = 0.0 ! sqrt of **2.

!      print*, 'FRcontrol - wi(dof+1:dofA) line 2167 VD ETA^I'

      ! FR_aDWR control residuum
!      do i = 1, grid%nelem
!         elem => grid%elem(i)
!
!         dof = elem%dof
!         dofA = elem%dof_plus
!         Tdof = elem%Tdof
!         if (i<10) then
!           print*, "norm of the res:" , norm2(DWR%dualRes(i)%x(1,1:dof,1)), norm2(DWR%dualRes(i)%x(1,dof+1:dofA,1))
!         end if
!
!      end do

      do i = 1, grid%nelem
         elem => grid%elem(i)

         dof = elem%dof
         dofA = elem%dof_plus
         Tdof = elem%Tdof
         ! put to new subroutine on every elem
         !allocate( wi(1:dofA) )
         ! FR_DEGPLUS -call setWeightFun_primalEst( elem, DWR%wType, weightFun(1:ndim,1:dofP))
         allocate( weightFun(1:ndim,1:dofA))
         call setWeightFun_dualEst( elem, DWR%wType, weightFun(1:ndim,1:dofA))

         loc_eta = 0.0
         loc_etaA = 0.0

         do j = 1,ndim
            ! CONTROL whether (wSTplus - wST) or wSTplus is used, differs by the alg error
            loc_eta = loc_eta + &
              dot_product( weightFun(j, 1:dofA), DWR%dualRes(i)%x(j,1:dofA,1)  )
            loc_etaA = loc_etaA + &
              dot_product( elem%wST(j,1:dof,1), DWR%dualRes(i)%x(j,1:dof,1) )

         enddo ! j

         deallocate( weightFun )

         space_estim = space_estim+ loc_eta
         space_estimABS = space_estimABS + abs( loc_eta )
         elem%eta( dwr_dualS, 1 ) = abs( loc_eta )
         ! the signum of eta_dualS - may be used when we try to improve the localization
         elem%eta(dwr_dual_sign,1) =  sign( 1.0 , loc_eta )

         ! algebraic DWR error, absolute value of NOT, dwrA
         !alg_estim = alg_estim + abs( loc_etaA )
         alg_estim = alg_estim +  ( loc_etaA )

         elem%eta(dwr_dualA, 1)  = abs( loc_etaA )

      end do !i

      state%estim( dwr_dualS, 1 ) = space_estim**2.0 ! has to be sqrt in future - its done in WriteOutputError
      state%estim( dwr_dualS_abs, 1) = space_estimABS**2.0 ! has to be sqrt in future - its done in WriteOutputError
      state%estim( dwr_dualA ,1 ) = alg_estim**2.0 ! has to be sqrt in future - its done in WriteOutputError

      grid%elem(:)%deg_plus = .false.

    endif !stationary

    !put back
    state%nlSolver%implicitly = impl

!    call cpu_time(t2)
!    state%CPU_constaint = state%CPU_constaint + t2 - t1

  end subroutine DualDWRrezidErrorEstimates


  subroutine Set_Elem_Regul_Estim_Decay(elem)
   class(element), target, intent(inout):: elem ! elem = element
    real, dimension(:), pointer :: weights
    real, dimension(:,:), pointer :: phi
    real, dimension(:,:,:), pointer :: Dphi
    real, dimension(:,:), allocatable :: wi, wii
    real, dimension(:,:,:), allocatable :: wwP   ! projections
    integer :: i, Qnum, Qdof, dof, dofL, deg, k, kst, l, ifile, ifile1, ifile2
    integer :: min_deg, ni
    real, dimension (:, :), allocatable :: ss
    real, dimension (:, :), allocatable :: eta_loc
    real :: val, val1, order, sumy, sumyi, sumx, sumx2, sumy2, sumxy, eta_resS
    logical :: iprint, singularity, loc_implicitly

    iprint = .false.
    !iprint = .true.

    call Detect_apriori_known_singularity(elem, singularity)
    if(singularity) iprint = .true.

    if( mod(elem%i, 1) == 0) iprint = .true.

    !if(elem%i == 1 .or. elem%i == 101 .or. elem%i == 111) iprint = .true.   ! mesh LL-shape.uns.365.grid
    !if(elem%i == 2 .or. elem%i == 38 ) iprint = .true.  ! mesh LL-shape.uns.100.grid


    ! storing of array eta
    allocate(eta_loc(1:max_eta, 1:ndim) )
    eta_loc(1:max_eta, 1:ndim) = elem%eta(1:max_eta, 1:ndim)

    dof = elem%dof


    ! projection of the solution of the element
    allocate(wwP(0:elem%deg, 1:dof, 1:ndim) )
    !if(iprint) &
    !   write(*,'(a6,3i5, 50es12.4)') 'proj W:',elem%i, elem%deg, deg, elem%w(0, 1:dof)
    do k=1, ndim
       wwP(elem%deg, 1:dof, k) = elem%w(0, (k-1)*dof + 1 : k*dof)
    enddo


    !write(*,'(a6,3i5, 50es12.4)') 'proj W:',elem%i, elem%deg, deg, wwP(elem%deg, 1:dof, 1)
    do deg = elem%deg, 0, - 1
       call Energy_Elem_deg_projection(elem, deg, wwP(deg, 1:dof, 1:ndim) )
    !!   if(iprint) then
    !!   write(*,'(a6,3i5, 50es12.4)') 'proj W:',elem%i, elem%deg, deg, wwP(deg, 1:dof, 1)
    !!   endif
    enddo

    !write(*,'(a12, 3i5, l3,40es12.4)') 'rhs:',elem%i, elem%deg, &
    !     elem%dof, state%nlSolver%implicitly, elem%vec(rhs,:)

    loc_implicitly = state%nlSolver%implicitly
    state%nlSolver%implicitly = .false.

    elem%deg_plus = .true.


    ! files outputs
    if(singularity) then
       ifile = state%space%adapt%adapt_level + 550
    else
       ifile = state%space%adapt%adapt_level + 500
    endif
    ifile1 = ifile + 100
    ifile2 = ifile + 200

    ! least squares
    sumx = 0.; sumy = 0.; sumx2 = 0.; sumy2 = 0.; sumxy = 0.; ni = 0

    min_deg = max(0, elem%deg -2)
    !min_deg = 0.
    do deg = elem%deg, min_deg, - 1
       do k=1, ndim
          elem%w(0, (k-1)*dof + 1 : k*dof)  = wwP(deg, 1:dof, k)
       enddo
       call Compute_ONLY_ONE_ELEMENT_Terms(elem )

       eta_resS = elem%eta(resS, 1)

       if( state%time%disc_time /= 'STDG') then
          call DualElemEstimate(elem, 3, .true.)  ! 3 => element residuum in X- norm
       else
          !call ST_DualElemEstimate_Var2(elem,3)
          !call ST_DualElemEstimate(elem, 2 )  ! 2 => element residuum in the H^1-norm (for inviscid??)
          call ST_DualElemEstimate(elem, 3 )  ! 3 => element residuum in X- norm
       endif

       !if(elem%deg == 2) &
            write(*,'(a12, 3i5, l3,2es12.4,a2, 40es12.4)') 'rhs:',elem%i, deg, (deg+1)*(deg+2)/2, &
            state%nlSolver%implicitly, elem%eta(resA:resS, 1), '|',elem%vec(rhs,:)

       ! least squares:
       if(deg <= elem%deg .and. deg >= 2 ) then
          ni = ni + 1
          sumx = sumx + log(1. * deg)
          sumx2 = sumx2 + log(1. * deg)**2
          sumy = sumy + log(elem%eta(resS, 1))
          sumy2 = sumy2 + log(elem%eta(resS, 1))**2
          sumxy = sumxy + log(1. * deg) * log(elem%eta(resS, 1))
         ! write(*,'(a8, i5, 30es12.4)') 'L R 23:',ni, 1.*deg, elem%eta(resS, 1), sumx, sumx2, sumy, sumy2, sumxy
       endif

       ! direct evaluation
       val = 1.
       val1 = 1.
       if(deg < elem%deg .and. deg >= 2 ) then
          val = 1.5 + log(1./sqrt(elem%area) *  eta_resS / elem%eta(resS, 1)) / log(1.*(deg -1 ) / deg)
          val1 = 1.5 + log( eta_resS / elem%eta(resS, 1) ) / log(1.*(deg -1 ) / deg)
          !print*,'#E@S@', eta_resS / elem%eta(resS, 1) , 1.*(deg -1 ) / deg,  &
          !     log( eta_resS / elem%eta(resS, 1)), log(1.*(deg -1 ) / deg)


          write(ifile1, *) elem%i, elem%deg, elem%deg - 0.1 * (elem%deg - deg), &
               elem%eta(resS, 1),val, val1

          !print*,'vals:', val, val1

       endif

       write(ifile, *) elem%i, elem%deg, elem%deg - 0.1 * (elem%deg - deg), &
               elem%eta(resS, 1),val, val1
    enddo

    ! least squares
    if( ni * sumx2-sumx*sumx /= 0.) then
       order =  - (ni*sumxy -sumx*sumy)/(ni*sumx2-sumx*sumx) + 1.5
    else
       order = 1.
    endif

    write(ifile2, *) elem%i, elem%deg, elem%deg, order, elem%xc

    !if(elem%deg == 2) print*,'---', order
    !if(ni == 3) stop "93u30ojd3ow,xwsaxzwsq]'xs21"

    write(ifile, '(x)')
    write(ifile1, '(x)')

    ! RESTORING varaibles and array back
    state%nlSolver%implicitly = loc_implicitly
    do k=1, ndim
       elem%w(0, (k-1)*dof + 1 : k*dof)  = wwP(elem%deg, 1:dof, k)
    enddo

    elem%eta(1:max_eta, 1:ndim) = eta_loc(1:max_eta, 1:ndim)
    deallocate(eta_loc)

  end subroutine Set_Elem_Regul_Estim_Decay


!  !> compute the Ritz reconstruction based on the local problem
!  !> \f$ a(uPlus, \varphi) = res(\varphi) \forall \varphi \in S_{hp}\f$
!  !> primal / dual solution is used
!  !> if return flag == .true. then the reconstruction is zero on whole domain
!  !> it needs to the matrix elem%blockPlus,and RHS saved in elem%rhsST for primal and DWR%rhs for dual
!  subroutine RitzReconstruction( grid , primal, flag, DWR)
!    class( mesh ), intent(inout) :: grid
!    logical, intent(in) :: primal
!    logical, intent(out) :: flag
!    type( DWR_t ), intent(in), optional :: DWR
!    class(element), pointer :: elem
!    integer :: nelem, i, j , k, nsize, dof, Tdof, ifile
!    real, dimension(:), allocatable :: b ! rhs
!    logical :: loc_implicitly
!    logical :: deg_plus
!
!    deg_plus = .true.
!    ! we suppose that DWR%deg_plus = false ! problem for DWR_P, where it is TRUE
!
!    nelem = grid%nelem
!
!    flag = .true.
!
!    ifile = 200 + 10 * state%space%adapt%adapt_level
!    print*, 'ifile === ', ifile
!
!    print*, 'Ritz reconstruction primal:', primal
!    print*
!
!    if (.not. allocated( grid%elem(1)%blockPlus ) ) &
!      stop 'blockPlus is not allocated in RitzReconstruction'
!
!    !nsize = size( grid%elem(1)%blockPlus%Mb(:,1) )
!    !do i=1,nsize
!    !   write(*,'(a8,i5, 300es12.4)') 'MB:;', i, grid%elem(1)%blockPlus%Mb(i,1:nsize)
!    !enddo
!
!
!    if (primal) then
!
!      ! fill the RHS
!      loc_implicitly = state%nlSolver%implicitly
!      state%nlSolver%implicitly = .false.
!      !grid%elem(:)%deg_plus = .true.
!      call ComputeSTDGM_Terms( deg_plus )
!      !grid%elem(:)%deg_plus = .false.
!      state%nlSolver%implicitly = loc_implicitly
!
!      ! not done yet
!      do i = 1, nelem
!         elem => grid%elem(i)
!         dof = elem%dof_plus
!         Tdof = elem%Tdof
!         nsize = size( grid%elem(i)%blockPlus%Mb(:,1) )
!         allocate( b(1:nsize), source = 0.0 )
!
!         !write(*,'(a10, 8i5)') 'SIZES:', i, elem%deg , elem%dof, elem%dof_plus, dof, Tdof, ndim, nsize
!         b(1:nsize) = copy3Darrayto1Darray( elem%rhsST( 1:ndim,1:dof,1:Tdof), nsize )
!!         if (i==1) then
!!!!            print*, 'MATRIX::' , elem%blockPlus%Mb(:,:)
!!!         if ( abs( sum(elem%rhsST( 1:ndim,1:dof,1:Tdof) - DWR%dualRes(i)%x(:,:,:))) > 0.00001 ) &
!!            print*, 'difference in primal and dual RHS::', elem%rhsST( 1:ndim,4:dof,1:Tdof) - DWR%dualRes(i)%x(:,4:dof,:)
!!!            print*, 'elem vec:' , rhs, elem%vec(rhs,:)
!!!            stop
!!         endif
!!
!         call SolveLocalMatrixProblem(nsize, elem%blockPlus%Mb(1:nsize,1:nsize), 1, b(1:nsize) )
!
!         !rhs -> wSTplus
!         if ( associated( elem%wSTplus ) ) &
!               deallocate( elem%wSTplus )
!         allocate( elem%wSTplus( 1:ndim, 1:dof, 1:Tdof ) )
!         elem%wSTplus(1:ndim, 1:dof, 1:Tdof) = copy1DarrayTo3Darray( &
!                                       b(1:nsize), ndim, dof, Tdof)
!         !CONTROL - the reconstruction is not zero somewhere
!         if ( norm2(b(:)) > 1E-9 ) then
!               flag = .false.
!         endif
!         deallocate( b )
!
!         ! FR for testing purposes only
!         call PlotElemFunction3D(ifile+1, elem, dof, elem%wSTplus( 1:ndim, 1:dof, 1))
!
!      enddo
!
!    else ! dual !will be never called for DWR_P
!      if ( .not. present(DWR) ) &
!         stop 'RitzReconstruction with primal=.false. cannot be called without DWR, needed for the rhs!'
!
!      do i = 1, nelem
!         elem => grid%elem(i)
!         dof = elem%dof_plus
!         Tdof = elem%Tdof
!         nsize = size( grid%elem(i)%blockPlus%Mb(:,1) )
!         allocate( b(1:nsize), source = 0.0 )
!
!         write(*,'(a10, 8i5)') 'sizes:', i, elem%deg , dof, Tdof, ndim, nsize!, &
!          write(*,*)     size(DWR%dualRes(i)%x, 1), &
!              size(DWR%dualRes(i)%x, 2), &
!              size(DWR%dualRes(i)%x, 3)
!
!         b(1:nsize) = DWR%dualRes(i)%copyTo1Darray3_1_2( nsize )
!         call SolveLocalTrasposedMatrixProblem(nsize, elem%blockPlus%Mb(1:nsize,1:nsize), 1, b(1:nsize) )
!
!         !rhs -> zSTplus
!         if ( associated( elem%zSTplus ) ) &
!               deallocate( elem%zSTplus )
!         allocate( elem%zSTplus( 1:ndim, 1:dof, 1:Tdof ) )
!         elem%zSTplus(1:ndim, 1:dof, 1:Tdof) = copy1DarrayTo3Darray( &
!                                       b(1:nsize), ndim, dof, Tdof)
!         !CONTROL - the reconstruction is not zero somewhere
!         if ( norm2(b(:)) > 1E-9 ) then
!               flag = .false.
!         endif
!         deallocate( b )
!
!         ! FR for testing purposes only
!         call PlotElemFunction3D(ifile+2, elem, dof, elem%zSTplus( 1:ndim, 1:dof, 1))
!      enddo
!    endif
!
!  end subroutine RitzReconstruction


  ! write one line into the file for errors from elem%eta( dwrS )
   ! now it is cleaned after every adaptation
   ! i_eta - ith index of eta
   subroutine writeIntoEtaFile( fileName, grid, i_eta )
      character(len=20), intent(inout) :: fileName
      class( mesh ), intent(in) :: grid
      integer, intent(in) :: i_eta
      integer :: i
      integer :: iFile = 42
      real, dimension(:), allocatable :: estim

      allocate( estim(1:grid%nelem) , source = 0.0 )

      do i = 1, grid%nelem
         estim(i) = grid%elem(i)%eta( i_eta , 1 )
      end do

      print*, 'fileName', fileName

      open( iFile, file = fileName , action="write", position="append" )
        write(iFile,*) &
          estim
      close(iFile)

      deallocate( estim )

  end subroutine writeIntoEtaFile


  !> main subroutine for the DWR_ANI errors estimates based on the article Dolejsi 2017
  !> computes etaII
  subroutine computeDWRAnisotropicEstimates( DWR, grid)
      type( DWR_t ), intent(inout) :: DWR
      class( mesh ), intent(inout) :: grid
      class( element ), pointer :: elem
      integer :: i, j, k, ifile
      integer :: nelem
      real :: wDiff, wPlusDiff, wDiff_loc, wPlusDiff_loc
!      real, dimension(1:ndim) :: globalEstim, primalEstimFull, dualEstimFull
      real :: globalEstim, primalEstimFull, dualEstimFull
      ! is the dual solution zST already computed
      if (.not. DWR%dualProblem_computed) &
        stop 'Dual solution zST needs to be already computed in computeDWRAnisotropicEstimates!'

      call state%cpuTime%startEstimTime()

      globalEstim = 0.0

      ! type of the higher-order reconstruction for primal+dual solutions:  ! VD ETA^II
      !itype = 1 ! reconstruction from vertex based
      ! other types need to be copied from zST_LS to zSTplus

      !itype =  1 ! reconstruction from WENO_LS with p+1
      !itype =  2 ! reconstruction from WENO_LS with p+2  DO NOT USE for hp !!!

      if (ndim > 1) then
        !print*, "computeDWRAnisotropicEstimates for NDIM > 1"
        if (state%model%Re > 0) then
          print*, "computeDWRAnisotropicEstimates is done only for Euler eq!", ndim,state%model%Re
          stop
        end if
      end if

      nelem = grid%nelem
      ! fill elem%w(0,:) array  - Necessary for STDG computation
      do i = 1, nelem
         elem => grid%elem(i)
         call Transfer_wST_to_w_Elem(elem , 0, elem%TQnum)
         !TODO : wActual - we should use it in future instead of elem%w
      enddo

      state%estim( dwrS,      : ) = 0.
      state%estim( dwr_dualS, : ) = 0.

      state%estim(dwr_LS_EtaII,1:ndim) = 0.
      state%estim(dwr_loc_EtaII,1:ndim) = 0.


      state%estim(6:9,1:ndim) = 0.
      state%estim(11:15,1:ndim) = 0.
      state%estim(18:20,1:ndim) = 0.



      do i=1,nelem
        elem => grid%elem(i)

        ! compute the primal estimates based on
        ! primal residual weighted by the norms of dual sol zST
        ! 1 - \eta_{V,K}, 2 - \eta_{K,B}, 3 - \eta_{K,D}
        ! psi = zSTplus - P_h(zSTplus)
        ! 4 - ||psi||_{K}^2 , 5 - ||psi||_{\partial K}^2, 6 - ||A grad(psi)||_{\partial K}^2
        !print*,'DWRAniEstimElemPrimal',elem%i
        call  DWRAniEstimElemPrimal(elem, primalEstimFull, DWR%wType )
        ! compute the DUAL estimates based on
        ! dual residual weighted by the norms of dual sol wST
        !  \eta^*_{V,K},  \eta^*_{K,B},  \eta^*_{K,D}
        ! psi = wSTplus - P_h(wSTplus)
        !  ||psi||_{K}^2 ,  ||psi||_{\partial K}^2,  ||A grad(psi)||_{\partial K}^2
        !print*,'DWRAniEstimElemDual',elem%i
        call  DWRAniEstimElemDual(elem, DWR%J, dualEstimFull, DWR%wType )


        ! !!! CORRECTION OF BUGS IN SQUARES IN REZIDUA AND WEIGHTS, VD 2018/01/10

        ! put to elem%eta for classical HG- adaptation ???
        elem%eta( dwr_aver ,1:ndim) = 0.0
        elem%eta( dwr_aver ,1 ) = (primalEstimFull + dualEstimFull)/2.

        ! global p+1 problem computed only for the dual solution  MI6
!        if (state%space%estim_space == 'DWR' .and. DWR%deg_plus) then
!          if (i==1) &
!               print*, "for DEG_PLUS we use only primal estim in DWR_ANI!", primalEstimFull
!          elem%eta( dwr_aver ,1) = primalEstimFull
!        endif


        ! array for marking element when HG adaptation is used
        !elem%estim_loc = elem%eta(dwr_aver, 1)       ! \eta^II
        !elem%estim_loc = elem%eta(dwrEtaI_aver, 1)   ! \eta^I
        !elem%estim_loc = elem%eta(dwr_LS_EtaII, 1)
        !elem%estim_loc = elem%eta(dwrEtaI_aver, 1)

        elem%estim_loc = elem%eta(DWR%eta_index, 1)  ! used for hp-variant
        !print*,'DWR%eta_index = ', DWR%eta_index


        !!elem%estim_loc = elem%eta(dwrWeightKV, 1)
        ! PRIMAL only - probably not used ???
        elem%eta( dwrS ,1) =  primalEstimFull  ! VD

        ! dual only
        elem%eta( dwr_dualS ,1) = dualEstimFull

        globalEstim = globalEstim + elem%eta( dwr_aver ,1)

        state%estim( dwrS, 1:ndim ) = state%estim( dwrS, 1:ndim ) &
             + elem%eta( dwrS,1:ndim )

        state%estim( dwr_dualS, 1:ndim ) = state%estim( dwr_dualS, 1:ndim ) &
             + elem%eta( dwr_dualS, 1:ndim)

        !if(i == 10 .or. i== 236) then !print*,'!> BEGIN  test for the suitability of estimates'
        !   write(*,'(a8,i5, 30es12.4)') 'etasD:',i, elem%estim_loc, elem%eta( dwr_aver , 1), &
        !        primalEstimFull(1), dualEstimFull(1), &
        !        elem%eta(dwrEtaKV, 1), elem%eta(dwrEtaKV_dual, 1), &
        !        elem%eta(dwrEtaKB, 1), elem%eta(dwrEtaKB_dual, 1)
        !endif

        !itype2 = -1
        !call  DWRAniEstimElemPrimal(elem, primalEstimFull, itype2 )
        !call  DWRAniEstimElemDual( elem, DWR%J, dualEstimFull, itype2 )

        !elem%eta(dwr_loc_EtaII,1:ndim) = (primalEstimFull + dualEstimFull)/2
        !state%estim(dwr_loc_EtaII,1:ndim) = state%estim(dwr_loc_EtaII,1:ndim) &
        !     + elem%eta(dwr_loc_EtaII,1:ndim)
        ! !> END  test for the suitability of estimates


        state%estim(6:9,1:ndim)   = state%estim(6:9,1:ndim) +   elem%eta( 6: 9, 1:ndim)
        state%estim(11:12,1:ndim) = state%estim(11:12,1:ndim) + elem%eta(11:12, 1:ndim)

        state%estim(13,1:ndim) = state%estim(13,1:ndim) + elem%eta(13, 1:ndim)*elem%eta(9,1:ndim)
        state%estim(14,1:ndim) = state%estim(14,1:ndim) + elem%eta(14, 1:ndim)*elem%eta(11,1:ndim)
        state%estim(15,1:ndim) = state%estim(15,1:ndim) + elem%eta(15, 1:ndim)*elem%eta(12,1:ndim)

        state%estim(18:20,1:ndim) = state%estim(18:20,1:ndim) &
             + elem%eta(18:20, 1:ndim)*elem%eta(6:8,1:ndim)


      end do  ! do i=1, nelem

      !do i=1,20,5
      !   write( *,'(a8, i5, 40es12.4)') 'ESTIM_LOC', i,grid%elem((i-1)*5+1: i*5)%estim_loc
      !end do

      ! has to be sqrt in future - its done in WriteOutputError
      state%estim( dwr_aver, 1) = globalEstim**2.0
      state%estim( dwrS,     1:ndim ) = state%estim( dwrS,1:ndim)**2.0
      state%estim( dwr_dualS,1:ndim ) = state%estim( dwr_dualS,1:ndim)**2.0

      state%estim(6:9,1:ndim)   = state%estim(6:9,1:ndim)**2
      state%estim(11:15,1:ndim) = state%estim(11:15,1:ndim)**2
      state%estim(18:20,1:ndim) = state%estim(18:20,1:ndim)**2

      state%estim(dwr_LS_EtaII,1:ndim)  = state%estim(dwr_LS_EtaII,1:ndim)**2.0
      state%estim(dwr_loc_EtaII,1:ndim) = state%estim(dwr_loc_EtaII,1:ndim)**2.0

      ! value of the target functional
      call DWR%J%computeJu( grid )
      state%estim( dwr_Juh, 1:ndim ) =  DWR%J%Ju**2.0 ! has to be sqrt in future - its done in WriteOutputError


      call DWR%J%computeJu_exact( grid )

      ! Ju_exact is not right if the exact solution is unknown
      state%estim( dwrE, 1 ) = abs( DWR%J%Ju - DWR%J%Ju_exact )**2.0 ! its squarerooted in WriteOutputEstims


      print*, '----------------------------------------------------------------------------'

!      if (DWR%deg_plus) then
!        write(*,*) ' EtaI_primal      EtaII_primal      J(u)-J(u_h)       J(u)          J(u_h) '
!        write(*, '(5es16.8)')  &
!         sqrt( state%estim(dwrEtaI_primal,1)), sqrt(state%estim(dwrS,1)), sqrt(state%estim( dwrE, 1)), &
!                DWR%J%Ju_exact, DWR%J%Ju
!      else
         !write(*,*) '  EtaI          EtaII         J(u)-J(u_h)          J(u)                 J(u_h) '
         !write(*, '(3es14.6, 2es24.16)')  &
         !     sqrt( state%estim(dwrEtaI_aver,1:ndim) ), globalEstim(1:ndim), sqrt(state%estim( dwrE, DWR%J%coord_i)), &
         !     DWR%J%Ju_exact, DWR%J%Ju

         write(*, '(a15, es24.16, a15, es14.6,es16.4)') 'exact  J(u) =', DWR%J%Ju_exact, 'EtaI (etaA) =',&
              sqrt( state%estim(dwrEtaI_primal,1) ),  sqrt(state%estim( dwrEtaI_primalA, 1))

         write(*, '(a15, es24.16, a15, es14.6,es16.4)')'direct J(u_h) =', DWR%J%Ju, 'EtaI(etaA*) =', &
              sqrt( state%estim(dwrEtaI_dual,1) ), sqrt(state%estim( dwrEtaI_dualA, 1))

         write(*, '(a15, es24.16, a15, 2es14.6)') 'BiCG  J(u_h) =', DWR%J%Ju_BiCG, &
              'J(u)-J(u_h)=',abs(DWR%J%Ju-DWR%J%Ju_exact), abs(DWR%J%Ju_BiCG-DWR%J%Ju_exact)

         write(*, '(3(a15, es14.6))') "etaII primal: " , sqrt(state%estim( dwrS, 1)), &
              "etaII dual: " , sqrt(state%estim( dwr_dualS,1 ) ), &
              "etaII aver: " , sqrt(state%estim( dwr_aver,1 ) )

!      endif

!      print*, "GAMMA IN/OUT"
!      do i=1,nelem
!        elem => grid%elem(i)
!        ! boundary element
!        if ( minval(elem%face(neigh,:))  <= 0 .and. any(elem%iBC(:) /= 0) ) then
!          write(*, '(a7, i7, a6 ,es10.2)') , "elem i:", i, "etaS: ", elem%eta(dwrS,1)
!          write(*, '(a15, 10es10.2)') , "etaKB:", elem%eta( dwrEtaKB,1:ndim)
!          write(*, '(a15, 10es10.2)') , "weightKB:", elem%eta( dwrWeightKB,1:ndim)
!!          print*, "weightKV:", elem%eta( dwrWeightKV,1:ndim)
!!          print*, "weightKB:", elem%eta( dwrWeightKB,1:ndim)
!        end if
!      end do



      !write(*, '(40es16.8)') sqrt(state%estim(1:5, 1))
      print*, '----------------------------------------------------------------------------'
!      print*, 'Primal etaII = ', state%estim( dwrS,     1:ndim )**0.5
!      print*, 'Dual etaII = ', state%estim( dwr_dualS,     1:ndim )**0.5

      !call controlIfEtaIisLessThanEtaII( grid ) ! which should hold on all elements

      ! moved to ComputeDWRAnisotropicMetric in ama-hp_DWR.f90
      if(state%space%adapt%adapt_type == 'HG') &
           call DWR%writeDWRerrorsFile(grid%nelem, state%nsize )

      call state%cpuTime%addEstimTime()

      !stop 'End of computeDWRAnisotropicEstimates!'

  end subroutine computeDWRAnisotropicEstimates


  ! move the estimates to the new eta given in paramets
  ! so that they dont get overwitten by the anisotropic estimates etaII
  subroutine moveDWRestimatesEtaI( DWR, grid )
    type( DWR_t ), intent(inout) :: DWR
    class( mesh ), intent(inout) :: grid
    class( element ), pointer :: elem
    integer :: i, j, kk, k, elemDof, dof, Qnum, nelem, mdof

    do i = 1, grid%nelem
      elem => grid%elem(i)
      elem%eta( dwrEtaI_aver ,1:ndim) = elem%eta( dwr_aver ,1:ndim)
      elem%eta( dwrEtaI_primal ,1:ndim) = elem%eta( dwrS ,1:ndim)
      elem%eta( dwrEtaI_dual ,1:ndim) = elem%eta( dwr_dualS ,1:ndim)
      elem%eta( dwrEtaI_aver_abs ,1:ndim) = elem%eta( dwr_aver_abs ,1:ndim)

      elem%eta( dwrEtaI_primalA ,1:ndim) = elem%eta( dwrA ,1:ndim)
      elem%eta( dwrEtaI_dualA ,1:ndim) = elem%eta( dwr_dualA ,1:ndim)

    end do !i

    state%estim( dwrEtaI_aver, 1:ndim ) = state%estim( dwr_aver, 1:ndim )
    state%estim( dwrEtaI_primal, 1:ndim ) = state%estim( dwrS, 1:ndim )
    state%estim( dwrEtaI_dual, 1:ndim ) = state%estim( dwr_dualS, 1:ndim )
    state%estim( dwrEtaI_aver_abs, 1:ndim ) = state%estim( dwr_aver_abs, 1:ndim )
    state%estim( dwrEtaI_primalA, 1:ndim ) = state%estim( dwrA, 1:ndim )
    state%estim( dwrEtaI_dualA, 1:ndim ) = state%estim( dwr_dualA, 1:ndim )



  end subroutine moveDWRestimatesEtaI

  ! control whether it holds etaI < etaII
  ! which should be true on all elements
  subroutine controlIfEtaIisLessThanEtaII( grid )
    class( mesh ), intent(in) :: grid
    class( element ), pointer :: elem
    integer :: i

    do i = 1,grid%nelem
      elem => grid%elem(i)
      if ( grid%elem(i)%eta(dwrS,1) < grid%elem(i)%eta( dwrEtaI_primal,1) ) then
        print*, 'elem i = ', i, 'Problem with PRIMAL EST - etaII < etaI ! ', &
             grid%elem(i)%eta(dwrS,1), '<' , grid%elem(i)%eta( dwrEtaI_primal,1)
      endif
      if ( grid%elem(i)%eta(dwr_dualS,1) < grid%elem(i)%eta( dwrEtaI_dual,1) ) then
        print*, 'elem i = ', i, 'Problem with DUAL EST - etaII < etaI ! ', &
          grid%elem(i)%eta(dwr_dualS,1), '<' , grid%elem(i)%eta( dwrEtaI_dual,1)
        print*, 'While primal estimates are: etaII etaI'
        print*, grid%elem(i)%eta(dwrS,1), ' ? ' , grid%elem(i)%eta( dwrEtaI_primal,1)
        !stop

        if ( maxval(elem%iBC(:)) == 0 ) then
          print*,'INNER ELEMENT with problems!'
          stop
        endif
      endif

    end do
!      print*, '***'
!      print*, '         ***   etaII, etaI    '
!      print*, 'elem%76 primal :', grid%elem(76)%eta(dwrS,1), grid%elem(76)%eta( dwrEtaI_primal,1)
!      print*, 'elem%76 dual:', grid%elem(76)%eta(dwr_dualS,1), grid%elem(76)%eta( dwrEtaI_dual,1)
!      print*, 'elem%82 primal:', grid%elem(82)%eta(dwrS,1), grid%elem(82)%eta( dwrEtaI_primal,1)
!      print*, 'elem%82 dual:', grid%elem(82)%eta(dwr_dualS,1), grid%elem(82)%eta( dwrEtaI_dual,1)

  end subroutine controlIfEtaIisLessThanEtaII


  !> evaluation of the local error estimates of the target quantity for BiCG solver
  subroutine EstimateNewton_DWR( grid, newton, DWR, newtonDone )
    class( mesh ), intent(inout) :: grid
    class( NonlinearSol_t ), intent ( inout ) :: newton ! this should the Newton type
    type( DWR_t), intent(inout), target :: DWR
    logical, intent(inout) :: newtonDone

    print*,'EstimateNewton_DWR  STARTED !!!',newton%iter

    ! allocation of arrays
    if(newton%iter == 1 .and. state%time%iter_loc == 1) then
       print*, 'allocate elem%wST_Ritz'
       call Allocate_wzST_Ritz(grid,  state%p_mod_max)
    endif

    ! setting of the RHS  done in damping
    !call ComputeST_Terms( .true. )

    !print*,' Ritz reconstruction of the primal and dual solutions'
    call RitzReconstr_PrimalDual_big( grid )


    !print *,'! evaluating of the algebraic and discretization estimates of J(u) - J(u_h)'
    call EstimateNewton_DWR_algeb(grid, newton, DWR, newtonDone)


    !stop "SXu93j93j3oo3____________   end subroutine EstimateNewton_DWR "
  end subroutine EstimateNewton_DWR

  !> evaluation of the local error estimates of the target quantity for BiCG solver
  !> -- the core subroutine
  subroutine EstimateNewton_DWR_algeb( grid, newton, DWR, newtonDone)
    class( mesh ), intent(inout) :: grid
    class( NonlinearSol_t ), intent ( inout ) :: newton ! this should the Newton type
    type( DWR_t), intent(inout), target :: DWR
    logical, intent(inout) :: newtonDone
    class( element ), pointer :: elem
    real, dimension(:, :, :), pointer :: dual_rhsST
    integer :: i, j, k, degP, dof, dofP, Tdof, p_mod, ifile
    real :: valPs, valDs, valPa, valDa
    real :: GvalPs, GvalDs, GvalPa, GvalDa
    real :: epsR, epsA, epsT, facP, facD, max_facD, max_facP

    ifile = 100 + 10*state%time%iter + newton%iter

    epsR = 1E-7   ! relative tolerance
    epsA = 1E-13  ! absolute tolerance

    epsT = state%space%adapt%tol_max / grid%nelem

    p_mod = 1
    Tdof = 1

    max_facP  = 0.
    max_facD  = 0.

    GvalPa = 0. ; GvalPs = 0.
    GvalDa = 0. ; GvalDs = 0.

    newtonDone = .true.

    do i = 1, grid%nelem
      elem => grid%elem(i)
      dof  = elem%dof
      dofP = DOFtriang( elem%deg + p_mod)

      dual_rhsST =>  DWR%dualRes(i)%x( 1:ndim, 1:dofP, 1:Tdof)

      valPa = 0. ; valPs = 0.
      valDa = 0. ; valDs = 0.

      do j=1,Tdof
         do k=1, ndim

            ! primal residuum * dual weight
            valPa = valPa + dot_product(elem%rhsST( k, 1:dof ,j),elem%zST(k, 1:dof ,j))
            !valPa = valPa + dot_product(elem%rhsST( k, 1:dof ,j),elem%zST_Ritz(k, 1:dof ,j))
            !valPs = valPs + dot_product(elem%rhsST( k, 1:dofP,j),elem%zST_Ritz(k, 1:dofP,j))
            valPs = valPs + dot_product(elem%rhsST( k, dof+1:dofP,j),elem%zST_Ritz(k, dof+1:dofP,j))

            !print*,'##########', &
            !     dot_product(elem%rhsST( k, 1:dofP,j),elem%zST_Ritz(k, 1:dofP,j)), &
            !     dot_product(elem%rhsST( k, dof+1:dofP,j),elem%zST_Ritz(k, dof+1:dofP,j))


            ! dual residuum * primal weight
            valDa = valDa + dot_product(dual_rhsST( k, 1:dof ,j),elem%wST(k, 1:dof ,j))
            !valDa = valDa + dot_product(dual_rhsST( k, 1:dof ,j),elem%wST_Ritz(k, 1:dof ,j))
            !valDs = valDs + dot_product(dual_rhsST( k, 1:dofP,j),elem%wST_Ritz(k, 1:dofP,j))
            valDs = valDs + dot_product(dual_rhsST( k, dof+1:dofP,j),elem%wST_Ritz(k, dof+1:dofP,j))

            write(41,'(a6, i5, 300es14.6)') 'rezP:',i, elem%rhsST( k, 1:dofP,j)
            write(45,'(a6, i5, 300es14.6)') 'solD:',i, dual_rhsST( k, 1:dofP,j)
         enddo
      enddo


      GvalPa = GvalPa + valPa
      GvalPs = GvalPs + valPs
      GvalDa = GvalDa + valDa
      GvalDs = GvalDs + valDs

      !GvalPa = GvalPa + abs(valPa)
      !GvalPs = GvalPs + abs(valPs)
      !GvalDa = GvalDa + abs(valDa)
      !GvalDs = GvalDs + abs(valDs)



      facP = abs(valPa) / max( abs(valPs), epsA)
      facD = abs(valDa) / max( abs(valDs), epsA)

      !if ( abs(valPa) > epsA )  max_facP  = max ( max_facP  , facP)
      !if ( abs(valDa) > epsA )  max_facD  = max ( max_facD  , facD)

      max_facP  = max ( max_facP  , abs(valPa) )
      max_facD  = max ( max_facD  , abs(valDa) )

      write(ifile, *) i, abs(valPa), abs(valPs), abs(valDa), abs(valDs), facP, facD

      !if( ( facP > epsR  .or. facD > epsR ) .and. &
      !     ( abs(valPa) > epsA .or. abs(valDa) > epsA)  ) then

      if(abs(valPa) > epsT  .or. abs(valda) > epsT)  then
         newtonDone = .false.

         !if(facP > 1E-2 .or. facD > 1E-2) &
         !if(abs(valPa) > epsT  .or. abs(valda) > epsT) &
         !     write(*,'(a8, i5, 30es14.6)') 'AA estim:',i, facP, facD, valPa, valPs, valDa, valDs

      endif

      !write(*,'(a8, i5, 30es14.6)') 'AA estim:',i, facP, facD, valPa, valPs, valDa, valDs

   enddo  ! do i=1, grid%nelem

   print*,'-------------------------------------------------------------------------'
   write(*,'(a20, 6es14.6)') 'Maximal Local errors:', max_facP, max_facD
   write(*,'(a20, 30es14.6)')'Global errors:', abs(GvalPa /GvalPs),abs(GvalDa /GvalDs), &
        GvalPa, GvalPs, GvalDa, GvalDs
   print*,'-------------------------------------------------------------------------'

         stop " COMPARE residuals"


   if(newton%iter == 1) write(99,'(x)')
   write(99, *) state%time%iter + 1. * newton%iter/ (newton%max_iter + 2) , &
        abs(GvalPa), abs(GvalPs), abs(GvalDa), abs(GvalDs), max_facP, max_facD
 end subroutine EstimateNewton_DWR_algeb


  !> allocate array elem%wST_Ritz ,  elem%zST_Ritz
  subroutine Allocate_wzST_Ritz(grid, p_mod)
    class( mesh ), intent(inout) :: grid
    integer, intent(in) :: p_mod
    class( element ), pointer :: elem
    integer :: i, degP, dofP

    do i = 1, grid%nelem
      elem => grid%elem(i)

      ! arrays for reconstructed solutions
      degP = elem%deg + p_mod
      dofP  = DOFtriang(degP)  !

      if ( associated( elem%wST_Ritz ) ) deallocate( elem%wST_Ritz )
      if ( associated( elem%zST_Ritz ) ) deallocate( elem%zST_Ritz )

      allocate( elem%wST_Ritz( 1:ndim, 1:dofP, 1:1), source = 0.0 )   !!1:elem%Tdof ) )
      allocate( elem%zST_Ritz( 1:ndim, 1:dofP, 1:1), source = 0.0 )   !!1:elem%Tdof ) )
   enddo

 end subroutine Allocate_wzST_Ritz





  !> simpliffied routine for DWR error estimates called for stationary problems
  !> called in each time step if estA \leq estS
  !> 1) the dual problem is solved
  !> 2) reconstruct the solutions (for aDWR already done)
  !> 3) solve the discretization error estimates: * \etaI -
  subroutine EstimatesDWR_BI( DWR, grid, finish)
    type( DWR_t ), intent(inout) :: DWR
    class( mesh ), intent(inout) :: grid
    logical, intent(inout) :: finish
    real :: etaA, etaS, etaAd, etaSd, eta_aver, fac_iter, tolAS
    real :: r_tol, r_est
    real :: t1, t2, factor
    r_tol = 0.
    r_est = 0.

    finish = .false.

    write(*,*) 'subroutine EstimatesDWR_BI called'

    ! for aDWR the estimates are already computed

    ! we need only the dual residual to be ready
    ! Fill residual vector - the residuum is always of the BIGGEST SIZE
    call computeDualResidualPlus(grid, DWR, .false.)

    ! ! DWR_P method -> SOLVE ALSO BIGGER DUAL PROBLEM
    ! if (DWR%deg_plus) then
    !    !         call state%setP_mod( 1 )
    !    print*, "Solve the bigger Dual problem"
    !    call SolveDualProblem_stat( DWR, grid, convfile )
    !    ! It is necessary to call computeDualResidualPlus with FALSE
    !    ! -> to fill Residual depending on zST
    !    call computeDualResidualPlus(grid, DWR, .false.)
    !    !         call state%setP_mod( 0 )
    ! end if


    ! 2) reconstruct the solutions
    !call ReconstrPrimalDualSolutions( grid, DWR%R_type )
    call ReconstrPrimalDualSolutions( grid, 2 )  ! ONLY Ritz reconstruction (without LS)


    ! 3) solve the discretization error estimates
    ! ' original EE from the weak formulation:  eta^I'
    call computeDWRestimates( DWR, grid )
    ! move the estimates to the new eta given in paramets
    ! so that they dont get overwitten by the anisotropic estimates etaII

    call moveDWRestimatesEtaI( dwr, grid )

    ! global estimate should contain also the algebraic error
    if (state%nlSolver%non_alg_stop == 'aDWR') then
       if(ndim>1) stop 'ndim>1 not implemented for aDWR'
       r_est = DWR%estimDiscr + DWR%estimNL

    else if (state%nlSolver%non_alg_stop == 'nlDWR') then
       r_est = sqrt(state%estim( dwrEtaI_aver, 1)) + sqrt(state%estim( dwrEtaI_primalA, 1))

    else
       r_est = sqrt( abs( state%estim( DWR%eta_index, 1 ) ))
    endif

    r_tol = state%space%adapt%tol_max

    if(r_est  < r_tol) finish = .true.

    etaA  = sqrt(state%estim( dwrEtaI_primalA, 1))
    etaS  = sqrt(state%estim( dwrEtaI_primal, 1))
    etaAd = sqrt(state%estim( dwrEtaI_dualA, 1))
    etaSd = sqrt(state%estim( dwrEtaI_dual, 1))
    eta_aver = sqrt(state%estim( dwrEtaI_aver, 1))

    !print*
    !write(*,'(a6, 8a12)') 'EST', 'etaS', 'etaSd', '!etaA!', 'etaAd', 'relat', 'relatD', '!aver!'
    !write(*,'(a9, 8es12.4)') 'ESTIM 1:', & !  S_P, S_D, A_P, A_D', &
    !     etaS, etaSd, etaA, etaAd, etaA/etaS, etaAd/ etaSd, eta_aver


    factor = state%nlSolver%tol2
    !if(  etaA  <= factor * etaS   )  finish = .true.
    if(  etaA  <= factor * eta_aver  )  finish = .true.
    if(  etaA  > eta_aver  )  finish = .false. ! additional conditions avioiding patological cases
    !if(  etaA  <= factor * etaS  .and. etaAd <= factor * etaSd )  finish = .true.

    write(*,'(a30, 4(es9.2,a3) )') " # NLDwr(etaA,tolA,eta,tol):", &
         etaA,' ?<', factor*eta_aver,'(',r_est,' ?<',r_tol,')'
    !write(*,'(a30, l3, 3es12.4,a2,es12.4,a3,es12.4,a2)') " # ! EstimatesDWR_BI finished", &
    !     finish,  DWR%estimNL, DWR%estimLD, factor,'(',r_est,'<?',r_tol,')'
    !print*

    !print*,'######################## EstimatesDWR_BI ', state%state_of_terms%is_DWR_etaI_actual( )

    ! update of state%state_of_terms
    call state%state_of_terms%new_DWR_etaI_was_computed()

    !print*,'######################## EstimatesDWR_BI ', state%state_of_terms%is_DWR_etaI_actual( )


  end subroutine EstimatesDWR_BI

end module estimates



