!> module for the dual problem for DWR (dual weighted residuals) approach
module dwr_mod
   use dwr_alg_mod
   use elemental_mod
   use solution_mod
   use target_functional_mod

   implicit none

   type :: DWR_t
      integer :: id !type of the DWR target functional (will be specified in paramets)
      logical :: RESindicators ! if .true. : the adaptation of the mesh uses the  RES error indicators instead of DWR
      logical :: deg_plus ! dual problem solution for p + p_plus
      integer :: R_type ! type of the reconstruction , 0 - L2, -1 - H1, 2 - reconstructiton based on a(u,vp+) = res(vp+)
      character(len=20) :: wType ! type of reconstruct which will be used as WEIGHT in estimates
      logical :: ANIindicators ! new version of the estimates connected to DWR_A
      integer :: eta_index ! which eta should be used to adaptation of the mesh (temporarily set in the program), should be in ini
      class( Target_functional_t ), allocatable :: J ! target functional
      logical :: dualProblem_computed
      real :: linSolver_tol ! tolerance of the linsolver for dual problem
      integer :: linSolver_iter ! number of lin Solver iterations
      real :: residuum
      real, dimension(:), allocatable ::  x, b, b1, rr  ! arrays for computation
      type( Elemental3_t ), dimension(:), allocatable :: rhs
      type( Elemental3_t ), dimension(:), allocatable :: dualRes ! the residual vector of the dual problem
      type( DWR_alg_t ) :: aDWR ! class of all needed variables for the aDWR method for error estimation of the alg. errors
      character(len=20) :: file_error_name

      logical :: BI ! simultaneous solution of the primal and dual problems  using of BiCG method

      logical :: PU ! use the partition of unity for localization of the estimates
      type( Elemental1_t ), dimension(:), allocatable :: pu_estim ! used for local estims using PU
      real, dimension(:), allocatable :: vertex_estim ! used for local vertex estims using PU, 1:npoin
      integer, dimension(:), allocatable :: num_of_vertex_elem ! used for local vertex estims using PU, 1:npoin

      real :: estimLP, estimLD, estimNL ! linear primal and dual estimate, nonlinear estimate
      real :: estimDiscr ! discretization estimate
      real :: nlDWR_tol ! tolerance for stopping newton iters when nlDWR is used


   contains
      procedure :: setBasicVariables
      procedure :: delete => del_DWR
      procedure :: init => initDWR
      procedure :: update => updateDWR
      procedure :: clean => cleanDWR
      procedure :: setRHS

      procedure :: fillRHSvector ! %rhs -> %b
      procedure :: fill_RHS_to_1D_vector
      procedure :: fillDualResVector ! dualResidual
      procedure :: distributePuEstimToVertices
      procedure :: initDualLinSolver
      procedure :: etaFromVertices
      procedure :: writeNlErrorFile
      procedure :: writeDWRerrorsFile ! similirar to the procedure in aDWR
!      procedure :: newtonDone

   end type DWR_t

!   interface DWR_t
!       procedure :: initDWR
!      !procedure :: otherWayOfInitDWR - e.g. copy constructor
!   end interface DWR_t

!    public :: InitDualProblem
    public :: computeDualLinearDWRestimates
    public :: computePrimalLinearDWRestimates
    public :: evalElemRHS_BoundaryFlux
    public :: setWeightFun_primalEst
    public :: setWeightFun_dualEst

   !> main structure for DWR error estimates
   type( DWR_t ), allocatable :: DWR

   contains

    !> set basic variables from state%space%estim_space
    subroutine setBasicVariables( this, estim_space, iEstim)
      class(DWR_t), intent(inout) :: this
      character(len=20), intent(in) :: estim_space
      integer, intent(in) :: iEstim


      this%id = iEstim
      this%RESindicators = .false.
      this%deg_plus = .false.
      this%ANIindicators = .false.
      this%BI = .false.

      if (estim_space == 'DWR_RES') then
        write(*,'(a54)') '  # DWR: The RES technique is used for mesh   adaptation'
        DWR%RESindicators = .true.
      else if (estim_space == 'DWR_P') then
        write(*,'(a54)') '  # DWR: The DP is computed in HO space of deg p+p_plus'
        DWR%deg_plus = .true.
      ! new estimates based on the article Dolejsi 2017 - anisotropic
      else if (state%space%estim_space == 'DWR_ANI') then
        write(*,'(a54)') '  # DWR: New error estimates enabling anisotropic refinement!'
        DWR%ANIindicators = .true.
      else if (estim_space == 'DWR_ANI_P') then
        write(*,'(a54)') &
        '  # DWR: The DP is computed in HO space and Anisotropic estims are used.'
        DWR%deg_plus = .true.
        DWR%ANIindicators = .true.
      end if

    end subroutine setBasicVariables


    !> init basic DWR structure
    !> should be called only once - in the beginning of computation
    !  function initDWR( id, grid) result(this) - it does new allocation???
    subroutine initDWR( this, grid )
      class(DWR_t), intent(inout) :: this
      class( mesh ), intent(in) :: grid
      integer :: i
      integer :: iFile = 41
      real, dimension(1:nbDim) :: xy_coord
      integer :: weight_i ! weighting function index in the functional J

      if ( state%time%deg > 1 ) &
            stop 'in initDWR - stationary problems work only for q=0'

      this%dualProblem_computed = .false.

      !set DWR_PARAMETERS
      this%PU = .false. !.true. ! use partition of unity to localization
      if (this%PU) &
         print*, ' # DWR: Vertex oriented partition of unity is used for mesh adaptation.'

      !set DWR_PARAMETERS
      ! type of reconstruction
      this%R_type = -10!-1 => H1, 0 =>  L2, 2 => Ritz
      ! R_Type = -10 -> then it computes both LS and RITZ reconstruction and
      ! Ritz reconstruction is then saved into wSTplus(,,-1), zSTplus(,,-1)
      !if (this%R_type <= 1) then
      !	this%R_type = -1 ! -1 - H1, 0 - L2, 2 - Ritz
      if (this%R_type < -1) then
         print*, '# DWR: Both LS and Ritz reconstructions are computed!'
         ! estimates controlling the size of elems based on Ritz wST(:,:,-1) and the hp-,anisotropy uses LeastSquares wST(:,:,0:2)
      else if (this%R_type <= 0) then
         print*, '# DWR: We use the LS reconstruction.'
         print*, "not works now - estimation is based on wST(:,:-1) which is not filled"
         print*, "****"
         stop
      else if (this%R_type==2) then
         print*, '# DWR: We use the reconstruction based on a(up,vp+) = res(vp+) forall vp+ \in Shpp!'
         print*, "not works now - estimation is based on wST(:,:-1) which is not filled"
         print*, "****"
         stop
      else
         stop 'Unknown type of reconstrution.'
      endif

      ! choose which kind of reconstruction/solution will be used in weightFun in estimates
      if (this%deg_plus ) then
        this%wType = "GlobSol"
      else
        this%wType = "LocRitz"
        !this%wType = "LocLS"
        !print*, "LocLS was set temporarily for testing purposes!"
      end if

      !set DWR_PARAMETERS
      ! tolerance of the lin solver for dual problem

      !this%linSolver_tol = state%linSolver%tol * 1.E-6
      this%linSolver_tol = state%linSolver%tol
      !this%linSolver_tol = 1.E-12
      if( state%modelName == 'NSe') this%linSolver_tol = state%linSolver%tol * 1.E-6

      print*, '# TODO FR_aDWR: REPAIR! DWR: Linear solver tolerance for DUAL problem set to ' , this%linSolver_tol

      !set eta index
      !set DWR_PARAMETERS
!      this%eta_index = dwrS
!      this%eta_index = dwr_dualS
       this%eta_index = dwr_aver



      if ( this%ANIindicators ) then
        print*, "Temporarily the element size will be done according to etaII"
        this%eta_index = dwrEtaI_aver  !dwr_aver
!        this%eta_index = dwr_aver  !dwr_aver
      end if
      !!!if ( this%ANIindicators ) this%eta_index = dwr_aver  !dwr_aver

      !if (this%deg_plus ) this%eta_index = dwrS ! for DWR_P the dual residual is not computed

      if (this%eta_index == dwr_aver) then
          write(*,*) '# DWR: AVERAGE residual estimator used for mesh adaptation!'
      else if (this%eta_index == dwrS) then
          write(*,*) '# DWR: PRIMAL residual estimator used for mesh adaptation!'
      else if (this%eta_index == dwr_dualS) then
          write(*,*) '# DWR: DUAL residual estimator used for mesh adaptation!'
       elseif (this%eta_index == dwrEtaI_aver) then
          write(*,*) '# DWR: AVERAGE residual estimator used for mesh adaptation!, ETA^I'
      else
        stop 'unknown eta index is chosen in initDWR!'
      endif


       weight_i = 1 ! implicit j_om = 1 in whole domain
      ! init the target functional this%J
      select case(this%id)
         case(1)
            write(*,'(a50)') '  # Target functional: Integral of U over a part of Neumann boundary.'
            xy_coord(1:2) = (0.0, 0.0) ! not used here
            allocate( BoundaryValue_t :: this%J )

            if(state%model%icase == 80 .or. state%model%icase == 81) then
               weight_i = 6 ! especially for problem Carpio 2013
               print*, 'The weight function in J(u) is chosen especially for icase=80, Carpio 2013!'

            elseif(state%model%icase == 76) then
               weight_i = 8 ! especially for problem Harriman
               print*, 'The weight function in J(u) is chosen especially for icase=76, Harriman!'

            elseif(state%model%icase == 63) then
               weight_i = 9 ! especially for battery
               print*, 'The weight function in J(u) is chosen especially for icase=63, battery!'

            elseif(state%model%icase == 67) then
               weight_i = 9 ! especially for simplified battery
               print*, 'The weight function in J(u) is chosen especially for icase=67, simplified battery!'
            elseif( state%model%icase == 88 .or. state%model%icase == 89) then  ! Burgers
               ! the functional is:   J(u) = (jN f'(u).n,  u)_{\gomN}, see dwr_res, key NTF
               weight_i = 11 !  Gauss function arround x(1) = 0.5

            else
               stop 'UNKNOWN case in DWR setting YTGH'
            endif

         case(2)
            write(*,'(a65)') ' # Target functional: Flux of U though part of Dirichlet boundary'
            xy_coord(1:2) = (0.0, 0.0) ! not used here
            allocate( BoundaryFlux_t :: this%J )

            if(state%model%icase == 15 .or. state%model%icase == 36 &
                 .or. state%model%icase == 88) then
               weight_i = 1 ! especially for simplified battery
               !print*, ' # The weigh function in J(u) is chosen especially for icase=15,', &
               !     'quasilinear L-shape'
            else
               stop 'UNKNOWN case in DWR setting AIGH'
            endif

         case(3)
            write(*,'(a50)') '  # Target functional: Point value of the solution in (0.5, 0.5)'
            ! FERROR coordinates should be set in the .ini file in future
!            xy_coord(1:2) = (0.75, 0.25)
!            allocate( Point_value_t :: this%J )
!            call this%J%init( xy_coord, grid, weight_i )
            stop 'Point value does not work'
         case(4)
            write(*,'(a100)') '# Target functional: Integral of U over subdomain specified in the submesh file.'
            !write(*,'(a100)') '  # For now it approximates the value in the middle (divides by the area of the support)!'
            ! FERROR coordinates should be set in the .ini file in future
            xy_coord(1:2) = (0.0, 0.0) ! not used here
            allocate( U_over_subdomain_t :: this%J )
         case(5)
            write(*,'(a100)') '# Target functional: Integral of the solution du/dx over subdomain specified in the submesh file.'
            ! FERROR coordinates should be set in the .ini file in future
            xy_coord(1:2) = (0.0, 0.0) ! not used here
            weight_i = 5 ! constant -1
            allocate( dudx_t :: this%J )
         case(6)
            write(*,'(a86)') '# Target functional: Integral of j_om*u over subdomain specified in the submesh file.'
            !write(*,'(a100)') '  # For now it approximates the value in the middle (divides by the area of the support)!'
            ! FERROR coordinates should be set in the .ini file in future
            weight_i = 7 ! 4 !
            if (weight_i == 7) &
              write(*,'(a44)') '## J for case = 78, to have known exact z! Convection ADDED! not tested '
            xy_coord(1:2) = (0.0, 0.0) ! not used here
            allocate( U_over_subdomain_t :: this%J )
         case(7:9)
            write(*,'(a50)') ' # Unknown type of target functional! STOPPING!'
            stop
         case(10:12)
            write(*,'(a43)') ' # Drag,Lift or Momentum target functional!'
            xy_coord(1:2) = (/0.25, 0.0 /) ! x_ref for momentum coef
            !weight_i is used for setting kind of J
            weight_i = this%id
            allocate( DragAndLift_t :: this%J )

         case(13:)
            write(*,'(a52)') '  # Unknown type of target functional! STOPPING (2)!'
            stop
      end select
      ! initialize the target functional according to its allocated type
      call this%J%init( xy_coord, grid, weight_i)

      this%J%id = this%id

      
      this%estimDiscr = 1.0
      ! temporal for export
      this%aDWR%file_error_new = "aDWR_nlErrors"


      if ( state%nlSolver%non_alg_stop == 'aDWR' ) &
         call this%aDWR%init()
      if ( state%nlSolver%non_alg_stop == 'aDWR' .and. DWR%deg_plus) &
         stop 'the DWR_P method is not compatible with aDWR algebraic estimates!'

      if ( DWR%ANIindicators ) then
         this%file_error_name = "DWR_AMA_estim"
      else
         this%file_error_name = 'aDWR_errors'
      endif


      open( 48, file = this%file_error_name, action="write", status="replace" )
      close( 48 )

   end subroutine initDWR

   ! final subroutine, cleaning the DWR structure
   ! should be called in the end of the computation
   subroutine del_DWR(this)
     class( DWR_t ) :: this
     integer :: i,l

     call this%clean()

     if (allocated(this%J)) then
         call this%J%delete()
         deallocate(this%J)
     endif

   end subroutine del_DWR

   !> update DWR structures in the beginning of each solution procedure
   !> should be called after each mesh adaptation
   subroutine updateDWR( this, grid )
      class(DWR_t), intent(inout) :: this
      class( mesh ), intent(in) :: grid
      integer :: i

      if ( state%nlSolver%non_alg_stop == 'aDWR' ) then
       !print*, 'cleanDWR called! '
         call this%clean()
         call this%aDWR%update()
      endif

      if ( state%getP_mod() > 0 ) then
        print*, "****"
        print*, "updateDWR was called for p_mod >0, but it does NOTHING in this case!"
        print*, "****"
      else
        !this%estimDiscr = 1.0
        this%estimNL = 1.0
        this%estimLD = 1.0
        this%estimLP = 1.0

        !moved from InitDualProblem( this, this%deg_plus )
        if (state%time%quadType /= 'Radau') &
           stop 'Radau quadrature must be used - to compute C(u(T))'
        if (.not. allocated(this%J)) &
           stop  'DWR%J is not allocated !!!'

        call this%J%findSupp( grid )

        if ( this%J%isupp == 0 ) then
           print*, 'epsilon: ', DWR%J%eps, 'grid%h/2', grid%h / 2.0
           stop 'zero support of target func (1)'
        endif

        call this%setRHS( grid ) ! NOT necessary for non-linear problems

        if (state%time_dependent) &
           stop 'we need to compute IC for dual problem in updateDWR!'

        if ( state%nlSolver%non_alg_stop == 'aDWR' ) then
           !print*, 'InitDualLinSolver in updateDWR'
           call this%initDualLinSolver( grid )
        endif


      end if


   end subroutine updateDWR

   ! subroutine cleaning the structures in DWR,
   ! which should be called after each adaptation
   subroutine cleanDWR(this)
      class( DWR_t ) :: this
      integer :: i , l

      if (allocated(this%b))    deallocate( this%b )
      if (allocated(this%x))    deallocate( this%x )
!      if (allocated(this%b1))   deallocate( this%b1 )
      if (allocated(this%rr ))  deallocate( this%rr )

      if (allocated( this%rhs ) ) then
         l = size( this%rhs(:) )
         do i = 1,l
            call this%rhs(i)%delete()
         enddo
         deallocate( this%rhs )
      endif

      if (allocated( this%dualRes ) ) then
         l = size( this%dualRes(:) )
         do i = 1,l
            call this%dualRes(i)%delete()
         enddo
         deallocate( this%dualRes )
      endif

      this%linSolver_iter = 0
      this%residuum = 0
      !this%dualProblem_computed = .false.

      if (allocated( this%pu_estim ) ) then
         l = size( this%pu_estim(:) ) ! = nelem
         do i = 1,l
            call this%pu_estim(i)%delete()
         enddo
         deallocate( this%pu_estim )
      endif
      if (allocated(this%vertex_estim))   deallocate(this%vertex_estim)
      if (allocated(this%num_of_vertex_elem))  deallocate(this%num_of_vertex_elem)

      call this%J%clean()

   end subroutine cleanDWR


   !> new version: save the dual_rhs to the vector b(1:nsize) DOES NOT use elem%rhsST
   !> set RHS for the dual problem used for DWR error estimation method
   subroutine setRHS(this, grid)
    class( DWR_t ), intent(inout) :: this
    class( mesh ), intent(in) :: grid
    class( element ), pointer :: elem
    integer :: i,j, dof, Qdof, k,l, nelem, i_elem, Tdof,iFace
    real :: time
    real, dimension(:,:), allocatable :: f, wi
    real, allocatable, dimension(:,:,:,:) :: flux
    logical :: compute_RHS, reallocate_rhs

    ! the setting of RHS is not necessary
    compute_RHS = .false.

    ! for nonlinear problems, we have to update RHS in each step
    if(.not. state%model%linear ) compute_RHS = .true.

    !print*, "TEST C infinity in setTheta = ", 1.0, state%model%linear

    ! control size
    if ( allocated( this%rhs ) .and. size( this%rhs(:) ) /= grid%nelem ) then
       !print*,  'this%rhs is obsolete and needs to be reallocated!'
       l = size( this%rhs(:) )
       do i = 1,l
          call this%rhs(i)%delete()
       enddo
       deallocate( this%rhs )
    endif

    ! allocattion and initialization of J
    if ( .not. allocated( this%rhs ) ) then

       if ( this%J%isupp == 0 ) then
          print*, 'J epsilon:', this%J%eps
          stop 'zero support of target func (2)'
       endif
       associate( JJ => this%J )

         nelem = grid%nelem
         allocate( this%rhs(1:grid%nelem) )
         do i = 1, nelem
            elem => grid%elem(i)
            Tdof = elem%Tdof
            call this%rhs(i)%init( ndim, elem%dof_plus,Tdof )
         enddo
         ! the setting of RHS is necessary
         compute_RHS = .true.
       end associate ! JJ

    endif

    ! computetion of RHS of the dual problem
    if(compute_RHS) then
       associate( JJ => this%J )

         ! TIME dependent version
         if (state%time_dependent) then
            stop 'set RHS not implemented for nonstationary problems'
            ! TIME independent
         else
            time = state%time%finTime

            do i = 1, this%J%isupp
               i_elem = this%J%supp(i)
               elem => grid%elem( i_elem )
               ! BUGFIX always compute for deg_plus test functions BUGFIX !
               dof = elem%dof_plus

               ! computes \int_{(\partial)K_{elem}} f \phi_{R} \ dx(or dS) \f$, for phi_1:dof
               select type ( JJ )
                  ! integrate over edges
               class is ( BoundaryValue_t)
                  iFace = elem%iSubmeshFace
                  Qdof = elem%face(fGdof, iFace )

                  allocate( f(1:Qdof, 1:ndim) , source = 0.0 )

                  do j = 1,Qdof
                     f(j,1:ndim) = this%J%evalWeightFunction( elem%xi( iFace,j,1:nbDim), time )
                  end do !j

                  ! nonlinear functional is J(u) = \int_\Gamma weight * \vec f(u) \cdot nn dS
                  if( this%J%id == 1 .and. this%J%iWeight == 11) then
                     ! nonlinear case, J(u) = (jN f'(u).n,  u)_{\gomN}, key NTF
                     ! e.g., burgers convection

                     allocate(wi(1:Qdof,1:ndim), source = 0.0)
                     ! eval w in !FACE! integ nodes, 0 - endpoint
                     call EvalwSTEdge(elem, iFace, elem%TQnum, 0, wi(1:Qdof,1:ndim), .false. )

                     allocate( flux(1:Qdof,1:nbDim, 1:ndim, 1:ndim), source = 0.0)

                     call Lin_f_s_scalar(ndim, Qdof, wi(1:Qdof,1:ndim), &
                          flux(1:Qdof,1:nbDim, 1:ndim, 1:ndim), elem%xi( iFace ,1:Qdof,1:nbDim) )

                     do k=1,ndim
                        f(1:Qdof,k) = f(1:Qdof, k) * &
                          ( flux(1:Qdof,1, k, k) * elem%n(iFace, 1) &
                          + flux(1:Qdof,2, k, k) * elem%n(iFace, 2) ) / elem%dn(iFace)
                     enddo


                     deallocate(wi, flux)
                  endif
                  
                  call EvalEdgeForNormalizedVec_2dim(elem, iFace, f(1:Qdof,1:ndim), dof, &
                       this%rhs(i_elem)%x(1:ndim, 1:dof, 1))
                  ! 1 in rhs 1:Tdof (stationary)

               class is (BoundaryFlux_t)
                  !if(i == 1) print*,' set RHS for DWR with BoundaryFlux_t -- TESTING!'
                  ! working reasonably
                  ! weight_function is = 1 at this moment
                  call evalElemRHS_BoundaryFlux( JJ, elem, i, &
                       this%rhs(i_elem)%x(1:ndim, 1:dof, 1:elem%Tdof)  )

               class is ( dudx_t )
                  Qdof = elem%Qdof

                  allocate( f(1:Qdof, 1:ndim) , source = 0.0 )
                  do j = 1,Qdof
                     f(j,1:ndim) = this%J%evalWeightFunction( elem%xi( 0,j,1:nbDim), time )
                  end do !j
                  call EvalVectorDphiDx_2dim( elem, f(1:Qdof, 1:ndim), &
                       dof, this%rhs(i_elem)%x(1:ndim, 1:dof, 1), JJ%dx )

               class is (U_over_subdomain_t)
                  Qdof = elem%Qdof
                  allocate( f(1:Qdof, 1:ndim) , source = 0.0 )
                  do j = 1,Qdof
                     f(j,1:ndim) = this%J%evalWeightFunction( elem%xi( 0,j,1:nbDim), time )
                  end do !j
                  !print*, 'rhs in dwr = ', elem%i, this%rhs(i_elem)%x(1:ndim, 1:dof, 1),'|', f(:,1)
                  !write(*,'(a15, i5, 30es12.4)') 'rhs in dwr = ', elem%i, f(:,1)
                  call EvalVectorB_2dim( elem, f(1:Qdof,1:ndim), &
                       dof, this%rhs(i_elem)%x(1:ndim, 1:dof, 1) )

               class is (DragAndLift_t)
                  iFace = JJ%suppFace(i) ! local index of the boundary edge
                  Qdof = elem%face(fGdof, iFace )

                  allocate( f(1:Qdof, 1:ndim) , source = 0.0 )

                  call this%J%evalWeightFunctionVector( elem, iFace, Qdof, &
                       f(1:Qdof,1:ndim) )

                  ! integrate f*\phi_i and put it to the dual RHS
                  ! here was problem with dnc sizes - all nc are UNIT now!
                  !                 call EvalEdgeVec_2dim(elem, iFace, f(1:Qdof,1:ndim), dof, &
                  !                       this%rhs(i_elem)%x(1:ndim, 1:dof, 1))
                  ! 1 in rhs 1:Tdof (stationary)
                  ! print*, 'RHS: elem i = ', i_elem, this%rhs(i_elem)%x(1, :, 1)
                  !                print*, "TESTING BC "
                  call EvalEdgeForNormalizedVec_2dim(elem, iFace, f(1:Qdof,1:ndim), dof, &
                       this%rhs(i_elem)%x(1:ndim, 1:dof, 1))

                  !if (elem%i == 334) print*,  "After computing rhs vector for drag and lift!"
               class default
                  stop 'unknown type of target functional in set RHS dwr !'
               end select

               if(allocated(f) ) deallocate(f)

               if (elem%Tdof > 1) &
                    stop 'problem in setRHS'

            enddo ! i
         endif ! time dependent

       end associate ! JJ

    else
       ! print*,
       ! print*, 'Rhs is already allocated (and filled?)-> do nothing in setRHS'
       ! print*, 'Maybe there should be test for length'
    endif ! computeRHS

  end  subroutine setRHS

   ! fill the global dual res vector to DWR%dualRes(:,:,:)
   ! SHOULD BE always call for the biggest possible nsize == bigNsize(p_mod_max,q_mod_max)
   subroutine fillDualResVector( this , grid, nsize, b )
      class( DWR_t ), intent(inout) :: this
      class( mesh ), intent(in) :: grid
      integer, intent(in) :: nsize ! size of the global vector
      real, dimension(1:nsize), intent(in) :: b ! the vector containing A^T*zST
      class( element ), pointer :: elem
      integer :: i, k, l, ijk
      integer :: dof_plus, ndof, nelem, Tdof
      real :: normOfTheResidual
      type( Elemental3_t ), dimension(:), allocatable :: elemental_b

      nelem = grid%nelem

      if (nsize /= state%bigNSize( state%p_mod_max, state%q_mod_max) ) &
        stop "fillDualResVector should be called only for the biggest possible size!"

      if ( allocated(this%dualRes) ) then
          deallocate(this%dualRes)
      endif
      allocate( this%dualRes(1:nelem) )

      allocate( elemental_b(1:grid%nelem))
      call copy_long_vector_to_Elemental3_sol_order(grid, nsize, b(1:nsize), &
              elemental_b)

!      ivec = 0
      do i=1,nelem
         elem => grid%elem(i)
         dof_plus = elem%dof_plus
         Tdof = elem%Tdof
         ndof = dof_plus * Tdof * ndim

         !allocate the array
         call this%dualRes(i)%init( ndim, dof_plus,Tdof )

!         kvec = ivec

         ! save to dualRes the dual residual vector
         ! Ritz - need this vector to rhs for the local problem used for computation of zSTplus
         do l = 1, Tdof
            do k = 1, ndim
!               this%dualRes(i)%x(k,1:dof_plus,l) = &
!                  this%rhs(i)%x(k,1:dof_plus,l) - b(kvec+1:kvec + dof_plus)

               this%dualRes(i)%x(k,1:dof_plus,l) = &
                  this%rhs(i)%x(k,1:dof_plus,l) - elemental_b(i)%x(k,1:dof_plus,l)
!               kvec = kvec + dof_plus
            enddo !k
         enddo !l

!         ivec = ivec + ndof

      end do !i


      normOfTheResidual = 0.0

      ! compute norm of the residual
      do i=1,nelem
          elem => grid%elem(i)
          do k = 1, ndim
             normOfTheResidual = normOfTheResidual + &
                norm2(this%dualRes(i)%x(k,1:elem%dof,1))**2.
          enddo !k
      end do

      deallocate(elemental_b)

!      print*, "### norm of the dual residual = " , normOfTheResidual**0.5

   end subroutine fillDualResVector

   !> fills the 3-dim RHS array this%rhs(:,:,:) to vector of the RHS this%b
   subroutine fillRHSvector( this, grid, plus )
      class( DWR_t ) :: this
      class( mesh ), intent(in) :: grid
      logical, intent(in) :: plus
      class( element ), pointer :: elem
      integer :: nsize, i, nelem, k, l, dof, ndof, kvec, ivec

      nelem = size( this%rhs )
      ivec = 0

      if ( .not. allocated(this%rhs) ) &
         stop 'DWR rhs is not allocated in fillRHSvector!'

      if (allocated(this%b)) &
         deallocate( this%b )

      if (plus) then
        nsize = sum( ndim*grid%elem(:)%dof_plus * grid%elem(:)%Tdof )
        !print*, 'nsize', nsize
        allocate( this%b(1:nsize), source = 0.0 )

        do i = 1,nelem
            elem => grid%elem(i)
            dof = elem%dof_plus
            ndof = ndim * dof * elem%Tdof
            kvec = ivec

            do l = 1, elem%Tdof
               do k = 1, ndim
                  this%b(kvec+1:kvec + dof) = this%rhs(i)%x(k,1:dof,l )
                  kvec = kvec + dof
               end do ! k
            enddo !l
            ivec = ivec + ndof

         end do

      else
         nsize = sum( ndim*grid%elem(:)%dof * grid%elem(:)%Tdof )
         allocate( this%b(1:nsize), source = 0.0 )
         do i = 1,nelem
            elem => grid%elem(i)
            dof = elem%dof
            ndof = ndim * dof * elem%Tdof
            kvec = ivec
            do l = 1, elem%Tdof
               do k = 1, ndim
                  this%b(kvec+1:kvec + dof) = this%rhs(i)%x(k,1:dof,l )
                  kvec = kvec + dof
               end do ! k
            enddo !l
            ivec = ivec + ndof
         end do

      endif

   end subroutine fillRHSvector


   !> fills the 3-dim RHS array this%rhs(:,:,:) to vector of the RHS this%b
   subroutine fill_RHS_to_1D_vector( this, grid, nsize, vec)
      class( DWR_t ) :: this
      class( mesh ), intent(in) :: grid
      real, dimension(1:nsize), intent(inout) :: vec

      class( element ), pointer :: elem
      integer :: nsize, i, nelem, dof, ndof, ivec, p_mod
      integer, dimension(1:3) :: dimensions

      nelem = size( this%rhs )
      ivec = 0

      if ( .not. allocated(this%rhs) ) &
         stop 'DWR rhs is not allocated in fill_RHS_to_1D_vector!'

      ! control nsize
      if (nsize /= state%getBigNSize()) then
        print*,nsize, "/=", state%getBigNSize()
        stop 'nsize is wrong is in fill_RHS_to_1D_vector!'
      endif

      p_mod = state%getP_mod()

      if (state%getQ_mod() /= 0) &
        stop "Qmod > 0 not implemented in fill_RHS_to_1D_vector"

      do i = 1,nelem
            elem => grid%elem(i)
            dof = DOFtriang(elem%deg + p_mod)
            ndof = ndim * dof * elem%Tdof

            dimensions(1:3) = (/ ndim, dof, elem%Tdof /)

            vec(ivec+1 : ivec+ndof) = &
              this%rhs(i)%copyTo1Darray3_1_2(dimensions)

            ivec = ivec + ndof
      end do

   end subroutine fill_RHS_to_1D_vector


  !> init \f$ C(u(T))^T z = J(\phi)  \f$ for nonlinear stationary problem
  !> called only with aDWR
  subroutine initDualLinSolver( this, grid)
    class( DWR_t ), intent(inout) :: this
    class( mesh ), intent(in) :: grid
    class( element ), pointer :: elem
    integer :: i, j, kk, k, elemDof, dof, degP, dofP, R_type, Qnum, nsize
    real :: t1, t2, time_prepare, time_solve, time_estim, res_max_val
    character(len=50) :: plot_file =  'plot_sol.gnu' !'../DWR/plot_sol.gnu'
    integer :: iPlot = 39
    real, allocatable, dimension(:,:,:,:) :: Dw ! for the least square reconstruction
    real :: l_norm
    real :: residuum
    integer :: lin_solver_not_conv

    if ( state%time_dependent ) &
      stop 'DWR method is not implemented for time-dependent problems YET!'

    time_prepare = 0.
    time_solve  = 0.
    time_estim = -1.

    if ( this%deg_plus ) then
      nsize = sum( grid%elem(:)%dof_plus )
    else
      ! change in future to dual_size add control for nonlinear problems size can be ok
      nsize = state%nsize
    endif

    if ( .not. allocated(this%b) ) &
      allocate(this%b(1:nsize), source = 0.0)
    !if ( .not. allocated(this%b1) ) &
    !  allocate(this%b1(1:nsize) , source = 0.0)
    if ( .not. allocated(this%x) ) &
      allocate(this%x(1:nsize), source = 0.0 )
    if ( .not. allocated(this%rr) ) &
      allocate(this%rr(1:nsize), source = 0.0)

    call cpu_time(t1)

    eta = 0.0

    ! not needed - fills primal RHS
    !    call FillVectorST( this%b(1:state%nsize), eta ) ! value of eta is not used, only its sign

    !filling dwr%rhs into global rhs vector
    ! false - already p+1
    print*, "init initDualLinSolver calling fillRHSvector"
    call this%fillRHSvector( grid, this%deg_plus )

    ! fill zST to DWR%x as initial solution
    if (associated(grid%elem(1)%zST)) then
      call copyZST_toLongVector( grid,  nsize, this%x )
    else
      stop 'zST is not associated!!!'
    endif


    call cpu_time(t2)
    time_prepare = time_prepare + t2 - t1

    !print*, 'Nullify the number of lin. iterations before dual problem.'
    this%linSolver_iter = 0
    !maybe globaly ?
    this%residuum = -1 !state%linSolver%residuum
    lin_solver_not_conv = 0

  end subroutine initDualLinSolver


   ! set the global vertex-wise array of the local estimates
   ! sum of pu_estims from the corresponding elements
   subroutine distributePuEstimToVertices( this, grid )
      class(DWR_t), intent(inout) :: this
      class( mesh ), intent(in) :: grid
      class( element ), pointer :: elem
      integer :: i, nelem, npoin,j , glIndex

      nelem = grid%nelem
      npoin = grid%npoin

      if (allocated( this%vertex_estim ) ) &
         deallocate( this%vertex_estim )
      allocate( this%vertex_estim(1:npoin), source = 0.0 )

      ! TRY to use in PU disrtibution to elements
      if (allocated( this%num_of_vertex_elem ) ) &
         deallocate( this%num_of_vertex_elem )
      allocate( this%num_of_vertex_elem(1:npoin), source = 0 )

      do i = 1, nelem
         elem => grid%elem(i)
         !go through vertices
         do j = 1,3
            glIndex = elem%face(idx, j)
            this%vertex_estim(glIndex) = this%vertex_estim(glIndex) + &
               this%pu_estim(i)%x(j)
         enddo

         ! number of elements per vertex
         do j = 1, elem%flen
            glIndex = elem%face(idx, j)
            this%num_of_vertex_elem(glIndex) = this%num_of_vertex_elem(glIndex) + 1
         enddo
      enddo

   end subroutine distributePuEstimToVertices

   function etaFromVertices(this, elem) result( estim )
      class(DWR_t), intent(inout) :: this
      type(element), intent(in) :: elem
      class( element ), pointer :: neighbor
      real :: estim
      integer :: i,j, npoints, glIndex

      if (ndim > 1) &
         stop 'EtaFromVertices works only for scalar case'

      npoints = elem%flen
      estim = 0.0

      do i = 1, npoints
         ! get global index of the elem vertex
         glIndex = elem%face(idx, i)
         estim = estim &
            + ( this%vertex_estim(glIndex) / this%num_of_vertex_elem(glIndex) )
      enddo

   end function etaFromVertices

        ! write one line intp the file for errors from state%estim()
   ! now it is cleaned after every adaptation
   subroutine writeNlErrorFile( this, nelem)
      class( DWR_t ) :: this
      integer, intent(in) :: nelem
!      integer, intent(in) :: iter ! number of the line
!      integer, intent(in) :: iter_lin_primal !
!      integer, intent(in) :: iter_lin_dual ! number of the lin. alg. iterations
      integer :: iFile = 42

      ! SOME NUMBERS NEED to be squarerooted !!!!!!!!!

!      call this%J%computeJu( grid )
      open( iFile, file = this%aDWR%file_error_new, action="write", position="append" )
        write(iFile,'(i6, i6, i6, i6, i6, 8es18.10)') &
         state%space%adapt%adapt_level + 1, nelem, this%aDWR%iter, &
         this%aDWR%iter_lin_primal, this%aDWR%iter_lin_dual, &
         this%J%Ju_exact, this%J%Ju, &
         this%estimDiscr, this%estimNL, &
         this%estimLP, this%estimLD
      close(iFile)

  end subroutine writeNlErrorFile


   ! write one line into the file for errors from state%estim()
   !> appending after every adaptation, only for DWR_P which is not compatible with DWR_P
   subroutine writeDWRerrorsFile( this, nelem, nsize )
      class( DWR_t ) :: this
      integer, intent(in) :: nelem,nsize
      integer :: iFile = 42
      character(len=20) :: file_error_name
      real :: t_end
      ! temporary only for AM paper

      character(len=20) :: fileNameMD = 'DWR_AMA.md'

      ! ALL NUMBERS NEED to be squarerooted !!!!!!!!!

      call cpu_time(t_end)

      ! file DWR_AMA_estim
      open( iFile, file = this%file_error_name , action="write", position="append" )
      if(state%space%adapt%adapt_level == 0) then
         write(iFile,'(a4, i8, i9, i12, 2i6, i12, 2i21, 50i14)')"# 1",2,3,4,5,6,7,8,9,10, &
              11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30, &
              31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50
         write(iFile, '(x)')
      endif
      write(iFile,'(i4, i8, i9, es12.4, 2i6, 3es20.12, 50es14.6)') &
           state%space%adapt%adapt_level, &
           nelem, nsize, (1.*nsize)**(1./3), &
           state%time%iter , state%nlSolver%TAiter,   & ! this%aDWR%iter_lin_dual, & ! 5..6
           this%J%Ju_exact, this%J%Ju, this%J%Ju_BiCG, &  ! 7..9
           abs( DWR%J%Ju_BiCG - DWR%J%Ju_exact ),   &   ! 10
           !sqrt(  state%estim(dwr_aver,1) / state%estim(dwrE,1) ), &  ! 7..10
           sqrt(state%estim( 1:max_eta, 1)),                                     &  !11..40
           t_end-state%start_time, state%space%adapt%tol_max, state%space%adapt%tol_min, & !41..43
           1.*state%linSolver%iter, 1.*state%linSolver%iter_tot,&  ! 44..45
           1.*state%time%iter    ! 46
      !, sqrt(state%estim( dwr_aver, 1)) ,  &
       !  sqrt(state%estim( dwrS, 1)) , sqrt(state%estim( dwrS_abs, 1)) , &
       !  sqrt(state%estim( dwr_dualS, 1)) , sqrt(state%estim( dwr_dualS_abs, 1)) , &
        ! sqrt(state%estim( dwrA, 1)) , sqrt(state%estim( dwr_dualA, 1)), &

      close(iFile)

      if(state%space%adapt%adapt_level == 0) then
         open( iFile, file = fileNameMD , status = 'replace', action="write" )
         write(iFile, *) 'Alev | nelem | DoF | tsteps | TAiter | linI | Atol | Ltol | J(u) | J(uh) |', &
              'etaI |etaA| eta| etaII | error |  Aeta | CPU |'
         write(iFile, *) '|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|'
      else
         open( iFile, file = fileNameMD , status = 'UNKNOWN', position="append" )
      endif
      write(iFile, 998) state%space%adapt%adapt_level, nelem, nsize, &       ! 1..3
           state%time%iter , state%nlSolver%TAiter, state%linSolver%iter_tot, & ! 4..6
           state%nlSolver%tol2, state%linSolver%tol, &                          ! 7..8
           this%J%Ju_exact, this%J%Ju, &                                        ! 9..10
           sqrt(state%estim( 21, 1)),sqrt(state%estim( 4, 1)), &               ! 11.12
           sqrt(state%estim( 21, 1))+ sqrt(state%estim( 4, 1)), &              ! 13 
           sqrt(state%estim( 1, 1)), sqrt(state%estim( 5, 1)), &               ! 14..15
           sqrt(state%estim( 24, 1)), & !
           t_end-state%start_time                                               ! 16
998   format( 6(i8 ' ' ), 2(es12.4,' '), 2(es14.6,' '), 6(es12.4,' '),  f8.1 )
999   format( 6(i8 '|' ), 2(es12.4,'|'), 2(es14.6,'|'), 6(es12.4,'|'),  f8.1  )
      close(iFile)

  end subroutine writeDWRerrorsFile



!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!! END OF DWR PROCEDURES !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!


   !> ! primal linear algebraic estimate for DWR method
   !> if the dual solution is not ready yet use norm of the residual
   subroutine computePrimalLinearDWRestimates( DWR, grid, primalRes)
    type( DWR_t ), intent(inout) :: DWR
    class( mesh ), intent(inout) :: grid
    real, dimension(:), intent(in) :: primalRes
    class( element ), pointer :: elem
    integer :: i, k, dof, Tdof, ndof, kvec, ivec, l
    integer :: nelem
    real :: estLinP
    real :: normZ
    real temp

    call state%cpuTime%startEstimTime()

    temp = 0.0 ! try DWR%x ~ zST
    estLinP = 0.0

    normZ = norm2( DWR%x)

    if (normZ < 1.E-15 ) then
      ! dual solution zST was not computed yet
      print*, 'PrimalLinearDWRestimates - zST is not ready, we use ||res|| instead'
!      print*, 'Should it be multiplied somehow? '
      estLinP = norm2( primalRes)

    else
       nelem = grid%nelem
       ivec = 0
       do i=1,nelem
            elem => grid%elem(i)
            Tdof = elem%Tdof
            dof = elem%dof
            ndof = dof * Tdof * ndim

            kvec = ivec
            do l = 1, Tdof
               do k = 1, ndim
                  elem%eta(dwrLinP,1) = dot_product( &
                     primalRes(kvec+1:kvec+dof), grid%elem(i)%zST(k,1:dof,l) )
                  estLinP = estLinP + elem%eta(dwrLinP,1)
                  kvec = kvec + dof
               enddo !k
            enddo !l
            ivec = ivec + ndof
         end do !i
         temp = dot_product( primalRes(1:state%nsize), DWR%x(1:state%nsize) )
         if ( abs(temp - estLinP) > 1.E-9)  &
            print*, 'Difference between DWR%x and zST (should be the same):' , temp - estLinP
    endif

    DWR%estimLP = abs( estLinP )

    call state%cpuTime%addEstimTime()
    !print*, 'estLinP = ' , estLinP

   end subroutine computePrimalLinearDWRestimates

  !> dual linear algebraic estimate for the DWR method
  !> compute  \f$ \eta_I^* = ( C^T*z_h - J(u_h) , u_h ) \f$
   subroutine computeDualLinearDWRestimates( DWR, grid)
    type( DWR_t ), intent(inout) :: DWR
    class( mesh ), intent(inout) :: grid
    class( element ), pointer :: elem
    integer :: i, k, dof, Tdof, ndof, kvec, ivec, l
    integer :: nelem
    real :: estLinD

    estLinD = 0.0

    nelem = grid%nelem
    ivec = 0
    do i=1,nelem
         elem => grid%elem(i)
         Tdof = elem%Tdof
         dof = elem%dof
         ndof = dof * Tdof * ndim

         kvec = ivec
         do l = 1, Tdof
            do k = 1, ndim
               elem%eta(dwrLinD,1) = dot_product( &
                  DWR%rr(kvec+1:kvec+dof), grid%elem(i)%wST(k,1:dof,l) )
!                  b(kvec+1:kvec+dof) - DWR%rhs(i)%x(k,1:dof,l), grid%elem(i)%wST(k,1:dof,l) )
               ! SUM OF abs- values ?
               !elem%eta(dwrLinD,1)  = abs(elem%eta(dwrLinD,1) )
               estLinD = estLinD + elem%eta(dwrLinD,1)
               kvec = kvec + dof
            enddo !k
         enddo !l
         ivec = ivec + ndof
      end do !i

      DWR%estimLD = abs( estLinD )
      !print*, 'estLinD = ' , estLinD

   end subroutine computeDualLinearDWRestimates





   !> evaluate the value of J_D(phi)
   !> !! works only for linear problems - we need the term in Neumann BC to be linear
   !> modification to the nonlinear problems too
   !>  due to the modification of J, we compute here its derivative_
   !>  JJ'_u(v) = J^L(u,v) - \sum_{\partial K \cap Gamma_D) ( \int_{-}( \sigmav*j_D ) + \int_{+}((\sigma + b*n)*v*j_D ) )
   !> time independent - for stationary problems only !!!
   subroutine evalElemRHS_BoundaryFlux(JJ, elem, i, rhs)
    class( BoundaryFlux_t ),intent(in) :: JJ
    class( element ), intent(in) :: elem
    integer, intent(in) :: i    ! index of element segment in the sequence JJ%supp
    real, dimension(1:ndim, 1:elem%dof, 1:elem%Tdof), intent(out) :: rhs
    type(Gauss_rule), pointer :: G_rule
    real, allocatable, dimension(:,:,:,:,:) :: K_sk
    real, allocatable, dimension(:,:) :: wi, Re_1, Rflux, phi
    real, allocatable, dimension(:,:) :: jD, bTimesN, jump
    real, allocatable, dimension(:,:,:) :: Dwi, Dphi, firstPart, secondPart
    integer :: j, Qdof, dof, iFace, l
    !logical :: partialPlus ! \partial K^+
    real :: time, sigma, val


    if (ndim>1 .or. state%time%deg >0)  &
      stop  'evalElemRHS_BoundaryFlux not done foe ndim > 1 and q>0!'

    if(JJ%supp(i) /= elem%i) then
       print*,' Troubles in evalElemRHS_BoundaryFlux RDR46F', i, JJ%supp(i), elem%i
       stop
    endif

    time = state%time%finTime

    !print*, 'what is in i???', i, JJ%suppFace(:)
    !if(i > 10) stop

    ! i = index of element in the sequence JJ%supp
    ! iface == index of edge (1,2,3) of element i
    iFace = JJ%suppFace(i) ! local index of the edge we need to integrate over
    Qdof = elem%face(fGdof, iFace )
    dof = elem%dof

    !do j=1,Qdof
    !   write(119, *) elem%xi(iFace, j, 1:2)
    !enddo
    !print*, "RDERFDER", Qdof, ndim, iRe, dof

    ! compute weighting function
    allocate(jD(1:Qdof,1:ndim) , source = 0.0)
    do j = 1, Qdof
       ! xi on the face !
       ! TODO make same of othe types of J ;+-)
       jD(j,1:ndim) = JJ%evalWeightFunction( elem%xi( iFace ,j,1:nbDim), time)
    enddo

    !!!! A(u)*grad(phi)*n !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    ! amount  of diffusivity \epsilon = 1/Re
    allocate( Re_1(1:iRe,1:Qdof), source = state%model%Re1 )
    ! compute flux, w and Dw from inside
    allocate( wi(1:Qdof, 1:ndim ), Dwi(1:Qdof, 1:ndim, 1:nbDim) )
    allocate( K_sk(1:Qdof,1:nbDim,1:nbDim,1:ndim,1:ndim) )
    allocate( Rflux(1:Qdof,1:nbDim), source = 0.0 )

    call Eval_w_Edge(elem, iFace, wi(1:Qdof,1:ndim), .false.)
    call Eval_Dw_Edge(elem, iFace, Dwi(1:Qdof, 1:ndim,1:nbDim), .false.)
    ! evaluate the matrix of the diffusion
    call Set_K_sk_scalar(ndim, nbDim, iRe, Qdof, wi(1:Qdof,1:ndim), &
            Dwi(1:Qdof,1:ndim,1:nbDim), Re_1(1:iRe,1:Qdof), &
            K_sk(1:Qdof,1:nbDim,1:nbDim,1:ndim,ndim), elem%xi(iFace ,1:Qdof, 1:nbDim) )

    ! compute the flux vector n'*A(u)
    ! SCALAR ONLY
    do l=1,Qdof
      Rflux(l, 1:nbDim) = matmul( elem%n(iFace,1:nbDim), K_sk(l,1:nbDim,1:nbDim,1,1) ) &
                            / elem%dn(iFace)
    end do

    !deallocate( wi, Dwi, K_sk, Re_1 )

    ! evaluate gradient of test functions
    ! derivatives on real element
    allocate(Dphi(1:dof, 1:nbDim, 1:Qdof) )
    call Eval_Dphi_Edge(elem, dof, iFace, Dphi(1:dof, 1:nbDim, 1:Qdof), .false.)

    ! Dphi are premultiplied by the weights !!!!!, hence
    G_rule => state%space%G_rule(elem%face(fGnum,iFace))
    do l = 1,Qdof
       Dphi(1:dof,1:2, l) = Dphi(1:dof,1:2, l) /G_rule%weights(l)
    enddo


    ! FR TODO ???
    allocate( firstPart(0:dof,1:Qdof, 1:ndim), source = 0.0 )
    allocate( secondPart(0:dof,1:Qdof, 1:ndim), source = 0.0 )

    ! multiply (n'*A)*grad(phi)
    !do j=1,Qdof
    !   firstPart(0, j,1:ndim) =  dot_product( Dwi(j,1, 1:nbDim), Rflux(j, 1:nbDim) ) ! only test
    !enddo

    do l= 1,dof
       do j = 1,Qdof
          firstPart(l, j,1:ndim) =  dot_product( Dphi(l, 1:nbDim, j), Rflux(j, 1:nbDim) )
       enddo
    end do

    !deallocate( Rflux, Dphi)

    !!! 2nd PART

    ! compute the added part, differs if \gamma \in \partial K+ or \partial K-
    ! compute b*n and partial PLUS/MINUS
    allocate( bTimesN(1:Qdof,1:ndim), source = 0.0 )

    ! b*n is only in \partial K^+
    call computeBtimesN( elem, iFace, Qdof, bTimesN(1:Qdof,1:ndim), .true.)
    if(norm2(bTimesN(:, 1)) > 0.)    print*, 'not tested for b/=0 !   TRF54ED67HG'

    ! add sigma
    sigma = elem%d_gamma * state%model%Re1 / elem%dn(iFace)

    bTimesN(1:Qdof,1:ndim) = bTimesN(1:Qdof,1:ndim) + sigma

    !evaluate test functions
    allocate( phi(1:dof,1:Qdof) )
    call Eval_Phi_Edge(elem, dof, iFace, phi(1:dof,1:Qdof), .false.)


    ! only SCALAR
    do l = 0, dof
       if(l == 0) then
       !   secondPart(0,1:Qdof,1) = bTimesN(1:Qdof,1) * wi(1:Qdof,1)   ! only for test
       else
          do j = 1,ndim
             secondPart(l,1:Qdof,j) = bTimesN(1:Qdof,j) * phi(l,1:Qdof)
          end do
       endif

       ! put together
       firstPart(l,1:Qdof,1:ndim) = jD(1:Qdof,1:ndim) * &
            ( firstPart(l, 1:Qdof,1:ndim) - secondPart(l,1:Qdof,1:ndim) )
    enddo

    ! integrate over the edge iFace,   only for q = 0 (constant in time)
    do l = 1,dof
      call IntegrateFunctionEdge( elem, iFace, firstPart(l,1:Qdof,1), rhs(1, l , 1) )
   end do

   !if(elem%i == 3) then
   !   call IntegrateFunctionEdge( elem, iFace, firstPart(0,1:Qdof,1), val)
   !   write(*,'(a8, i5, 30es12.4)') "J(u,u)",elem%i, val, &
   !        dot_product(rhs(1, 1:dof , 1) , elem%wActual(1,1:dof))
   !endif

    deallocate(jD, phi, firstPart, secondPart, bTimesN)
    deallocate( wi, Dwi, K_sk, Re_1 )
    deallocate( Rflux, Dphi)

    !print*,'end subroutine evalElemRHS_BoundaryFlux'
  end subroutine evalElemRHS_BoundaryFlux

  !> weightFun = ( zSTplus - I_h^p (zSTplus) )
  !> according to wType zSTplus is either local reconstruction or global sol. of higher degree
  subroutine setWeightFun_primalEst( elem , wType, weightFun)
    class( element ), intent(in) :: elem
    character(len=20), intent(in) :: wType
    real, dimension(:,:), intent(out) :: weightFun
    integer :: nd, dof

    nd = size(weightFun, 1)
    dof = size(weightFun, 2)

    if (dof <= elem%dof ) &
      stop "dof in setWeightFun_dualEst should be bigger than elem%dof"

    ! FR_DEGPLUS

    if ( wType == "GlobSol") then
      if ( size(elem%zSTplus,1) < nd .or. size(elem%zSTplus,2) < dof ) then
        stop "Too small zSTplus in setWeightFun_primalEst"
      end if
      weightFun(1:nd,1:dof) = elem%zSTplus(1:nd, 1:dof, 1)

    else if (wType == "LocRitz") then
      if ( size(elem%zST_Ritz,1) < nd .or. size(elem%zST_Ritz,2) < dof ) then
        stop "Too small zST_Rizt in setWeightFun_primalEst"
      end if
      weightFun(1:nd,1:dof) = elem%zST_Ritz(1:nd, 1:dof,1)


    else if (wType == "LocLS") then

      if ( size(elem%zST_LS,1) < nd .or. size(elem%zST_LS,2) < dof ) then
        stop "Too small zST_Ritz in setWeightFun_primalEst"
      end if
      weightFun(1:nd,1:dof) = elem%zST_LS(1:nd, 1:dof, 1)

    else
        stop "unknown type "
    end if

    weightFun(1:nd,1:elem%dof) = 0.0  ! weight = ( wSTplus - I_h^p (wSTplus) )


  end subroutine setWeightFun_primalEst

  !> weightFun = ( wSTplus - I_h^p (wSTplus) )
  !> according to wType wSTplus is either local reconstruction or global sol. of higher degree
  subroutine setWeightFun_dualEst( elem , wType, weightFun)
    class( element ), intent(in) :: elem
    character(len=20), intent(in) :: wType
    real, dimension(:,:), intent(out) :: weightFun
    integer :: nd, dof, ifile

    nd = size(weightFun, 1)
    dof = size(weightFun, 2)

    if (dof <= elem%dof ) &
      stop "dof in setWeightFun_dualEst should be bigger than elem%dof"


    if ( wType == "GlobSol") then
    ! FR_DEGPLUS - not done wSTplus
!      if ( size(elem%wSTplus,1) < nd .or. size(elem%wSTplus,2) < dof ) then
!        stop "Too small wSTplus in setWeightFun_dualEst"
!      end if
      weightFun(1:nd,1:dof) = elem%wST_Ritz(1:nd, 1:dof,1)
      if (elem%i == 1) &
        print*, "GlobSol weight is not done! wSTplus is replaced by wST_Ritz"
      !weightFun(1:nd,1:dof) = elem%wSTplus(1:nd, 1:dof, 1)

    else if (wType == "LocRitz") then
      if ( size(elem%wST_Ritz,1) < nd .or. size(elem%wST_Ritz,2) < dof ) then
        stop "Too small wSTplus in setWeightFun_dualEst"
      end if
      weightFun(1:nd,1:dof) = elem%wST_Ritz(1:nd, 1:dof,1)

      !ifile = 500 + 10 * state%space%adapt%adapt_level
      !call PlotElemFunction3D(ifile+1, elem, dof, elem%wST_Ritz( 1:ndim, 1:dof, 1) )

    else if (wType == "LocLS") then

      if ( size(elem%wST_LS,1) < nd .or. size(elem%wST_LS,2) < dof ) then
        stop "Too small wST_LS in setWeightFun_dualEst"
      end if
      weightFun(1:nd,1:dof) = elem%wST_LS(1:nd, 1:dof, 1)

    else
        stop "unknown type "
    end if

    weightFun(1:nd,1:elem%dof) = 0.0  ! weight = ( wSTplus - I_h^p (wSTplus) )


  end subroutine setWeightFun_dualEst

  !> test for DWR arrays x, b, rr
  !> subroutine verifies 1) if the array is allocated 2) if it has the given size, otherwise, it is allocated
  subroutine DWRarrays_allocated_size ( DWR, nsize)
    type( DWR_t ), intent(inout) :: DWR
    integer, intent(in) :: nsize

    ! array DWR%b
    if (allocated(DWR%b) ) then
       if(size(DWR%b) /= nsize) then
          ! reallocation
          deallocate(DWR%b)
          allocate(DWR%b(1:nsize) )
       endif
    else
       ! new allocation
       allocate(DWR%b(1:nsize) )
    endif

    ! array DWR%x
    if (allocated(DWR%x) ) then
       if(size(DWR%x) /= nsize) then
          ! reallocation
          deallocate(DWR%x)
          allocate(DWR%x(1:nsize) )
       endif
    else
       ! new allocation
       allocate(DWR%x(1:nsize) )
    endif

    ! array DWR%rr
    if (allocated(DWR%rr) ) then
       if(size(DWR%rr) /= nsize) then
          ! reallocation
          deallocate(DWR%rr)
          allocate(DWR%rr(1:nsize) )
       endif
    else
       ! new allocation
       allocate(DWR%rr(1:nsize) )
    endif

  end subroutine DWRarrays_allocated_size

end module dwr_mod
