module dwr_res

   use dwr_mod
   use elemental_mod
   use eval_rtn_st_mod
   use eval_jumps
   use eval_sol
   use euler_problem
   use main_data  ! contains type(mesh) :: grid for computation
   use paramets
   use problem_oper
   use target_functional_mod
   use weight_fun_mod
   use model2DNS


   implicit none

   public :: DWRAniEstimElemDual
   public :: DWRAniEstimElemDual_Edges
   public :: DWRAniEstimElemDual_Edges_Scalar
   public :: DWRAniEstimElemDual_Edges_Euler
   public :: DWRAniEstimElemDual_Volume
   public :: DWRAniEstimElemDual_Volume_Scalar
   public :: DWRAniEstimElemDual_Volume_Euler


   public :: DWREdgeEstim_Euler
   public :: DWRAniEstimElemPrimal
   public :: DWREdgeEstim_convection
   public :: DWREdgeEstim_diffusion
   public :: DWREdgeDiffusiveNorm
   public :: DWRElemEstim_ComputeBoundaryNorms
   public :: DWRElemEstim_Edge1
   public :: DWRElemEstim_Edge2
   public :: DWRElemEstim_Volume

   public :: prepareDualSolutionAndFluxes
   public :: setDWRweightingBoundaryFunctions

contains

   !> DWR estimation of the VOLUME residual (primal problem) over one element
   !> STATIONARY PROBLEMS ONLY
   !> 1) linear C-D eq (ndim == 1):
   !>    compute || f + \nabla . A \nabla w_h - \nabla .( \bkb w_h) - c u_h ||_K,
   !>    assembled in the volume quadrature nodes as  rhs - div(sigma) - S
   !> 2) Euler eq (ndim == 4, Re == 0, modelName == 'NSe'):
   !>    || sum_{s=1}^{nbDim}  A_s(w) * dw/dx_s ||_K
   !> any other model terminates the program
   !> the residual is passed to IntegrateSquareVectorFunction2, the result is
   !> the norm SQUARED (one value per solution component)
   function DWRElemEstim_Volume(elem) result( estim )
      type(element), intent(in) :: elem   ! element on which the residual is evaluated
      real, dimension(1:ndim) :: estim    ! squared norm of the residual, per component
      integer :: k, l, Qnum, Qdof
      real, dimension(1:elem%Qdof,1:ndim) :: wi              ! w_h in the quadrature nodes
      real, dimension(1:elem%Qdof, 1:ndim, 1:nbDim) :: Dwi   ! derivatives of w_h in the nodes
      real, dimension(1:elem%Qdof,1:nbDim) :: x              ! nodes of the volume quadrature rule

      real, allocatable, dimension(:,:) :: rhs, res, divSigma, S
      real, allocatable, dimension(:,:,:) :: sigma, sigma2 ! sigma2 = transposed sigma
      real, allocatable, dimension(:,:,:,:) :: A_s

      Qnum = elem%Qnum
      Qdof = elem%Qdof

      ! nodes of the volume rule; EvalDiv_F needs the integration nodes
      ! in the REFERENCE triangle
      x(1:Qdof, 1:nbDim) = state%space%V_rule(Qnum)%lambda(1:Qdof,1:nbDim)

      ! discrete solution and its derivatives in the quadrature nodes
      call Eval_w_Elem(elem, wi(1:Qdof,1:ndim) )
      call Eval_Dw_Elem(elem, Dwi(1:Qdof, 1:ndim, 1:nbDim) )

      ! pointwise residual, accumulated below
      allocate( res(1:Qdof, 1:ndim), source = 0.0)

      !!! SCALAR CONVECTION-DIFFUSION EQ
      if (ndim == 1) then

        allocate( sigma(1:ndim, 1:nbDim, 1:Qdof ), source = 0.0)
        allocate( sigma2(1:Qdof,1:nbDim, 1:ndim), source = 0.0 )
        allocate( rhs(1:ndim,1:Qdof), source = 0.0 )
        allocate( divSigma(1:Qdof,1:ndim), source = 0.0 )
        allocate( S(1:Qdof,1:ndim), source = 0.0 )

        ! source term f in the physical quadrature nodes elem%xi(0,:,:);
        ! for stationary problems the time argument is not relevant.
        ! With a subdomain RHS and iSubMesh /= -1 the source stays zero
        ! (rhs was zero-initialised by the allocation above).
        if ( .not. state%model%subdomainRHS  .or. elem%iSubMesh == -1) then
           do k=1, Qdof
              call RHS_Scalar( elem%xi(0,k, 1:nbDim), rhs(1:ndim, k), state%time%ctime )
           enddo
        endif

        ! flux - ( A \nabla u - \bkb u ) in INTEGRATION nodes !
        sigma(1:ndim, 1:nbDim, 1:Qdof) = EvalRTNFluxVolume( &
                  Set_f_s_scalar, Set_R_s_scalar, elem, state%space%V_rule( elem%Qnum ) )

        ! transpose sigma: EvalDiv_F expects the node index first
        do k = 1,ndim
          sigma2( 1:Qdof,1:nbDim, k ) = transpose( sigma(k, 1:nbDim, 1:Qdof) )
        end do

        ! divSigma - divergence of the flux in the quadrature nodes
        call EvalDiv_F( elem, Qnum, Qdof, x(1:Qdof, 1:nbDim), &
                sigma2( 1:Qdof, 1:nbDim, 1:ndim), divSigma(1:Qdof, 1:ndim) )

        ! the reaction part, scalar eq only
        call Set_S_scalar(ndim, nbDim, Qdof, elem%xi(0,1:Qdof, 1:nbDim), wi(1:Qdof,1:ndim), &
              Dwi(1:Qdof, 1:ndim, 1:nbDim), S(1:Qdof, 1:ndim) )

        ! put everything together to one array
        res(1:Qdof, 1:ndim) = transpose(rhs(1:ndim, 1:Qdof)) - divSigma(1:Qdof,1:ndim) &
                              - S(1:Qdof, 1:ndim)

        deallocate( sigma, sigma2, rhs, divSigma, S)

      !!! EULER EQUATION
      else if (ndim == 4 .and. state%model%Re == 0.0 .and. state%modelName == 'NSe') then
        allocate( A_s(1:Qdof,1:nbDim,1:ndim,1:ndim), source = 0.0)
        ! last argument is not used
        call Set_A_s_Euler(ndim, nbDim, Qdof, wi(1:Qdof,1:ndim), &
              A_s(1:Qdof,1:nbDim,1:ndim,1:ndim), x(1:Qdof,1:nbDim), 0)

         ! res = sum_s A_s(w)*(dw/dx_s) in every quadrature node
         do k = 1, Qdof
            do l = 1,nbDim
              res(k, 1:ndim) = res(k, 1:ndim) + &
                matmul( A_s(k,l,1:ndim,1:ndim), Dwi(k,1:ndim, l) )
            end do
         end do

        deallocate( A_s )
      else
        print*, "DWRElemEstim_Volume: This kind of model is not done!"
        print*, "Only scalar linear c-d eq or Euler eq is implemented"
        stop "DWRElemEstim_Volume:"
      end if

      ! compute the L2 norm squared
      call IntegrateSquareVectorFunction2( elem, res(1:Qdof, 1:ndim), estim(1:ndim) )

      deallocate( res )

   end function DWRElemEstim_Volume

   !> DWR estimation of the VOLUME DUAL residual over one element
   !> STATIONARY PROBLEMS ONLY
   !> dispatcher: selects the model-specific implementation
   !>   modelName == 'scalar'                       -> DWRAniEstimElemDual_Volume_Scalar
   !>   ndim == 4, Re == 0 and modelName == 'NSe'   -> DWRAniEstimElemDual_Volume_Euler
   !> any other setting terminates the program
   function DWRAniEstimElemDual_Volume(elem, J) result( estim )
      type(element), intent(in) :: elem
      class( Target_functional_t ),intent(in) :: J ! target functional
      real, dimension(1:ndim) :: estim
      logical :: scalarModel

      scalarModel = ( state%modelName == 'scalar' )

      if (.not. scalarModel) then
         ! only the inviscid (Euler) setting is supported besides the scalar one
         if (.not. (ndim == 4 .and. state%model%Re == 0.0 .and. state%modelName == 'NSe')) &
            stop "DWRAniEstimElemDual_Volume: unknown kind of problem!"

         estim(1:ndim) = DWRAniEstimElemDual_Volume_Euler(elem, J)
      else
         estim(1:ndim) = DWRAniEstimElemDual_Volume_Scalar(elem, J)
      end if

   end function DWRAniEstimElemDual_Volume




   !> DWR estimation of the VOLUME DUAL residual over one element
   !> STATIONARY PROBLEMS ONLY, scalar model only (modelName == 'scalar', ndim == 1)
   !> compute || j_om + \nabla * (A \nabla z_h) + \bkb * grad(z_h) - c z_h ||_K
   !> assembled in the volume quadrature nodes as  rhs + divSigma - S, where
   !>   rhs      = weight function of the target functional J (zero if J is a boundary
   !>              functional or elem%iSubMesh /= 1)
   !>   divSigma = div( K(w_h) grad(z_h) ) + (linearised convection) * grad(z_h)
   !>   S        = reaction term returned by Set_S_scalar for z_h
   !> the residual is passed to IntegrateSquareVectorFunction2
   function DWRAniEstimElemDual_Volume_Scalar(elem, J) result( estim )
      type(element), intent(in) :: elem
      class( Target_functional_t ),intent(in) :: J ! target functional
      real, dimension(1:ndim) :: estim
      integer :: i, k, l, Qnum, dof, Qdof
      real, dimension(:,:), allocatable :: divSigma, zi, wi, S, rhs, res
      real, dimension(:,:), allocatable :: x
      real, dimension(:,:,:), allocatable :: Dzi, Dwi
      real, dimension(:,:), allocatable ::  Re_1
      real, dimension(:,:,:), allocatable :: R_s_inner
      real, dimension(:,:,:,:), allocatable :: convVec2
      real, allocatable, dimension(:,:,:,:,:) :: K_sk
      logical :: iprint

      ! debug switch; assigned here (not initialised in the declaration) so that it
      ! does not get the implicit SAVE attribute and is reset on every call
      iprint = .false.
      !if(elem%i == 555) iprint = .true.
      !if(elem%i == 155) iprint = .true.
      !if( abs(elem%xc(1)-0.5) <= 1E-1) iprint = .true.

      if (state%modelName /= 'scalar') &
        stop "DWRAniEstimElemDual_Volume_Scalar only for scalar linear C-D problems"

      Qnum = elem%Qnum
      dof = elem%dof
      Qdof = elem%Qdof

      allocate( wi(1:Qdof, 1:ndim), zi(1:Qdof, 1:ndim), source = 0.0)
      allocate( Dwi(1:Qdof, 1:ndim, 1:nbDim), source = 0.0 )
      allocate( Dzi(1:Qdof, 1:ndim, 1:nbDim), source = 0.0 )

      allocate( divSigma(1:Qdof, 1:ndim), S(1:Qdof, 1:ndim), source = 0.0)
      allocate( rhs(1:Qdof, 1:ndim), res(1:Qdof, 1:ndim), source = 0.0 )
      allocate( x(1:Qdof, 1:nbDim), source = 0.0 )

      if(iprint) print*,'ERDE#$ER:',  J%boundary, elem%iSubMesh

      if(iprint) write(*,'(a8, 40es12.4)') 'ws A',rhs(1:Qdof,1)

      ! compute rhs from the target functional:
      ! has the functional J a volume part & is elem in the support?
      ! otherwise j_\om == 0.0, which is what the allocation above already set
      if (.not. J%boundary .and. elem%iSubMesh == 1) then
         ! compute j_\om in the integration nodes
         do k = 1,Qdof
            ! does not depend on time now, only stationary problems work !
            rhs(k,1:ndim) = J%evalWeightFunction( elem%xi(0,k, 1:nbDim), state%time%ctime )
         end do
      endif

      if(iprint) write(*,'(a8, 40es12.4)') 'ws rhs',rhs(1:Qdof,1)

      if (ndim>1) stop 'ndim>1 not implemented in DWRAniEstimElemDual_Volume'

      ! dual solution z_h and primal solution w_h in the quadrature nodes
      ! only for stationary problems ie q = 0
      do l = 1,ndim
         ! zi enters the reaction term (Set_S_scalar) below
         call Eval_func_Elem(elem, dof, elem%zST(l,1:dof,1), zi(1:Qdof,l) )
         call Eval_func_Elem(elem, dof, elem%wST(l,1:dof,1), wi(1:Qdof,l) )
      end do ! l

      ! derivatives of w_h and z_h, only for stationary problems ie q = 0
      call Eval_DVecTransposed_Elem(elem, dof, elem%wST(1:ndim,1:dof,1), &
        Dwi(1:Qdof,1:ndim,1:nbDim) )

      call Eval_DVecTransposed_Elem(elem, dof, elem%zST(1:ndim,1:dof,1), &
        Dzi(1:Qdof,1:ndim,1:nbDim) )

      ! compute A(\grad(z)); zero-initialised, accumulated below
      allocate( R_s_inner(1:Qdof, 1:nbDim, 1:ndim), source=0.0 )
      ! amount  of diffusivity \epsilon = 1/Re
      allocate( Re_1(1:iRe,1:Qdof), source = state%model%Re1 )
      ! rows 2:iRe can contain the model parameters
      Re_1(2:iRe, 1:Qdof) = transpose( elem%xi(0, 1:Qdof, 2+1:2+iRe-1) )

      ! Calling Set_R_s_scalar directly with (zi, Dzi) would be correct only for a
      ! linear problem; the flux linearised at w_h is used instead.
      allocate( K_sk(1:Qdof,1:nbDim,1:nbDim,1:ndim,1:ndim), source = 0.0 )

      ! NOTE(review): the last subscript of the K_sk actual argument is the scalar
      ! 'ndim', not the range '1:ndim'. Both select the same elements because
      ! ndim == 1 is enforced above, but the range was probably intended -- confirm
      ! against the interface of Set_K_sk_scalar before changing it.
      call Set_K_sk_scalar(ndim, nbDim, iRe, Qdof, wi(1:Qdof,1:ndim), &
            Dwi(1:Qdof,1:ndim,1:nbDim), Re_1(1:iRe,1:Qdof), &
            K_sk(1:Qdof,1:nbDim,1:nbDim,1:ndim,ndim), elem%xi(0,1:Qdof, 1:nbDim) )

      ! ndim == 1 !!!  R_s_inner(:, i) = sum_k K_sk(:, i, k) * dz/dx_k
      do i=1,nbDim
         do k=1,nbDim
            R_s_inner(1:Qdof, i, 1) = R_s_inner(1:Qdof, i, 1) &
                 + K_sk(1:Qdof, i, k, 1, 1) * Dzi(1:Qdof, 1, k)
         enddo
      enddo

      deallocate(Re_1)

      ! compute div( A(\grad(z)) )
      ! EvalDiv_F needs the integration nodes XI in the REFERENCE TRIANGLE !!!
      x(1:Qdof, 1:nbDim) = state%space%V_rule(Qnum)%lambda(1:Qdof,1:nbDim)
      call EvalDiv_F( elem, Qnum, Qdof, x(1:Qdof, 1:nbDim), &
              R_s_inner(1:Qdof, 1:nbDim, 1:ndim), divSigma(1:Qdof, 1:ndim) )
      deallocate( R_s_inner )

      if(iprint) write(*,'(a8, 40es12.4)') 'ws divR',divSigma(1:Qdof,1)

      ! compute b * \grad(z)
      ! for LINEAR convection b(v) could be evaluated with v = 1.0,
      ! for nonlinear convection the linearisation at w_h is needed
      allocate( convVec2(1:Qdof,1:nbDim,1:ndim, 1:ndim), source = 0.0 )

      ! the following has to correspond with Set_A_s_scalar
      call Lin_f_s_scalar(ndim, Qdof, wi(1:Qdof,1:ndim), &
            convVec2(1:Qdof,1:nbDim,1:ndim, 1:ndim), elem%xi(0 ,1:Qdof, 1:nbDim))

      ! div( A(\grad(z)) ) + b * \grad(z)
      do i = 1,Qdof
        do l = 1,ndim
          divSigma(i, l) = divSigma(i, l) + &
                dot_product( convVec2(i,1:nbDim,l,l) , Dzi(i,l,1:nbDim) )  ! VD correction
        end do
      end do
      if(iprint) write(*,'(a8, 40es12.4)') 'ws divf',divSigma(1:Qdof,1)

      deallocate(convVec2)

      ! the reaction part
      call Set_S_scalar(ndim, nbDim, Qdof, elem%xi(0,1:Qdof, 1:nbDim), zi(1:Qdof,1:ndim), &
            Dzi(1:Qdof, 1:ndim, 1:nbDim), S(1:Qdof, 1:ndim) )

      ! a non-negligible reaction term is only reported, the estimate is still computed
      if(norm2(S(:, 1)) > 1E-5) print*,'NOT CORRECT FOR NON-Linear reaction!!! RD4Ed5'

      if(iprint) write(*,'(a8, 40es12.4)') 'ws  S ',S(1:Qdof,1)

      ! put everything together to one array and compute ||.||_K
      res(1:Qdof, 1:ndim) = rhs(1:Qdof,1:ndim) + divSigma(1:Qdof,1:ndim) &
                            - S(1:Qdof, 1:ndim)

      if(iprint) write(*,'(a8, 40es12.4)') 'ws res',res(1:Qdof,1)

      ! compute the L2 norm
      call IntegrateSquareVectorFunction2( elem, res(1:Qdof, 1:ndim), estim(1:ndim) )

      deallocate(divSigma, zi, S, rhs, res, x, Dzi, wi, Dwi, K_sk)

   end function DWRAniEstimElemDual_Volume_Scalar



   !> DWR estimation of the EDGE residual over one element
   !> STATIONARY PROBLEMS ONLY, EXTENDED to NONLINEAR problems
   !> compute || R_{K,B} ||^2_{\partial K}, where
   !> R_{K,B} = - \sigma*[w_h] - 0.5*[\mK grad(w_h)]*n    on \partial K^+ \ \dom
   !> R_{K,B} = - \sigma*[w_h] - 0.5*[\mK grad(w_h)]*n + b*n[w_h]   on \partial K^- \ \dom
   !> R_{K,B} = g_N - \mK grad(w_h)*n   on \partial K^+ \cap \Gamma_N
   !> R_{K,B} = -\sigma (w_h - u_D)   on \partial K^+ \cap \Gamma_D
   !> R_{K,B} = (b*n - \sigma)*(w_h - u_D)  on \partial K^- \cap (\Gamma_D \cup \Gamma^-)
   !> R_{K,B} = 0 otherwise
   !> \mK - diffusion matrix, b - convective vector, n - outer normal vector
   !> \sigma - penalty in J_h^\sigma
   !> result is SQUARED
   !> scalar problem (ndim == 1): convective + diffusive edge residual,
   !> Euler eq (ndim == 4, Re == 0, modelName == 'NSe'): DWREdgeEstim_Euler,
   !> anything else terminates the program
   function DWRElemEstim_Edge1(elem) result( estim )
      type(element), intent(inout) :: elem
      real, dimension(1:ndim) :: estim
      integer :: ie, ruleIdx, nNodes
      real, dimension(1:ndim) :: faceNorm2                   ! contribution of one face
      real, dimension(:,:), allocatable :: faceRes, diffRes  ! residuals in the face nodes

      ! elem%max_eigenvals is reset before the face loop
      ! (presumably updated by the edge routines called below -- confirm)
      elem%max_eigenvals = 0.
      estim(1:ndim) = 0.0

      do ie = 1, elem%flen
        ! edge quadrature rule of this face and its number of nodes
        ruleIdx = elem%face(fGnum,ie)
        nNodes = state%space%G_rule(ruleIdx)%Qdof
        if(nNodes /= elem%face(fGdof,ie)) print*, '## Trouble in DWRElemEstim_Edge1'

        allocate( faceRes(1:nNodes, 1:ndim), source = 0.0 )

        if (ndim == 1) then
          ! scalar equation: convective (jump / penalty) part ...
          allocate( diffRes(1:nNodes, 1:ndim), source = 0.0 )
          call DWREdgeEstim_convection( elem, ie, nNodes, faceRes(1:nNodes, 1:ndim) )

          ! ... plus the diffusive part
          call DWREdgeEstim_diffusion( elem, ie, nNodes, diffRes(1:nNodes, 1:ndim) )

          faceRes(1:nNodes,1:ndim) = faceRes(1:nNodes,1:ndim) + diffRes(1:nNodes,1:ndim)
          deallocate(diffRes)

        else if (ndim == 4 .and. state%model%Re == 0.0 .and. state%modelName == 'NSe') then
          ! Euler equations
          call DWREdgeEstim_Euler(elem, ie, nNodes, faceRes(1:nNodes,1:ndim))

        else
          stop "DWRElemEstim_Edge1 is not done for this kind of problem!"
        end if

        ! L2 norm squared over the face, added to the element value
        call IntegrateFunctionVecEdgeSquared( elem, ie, ndim, &
             faceRes(1:nNodes,1:ndim), faceNorm2(1:ndim) )
        estim(1:ndim) = estim(1:ndim) + faceNorm2(1:ndim)

        deallocate( faceRes )

      end do ! ie

   end function DWRElemEstim_Edge1

   !> DWR estimation of the EDGE residual over one element
   !> STATIONARY PROBLEMS ONLY
   !> compute || R_{K,D} ||^2_{\partial K}, where
   !> R_{K,D} = 0.5 * [w_h] on inner faces
   !> R_{K,D} = -(u_D - w_h)  on Dirichlet boundary faces
   !> result is SQUARED
   !> scalar problem (ndim == 1): sum of integrateEdgeJumpDWR over all faces
   !> returns 0.0 for Euler eq., any other model terminates the program
   function DWRElemEstim_Edge2(elem) result( estim )
      type(element), intent(in) :: elem
      real, dimension(1:ndim) :: estim
      real, dimension(1:ndim) :: faceJump2   ! squared jump norm of one face
      integer :: ie

      estim(1:ndim) = 0.0

      if (ndim /= 1) then
         ! Euler eq.: nothing to add, the zero set above is returned;
         ! every other non-scalar model is not supported
         if (.not. (ndim == 4 .and. state%model%Re == 0.0 .and. state%modelName == 'NSe')) &
            stop "DWRElemEstim_Edge2 not done fir this kind of problem"
         return
      end if

      ! linear scalar problem: accumulate the L2 norm^2 of the jump face by face
      do ie = 1, elem%flen
         call integrateEdgeJumpDWR(elem, ie, faceJump2(1:ndim) )
         estim(1:ndim) = estim(1:ndim) + faceJump2(1:ndim)
      end do !ie

   end function DWRElemEstim_Edge2

   !> DUAL counterpart of DWRElemEstim_Edge2 -- NOT IMPLEMENTED
   !> the function terminates the program unconditionally with 'not DONE!!!'
   !> the commented sketch below mirrors the primal routine
   !> (sum of integrateEdgeJumpDWR over all faces, scalar problems only)
   function DWRElemEstimDual_Edge2(elem) result( estim )
      type(element), intent(in) :: elem
      real, dimension(1:ndim) :: estim

      ! define the result so that it is never left undefined
      estim(1:ndim) = 0.0

      stop 'not DONE!!!'

      ! sketch of the intended implementation (never executed):
!      integer :: ie
!      real, dimension(1:ndim) :: integralEdge
!
!      if (ndim>1) stop 'ndim>1 not implemented in DWRElemEstim_Edge2'
!
!      ! go through all edges
!      do ie = 1, elem%flen
!        ! compute L2 norm^2 for one face
!        call integrateEdgeJumpDWR(elem, ie, integralEdge(1:ndim) )
!        estim(1:ndim) = estim(1:ndim) + integralEdge(1:ndim)
!      end do !ie

   end function DWRElemEstimDual_Edge2



   !> DWR estimation of the DUAL EDGE residual over one element -- NOT IMPLEMENTED
   !> STATIONARY PROBLEMS ONLY
   !> intended to compute || R^*_{K,D} ||^2_{\partial K}, where
   !> R_{K,D} = 0.5 * [z_h] on inner faces
   !> R_{K,D} = (j_D + z_h)  on Dirichlet boundary faces
   !> R_{K,D} = 0 elsewhere
   !> at present the function prints 'not done !' and terminates the program
   function DWRAniEstimElemDual_Edge2(elem, J) result( estim )
      type(element), intent(in) :: elem
      class( Target_functional_t), intent(in) :: J   ! target functional
      real, dimension(1:ndim) :: estim

      ! define the result so that it is never left undefined
      estim(1:ndim) = 0.0

      print*, 'not done !'
      stop 'cdsa'

      ! sketch of the intended implementation (never executed):
!      integer :: ie
!      real, dimension(1:ndim) :: integralEdge
!
!      ! go through all edges
!      do ie = 1, elem%flen
!        ! compute L2 norm^2 for one face
!        call integrateEdgeJumpDWR(elem, ie, integralEdge(1:ndim) )
!        estim(1:ndim) = estim(1:ndim) + integralEdge(1:ndim)
!      end do !ie
!
!      if (ndim>1) stop 'ndim>1 not implemented in DWRElemEstim_Edge2'

   end function DWRAniEstimElemDual_Edge2


   !> compute the convective part of the estimate DWRElemEstim_Edge1
   !> on one edge in edge integration nodes
   !> estim = diff_F - penal * [w_h]   on inner edges and Dirichlet boundary edges
   !>       = 0                        on non-Dirichlet boundary edges
   !> diff_F - difference between the physical and the numerical flux
   !>          (compute_diff_fluxes), the nonlinear alternative of b*n [w_h]
   !> penal  - penalty from setPenaltySigma_New divided by elem%dn(ie)
   !> history: the older variant used (b*n - penal)*[w_h] with b*n from computeBtimesN
   !>          (only on \partial K^-); it works only for linear convection and
   !>          the upwind numerical flux (Vijayasundaram)
   !>          the older penalty setPenaltySigma does not work for discontinuous diffusion
   subroutine DWREdgeEstim_convection( elem, ie, Qdof, estim)
     class(element), intent(inout) :: elem
     integer, intent(in) :: ie    ! index of the edge
     integer, intent(in) :: Qdof  ! number of edge integration nodes
     real, dimension(1:Qdof,1:ndim), intent(out) :: estim
     real, dimension(:,:), allocatable :: jump, diff_F
     real, allocatable, dimension(:) :: penal
     integer :: k, ii

     allocate(jump(1:Qdof,1:ndim), diff_F(1:Qdof,1:ndim))

     ! difference between the physical and numerical fluxes;
     ! called for every edge, also when the result is not used below
     ! (elem is intent(inout) here, the call is deliberately kept in this position)
     call compute_diff_fluxes( elem, ie, Qdof, diff_F(1:Qdof,1:ndim) )

     ! index of the neighbouring element, <= 0 on boundary edges
     ii = elem%face(neigh, ie)

     ! elem%tBC(ie) == 0   => Dirichlet,  see set_sol.f90
     if ( (ii <= 0) .and. elem%tBC(ie) /= 0 ) then
        ! boundary edge without DIRICHLET BC -> no contribution
        estim(1:Qdof, 1:ndim) = 0.0
     else
        ! inner edge or Dirichlet boundary edge

        ! penalty of J_h^sigma in the integration nodes
        allocate(penal(1:Qdof) )
        call setPenaltySigma_New( elem, ie, Qdof, penal(1:Qdof) )
        penal(1:Qdof) =  penal(1:Qdof) / elem%dn(ie)

        ! jump of the solution over the edge
        call ElementEdgeJump(elem,  ie,  jump(1:Qdof, 1:ndim) )

        do k=1, ndim
           estim(1:Qdof, k) = diff_F(1:Qdof,k) - penal(1:Qdof) * jump(1:Qdof, k)
        enddo

        deallocate(penal)
     end if

     deallocate( jump,  diff_F)

   end subroutine DWREdgeEstim_convection


   !> compute the diffusive part of the estimate DWRElemEstim_Edge1
   !> on one edge in edge integration nodes
   !> inner edge:          -0.5*[\mK grad(w_h)]*n
   !> Neumann edge (tBC == -1):  g_N - \mK grad(w_h)*n
   !> Dirichlet edge (tBC == 0): zero
   !> any other boundary edge:   zero, a warning is printed
   !> the normal elem%n(ie,:) is divided by elem%dn(ie) in both formulas
   subroutine DWREdgeEstim_diffusion( elem, ie, Qdof, estim)
     type(element), intent(in) :: elem
     integer, intent(in) :: ie ! index of the edge
     integer, intent(in) :: Qdof
     real, dimension(1:Qdof,1:ndim), intent(out) :: estim
     real, allocatable, dimension(:,:) :: diffCoef, wIn, wNb, gN
     real, allocatable, dimension(:,:,:) :: DwIn, DwNb, fluxIn, fluxNb
     class(element), pointer ::   nbElem  ! neighbouring element
     integer :: l, nbIdx, nbFace

     nbIdx = elem%face(neigh, ie)

     if (nbIdx > 0) then
        ! ---------------- inner edge ----------------
        nbElem => grid%elem(nbIdx)
        nbFace = elem%face(nei_i,ie)
        if(Qdof /= nbElem%face(fGdof,nbFace)) print*,'## Trouble in DWREdgeEstim_diffusion'

        ! all work arrays of this branch
        allocate( diffCoef(1:iRe,1:Qdof), source = state%model%Re1 )
        allocate( wIn(1:Qdof, 1:ndim ), wNb(1:Qdof, 1:ndim ), source = 0.0 )
        allocate( DwIn(1:Qdof, 1:ndim, 1:nbDim), DwNb(1:Qdof, 1:ndim, 1:nbDim), source = 0.0 )
        allocate( fluxIn(1:Qdof, 1:nbDim, 1:ndim), fluxNb(1:Qdof, 1:nbDim, 1:ndim) )

        ! row 1: amount of diffusivity \epsilon = 1/Re,
        ! rows 2:iRe can contain the model parameters stored behind the coordinates
        diffCoef(2:iRe, 1:Qdof) = transpose( elem%xi(ie, 1:Qdof, 3:iRe+1) )

        ! w, Dw and the diffusive flux from the inside
        call Eval_w_Edge(elem, ie, wIn(1:Qdof,1:ndim), .false.)
        call Eval_Dw_Edge(elem, ie, DwIn(1:Qdof, 1:ndim,1:nbDim), .false.)
        call Set_R_s_scalar( ndim, nbDim, iRe, Qdof, wIn(1:Qdof,1:ndim), &
             DwIn(1:Qdof, 1:ndim, 1:nbDim), diffCoef(1:iRe,1:Qdof), &
             fluxIn(1:Qdof, 1:nbDim, 1:ndim) , &
             elem%xi(ie ,1:Qdof, 1:nbDim) )

        ! w and Dw from the outside (neighbour)
        call Eval_w_Edge(nbElem, nbFace, wNb(1:Qdof,1:ndim), .true.)
        call Eval_Dw_Edge(nbElem, nbFace, DwNb(1:Qdof, 1:ndim,1:nbDim), .true.)

        ! model parameters of the neighbour; its edge nodes are stored in the
        ! opposite ordering, hence the reversed section
        diffCoef(2:iRe, 1:Qdof) = transpose( nbElem%xi(nbFace, Qdof:1:-1, 3:iRe+1) )

        ! flux from the outside; because of the opposite ordering of nbElem%xi
        ! the coordinates elem%xi are passed
        call Set_R_s_scalar( ndim, nbDim, iRe, Qdof, wNb(1:Qdof,1:ndim), &
             DwNb(1:Qdof, 1:ndim, 1:nbDim), diffCoef(1:iRe,1:Qdof), &
             fluxNb(1:Qdof, 1:nbDim, 1:ndim) , &
             elem%xi(ie ,1:Qdof, 1:nbDim) )

        ! jump - for SCALAR ONLY - 0.5[\mK grad(w_h)]*n
        do l=1,ndim
           estim(1:Qdof, l) = -0.5 * matmul( fluxIn(1:Qdof, 1:nbDim, l) - &
                fluxNb(1:Qdof, 1:nbDim, l) , elem%n(ie,1:nbDim) ) / &
                elem%dn(ie)
        end do !l

        deallocate( wIn, wNb, DwIn, DwNb, diffCoef, fluxIn, fluxNb )

     else if ( elem%tBC(ie) == -1 ) then
        ! ---------------- Neumann boundary edge ----------------
        ! Neumann boundary should be subset of \partial K^+
        allocate( diffCoef(1:iRe,1:Qdof), source = state%model%Re1 )
        allocate( wIn(1:Qdof, 1:ndim ), DwIn(1:Qdof, 1:ndim, 1:nbDim) )
        allocate( fluxIn(1:Qdof, 1:nbDim, 1:ndim) )

        ! rows 2:iRe can contain the model parameters
        diffCoef(2:iRe, 1:Qdof) = transpose( elem%xi(ie, 1:Qdof, 3:iRe+1) )

        ! w, Dw and the diffusive flux from the inside
        call Eval_w_Edge(elem, ie, wIn(1:Qdof,1:ndim), .false.)
        call Eval_Dw_Edge(elem, ie, DwIn(1:Qdof, 1:ndim,1:nbDim), .false.)
        call Set_R_s_scalar( ndim, nbDim, iRe, Qdof, wIn(1:Qdof,1:ndim), &
             DwIn(1:Qdof, 1:ndim, 1:nbDim), diffCoef(1:iRe,1:Qdof), &
             fluxIn(1:Qdof, 1:nbDim, 1:ndim) , &
             elem%xi(ie ,1:Qdof, 1:nbDim) )

        ! prescribed diffusive flux g_N = A(u,\grad(u))*n
        allocate( gN(1:Qdof, 1:ndim), source = 0.0 )
        call EvalSolutionEdge_NeumannBoundaryFlux(elem, ie, Set_R_s_scalar, gN(1:Qdof, 1:ndim) )

        ! g_N - \mK grad(w_h)*n
        do l=1,ndim
           estim(1:Qdof, l) = gN(1:Qdof,l) &
                - ( matmul( fluxIn(1:Qdof, 1:nbDim, l), &
                elem%n(ie,1:nbDim) ) / elem%dn(ie) )
        end do !l

        deallocate( wIn, DwIn, fluxIn, diffCoef, gN )

     else
        ! ---------------- remaining boundary edges: ZERO ----------------
        ! Dirichlet BC (tBC == 0) has no diffusive part; any other type is reported
        if ( elem%tBC(ie) /= 0 ) then
           print*, 'Unknown BC in DWREdgeEstim_diffusion', elem%i, ie
           print*, 'Not Dirichlet and not Neumann BC !!!'
        endif
        estim(1:Qdof, 1:ndim) = 0.0
     endif

   end subroutine DWREdgeEstim_diffusion

   !> compute two boundary norms of a function given by basis coefficients
   !> normB = ||z||_\partial K ^2
   !> normD = ||\mK(z,grad(z))||_\partial K ^2, where \mK(.,.) is the diffusive flux
   !> normD is accumulated only for the scalar problem with nonzero diffusion
   !> (ndim == 1 and Re /= 0); in every other case it is returned as zero
   subroutine DWRElemEstim_ComputeBoundaryNorms( elem, dof, funInDof, normB, normD)
     type(element), intent(in) :: elem
     integer, intent(in) :: dof                               ! number of basis coefficients
     real, dimension(1:ndim, 1:dof), intent(in) :: funInDof   ! basis coefficients of the function
     real, dimension(1:ndim), intent(out) :: normB, normD
     real, dimension(1:ndim) :: normEdgeB, normEdgeD          ! contributions of one edge
     real, allocatable, dimension(:,:) :: zi
     real, allocatable, dimension(:,:,:) :: Dzi
     integer :: ie, Qnum, Qdof, l

     normB(1:ndim) = 0.0
     normD(1:ndim) = 0.0

     ! go through edges
     do ie=1,elem%flen
        ! edge quadrature rule of this face
        Qnum = elem%face(fGnum,ie)
        Qdof = state%space%G_rule(Qnum)%Qdof
        if(Qdof /= elem%face(fGdof,ie)) print*, '## Trouble in adsadl'

        ! function values in the edge integration nodes
        allocate( zi(1:Qdof,1:ndim), source = 0.0 )
        ! ||z||_\gamma^2
        do l = 1,ndim
           call Eval_func_Edge(elem, ie, dof, funInDof(l,1:dof), zi(1:Qdof,l), .false.)
           ! integrate the square; the INTEGER power is used on purpose, raising
           ! a negative real to a real power (**2.0) is not standard conforming
           call IntegrateFunctionEdge(elem, ie, zi(1:Qdof,l)**2, normEdgeB(l) )
        end do !l
        normB(1:ndim) = normB(1:ndim) + normEdgeB(1:ndim)

        ! second norm is needed only for problems with nonzero diffusion
        if ( ndim == 1 .and. state%model%Re /= 0.0 ) then
          allocate( Dzi(1:Qdof,1:ndim,1:nbDim), source = 0.0 )
          ! ||\mK(z,grad(z))||_\gamma^2
          ! derivatives evaluated from the inside (.false.)
          call Eval_funcDer_Edge(elem, ie, dof, funInDof(1:ndim,1:dof), Dzi(1:Qdof,1:ndim,1:nbDim), .false.)

          ! compute the L2-norm of the flux \mK grad(fun)
          call DWREdgeDiffusiveNorm(elem,ie, Qdof,zi(1:Qdof,1:ndim), &
               Dzi(1:Qdof, 1:ndim, 1:nbDim), normEdgeD(1:ndim) )
          normD(1:ndim) = normD(1:ndim) + normEdgeD(1:ndim)
          deallocate(Dzi)

          ! for euler equation it is set to zero
        else if (ndim == 4 .and. state%model%Re == 0.0 .and. state%modelName == 'NSe') then
            normD(1:ndim) = 0.0

        else
            ! unsupported model: normD stays zero, reported only for elem%i <= 1
            ! to avoid flooding the output
            normD(1:ndim) = 0.0
            if(elem%i <=1) print*, "Unknown case of problem in DWRElemEstim_ComputeBoundaryNorms"
        end if

        deallocate( zi )
     end do !ie

   end subroutine DWRElemEstim_ComputeBoundaryNorms


   !> compute the diffusive norm
   !> on one edge in edge integration nodes
   !> || \mK(z,grad(z)) ||_\gamma^2, where \mK is the diffusive flux
   !> zi, Dzi: function and its derivatives in the Qdof integration nodes of edge ie
   !> estim(l): sum over the nbDim flux components of the integrated squared flux
   !>           of solution component l
   subroutine DWREdgeDiffusiveNorm( elem, ie, Qdof, zi, Dzi, estim)
     class(element), intent(in) :: elem
     integer, intent(in) :: ie ! index of the edge
     integer, intent(in) :: Qdof
     real, dimension(1:Qdof,1:ndim), intent(in) :: zi
     real, dimension(1:Qdof, 1:ndim, 1:nbDim), intent(in) :: Dzi
     real, dimension(1:ndim), intent(out) :: estim
     real, allocatable, dimension(:,:) :: Re_1
     real, allocatable, dimension(:,:,:) ::  R_s_inner
     real, dimension(1:nbDim) :: temp
     integer :: l

     ! amount  of diffusivity \epsilon = 1/Re
     allocate( Re_1(1:iRe,1:Qdof), source = state%model%Re1 )

     ! rows 2:iRe can contain the model parameters stored behind the coordinates in elem%xi
     Re_1(2:iRe, 1:Qdof) = transpose( elem%xi(ie, 1:Qdof, 2+1:2+iRe-1) )

     ! compute flux from the inside
     allocate( R_s_inner(1:Qdof, 1:nbDim, 1:ndim) )
     call Set_R_s_scalar( ndim, nbDim, iRe, Qdof, zi(1:Qdof,1:ndim), &
          Dzi(1:Qdof, 1:ndim, 1:nbDim), Re_1(1:iRe,1:Qdof), &
          R_s_inner(1:Qdof, 1:nbDim, 1:ndim) , &
          elem%xi(ie ,1:Qdof, 1:nbDim) )

     ! compute norm of || \mK(z,grad(z))||_\Gamma^2
     ! the flux may be negative, hence the INTEGER power: a negative real
     ! must not be raised to a real power
     do l =1,ndim
        call IntegrateFunctionVecEdge(elem, ie, nbDim, transpose(R_s_inner(1:Qdof,1:nbDim,l)**2 ), temp(1:nbDim) )
        estim(l) = sum( temp(1:nbDim) )
     enddo !l

     deallocate( Re_1, R_s_inner )

   end subroutine DWREdgeDiffusiveNorm


   !> PRIMAL part of the DWR_ANI element estimate.
   !> Three residual terms (volume, boundary, diffusive boundary) are paired with
   !> three norms of the weight function supplied by setWeightFun_primalEst,
   !> v = (zPlus - P_h(zPlus)). Square roots of the values returned by the residual
   !> and norm routines are taken here, so every factor enters WITHOUT the square:
   !> fullEstim = sum_l ( resV_l*normV_l + resB_l*normB_l + resD_l*normD_l ).
   !> The six factors are also stored in elem%eta.
   !> The exact definition of res_* is in the article: Dolejsi 2017
   subroutine DWRAniEstimElemPrimal(elem, fullEstim, wType)
     class(element), intent(inout) :: elem
     real, intent(out) :: fullEstim ! full estimate with weight for HG refinement
     character(len=20), intent(in) :: wType  ! forwarded to setWeightFun_primalEst
     integer :: nCoefPlus
     real, dimension(1:ndim) :: resV, resB, resD      ! residual factors
     real, dimension(1:ndim) :: normV, normB, normD   ! weight factors
     real, allocatable, dimension(:,:) :: vCoef       ! zPlus - Projection(zPlus)

     ! DOFtriang of the degree raised by state%p_mod_max
     nCoefPlus = DOFtriang( elem%deg + state%p_mod_max )

     ! residual factors \eta_{K,V}, \eta_{K,B}, \eta_{K,D}
     resV(1:ndim) = sqrt( DWRElemEstim_Volume( elem ) )
     resB(1:ndim) = sqrt( DWRElemEstim_Edge1( elem ) )
     resD(1:ndim) = sqrt( DWRElemEstim_Edge2( elem ) )

     ! coefficients of the weight function
     allocate( vCoef(1:ndim,1:nCoefPlus), source = 0.0 )
     call setWeightFun_primalEst( elem, wType, vCoef(1:ndim,1:nCoefPlus) )

     ! volume norm, then both boundary norms of the weight function (still squared)
     call EvalL2NormSquared_funInDof(elem, nCoefPlus, vCoef(1:ndim,1:nCoefPlus), normV(1:ndim))
     call DWRElemEstim_ComputeBoundaryNorms( elem, nCoefPlus, vCoef(1:ndim,1:nCoefPlus), &
          normB(1:ndim), normD(1:ndim) )
     deallocate( vCoef )

     ! remove the squares
     normV(1:ndim) = sqrt( normV(1:ndim) )
     normB(1:ndim) = sqrt( normB(1:ndim) )
     normD(1:ndim) = sqrt( normD(1:ndim) )

     ! put all parts of the estimate together
     fullEstim = sum(resV(1:ndim)*normV(1:ndim) &
          + resB(1:ndim)*normB(1:ndim) + resD(1:ndim)*normD(1:ndim))

     ! keep the individual factors on the element
     elem%eta( dwrEtaKV,1:ndim) = resV(1:ndim)
     elem%eta( dwrEtaKB,1:ndim) = resB(1:ndim)
     elem%eta( dwrEtaKD,1:ndim) = resD(1:ndim)

     elem%eta( dwrWeightKV,1:ndim) = normV(1:ndim)
     elem%eta( dwrWeightKB,1:ndim) = normB(1:ndim)
     elem%eta( dwrWeightKD,1:ndim) = normD(1:ndim)

   end subroutine DWRAniEstimElemPrimal


   !> DUAL part of the DWR_ANI element estimate.
   !> The dual residual terms (volume from DWRAniEstimElemDual_Volume, boundary and
   !> diffusive boundary from DWRAniEstimElemDual_Edges) are paired with three norms
   !> of the weight function supplied by setWeightFun_dualEst,
   !> v = (wPlus - P_h(wPlus)). Square roots of the returned values are taken here,
   !> so every factor enters WITHOUT the square:
   !> fullEstim = sum_l ( resV_l*normV_l + resB_l*normB_l + resD_l*normD_l ).
   !> The six factors are also stored in elem%eta (the *_dual slots).
   !> The exact definition of res_* is in the article: Dolejsi 2017
   subroutine DWRAniEstimElemDual(elem, J, fullEstim, wType)
     class(element), intent(inout) :: elem
     class( Target_functional_t ), intent(in) :: J
     real, intent(out) :: fullEstim ! full estimate with weight for HG refinement
     character(len=20), intent(in) :: wType  ! forwarded to setWeightFun_dualEst
     integer :: nCoefPlus
     real, dimension(1:ndim) :: resV, resB, resD      ! dual residual factors
     real, dimension(1:ndim) :: normV, normB, normD   ! weight factors
     real, allocatable, dimension(:,:) :: vCoef       ! wPlus - Projection(wPlus)

     ! DOFtriang of the degree raised by state%p_mod_max
     nCoefPlus = DOFtriang( elem%deg + state%p_mod_max )

     ! volume residual \eta_{K,V}
     resV(1:ndim) = sqrt( DWRAniEstimElemDual_Volume( elem, J ) )

     ! boundary residuals, returned squared and rooted afterwards
     call DWRAniEstimElemDual_Edges( elem, J, resB(1:ndim), resD(1:ndim) )
     resB(1:ndim) = sqrt( resB(1:ndim) )
     resD(1:ndim) = sqrt( resD(1:ndim) )

     ! coefficients of the weight function
     allocate( vCoef(1:ndim,1:nCoefPlus), source = 0.0 )
     call setWeightFun_dualEst( elem, wType, vCoef(1:ndim,1:nCoefPlus))

     ! volume norm, then both boundary norms of the weight function (still squared)
     call EvalL2NormSquared_funInDof( elem, nCoefPlus, vCoef(1:ndim,1:nCoefPlus), normV(1:ndim) )
     call DWRElemEstim_ComputeBoundaryNorms( elem, nCoefPlus, vCoef(1:ndim,1:nCoefPlus), &
          normB(1:ndim), normD(1:ndim) )
     deallocate( vCoef )

     ! remove the squares
     normV(1:ndim) = sqrt( normV(1:ndim) )
     normB(1:ndim) = sqrt( normB(1:ndim) )
     normD(1:ndim) = sqrt( normD(1:ndim) )

     ! put all parts of the estimate together
     fullEstim = sum( resV(1:ndim)*normV(1:ndim) &
          + resB(1:ndim)*normB(1:ndim) + resD(1:ndim)*normD(1:ndim) )

     ! keep the individual factors on the element
     elem%eta( dwrEtaKV_dual, 1:ndim) = resV(1:ndim)
     elem%eta( dwrEtaKB_dual, 1:ndim) = resB(1:ndim)
     elem%eta( dwrEtaKD_dual, 1:ndim) = resD(1:ndim)

     elem%eta( dwrWeightKV_dual, 1:ndim) = normV(1:ndim)
     elem%eta( dwrWeightKB_dual, 1:ndim) = normB(1:ndim)
     elem%eta( dwrWeightKD_dual, 1:ndim) = normD(1:ndim)

   end subroutine DWRAniEstimElemDual


   !> Evaluate the weight function of the target functional JJ in the Qdof
   !> integration nodes of face ie.
   !> Both outputs are zero unless the face is a boundary face lying in the support
   !> of JJ (elem%iSubMesh == 1 and elem%iSubmeshFace == ie). Then
   !>   jN is filled for a BoundaryValue_t functional when iBC == 0 or tBC == -1,
   !>   jD is filled for a BoundaryFlux_t functional when tBC == 0 (Dirichlet).
   !> Any other functional type leaves both outputs zero.
   subroutine setDWRweightingBoundaryFunctions( elem, ie, JJ, Qdof, jD, jN)
     type(element), intent(in) :: elem
     class( Target_functional_t), intent(in) :: JJ
     integer, intent(in) :: ie, Qdof
     real, dimension(1:Qdof, 1:ndim), intent(out) :: jD, jN
     real :: tNow
     integer :: iq
     logical :: inSupport

     jD(1:Qdof,1:ndim) = 0.0
     jN(1:Qdof,1:ndim) = 0.0

     ! only boundary edges in the support of J contribute
     inSupport = elem%isBoundary(ie) .and. (elem%iSubMesh==1) .and. elem%iSubmeshFace==ie
     if ( .not. inSupport ) return

     tNow = state%time%ctime ! should not depend on time

     select type(JJ)
     class is (BoundaryValue_t)
        ! jN: homogeneous Neumann   or    Neumann
        if ( (elem%iBC(ie) == 0 ) .or. (elem%tBC(ie) == -1) ) then
           do iq = 1, Qdof
              jN(iq,1:ndim) = JJ%evalWeightFunction( elem%xi( ie,iq,1:nbDim), tNow )
           end do
        endif

     class is (BoundaryFlux_t)
        ! jD: Dirichlet boundary
        if ( elem%tBC(ie) == 0 ) then
           do iq = 1, Qdof
              jD(iq,1:ndim) = JJ%evalWeightFunction( elem%xi( ie,iq,1:nbDim), tNow )
           end do
        endif
     end select

   end subroutine setDWRweightingBoundaryFunctions

   !> for the dual solution: compute the jump (zInner for boundary edges)
   !> and the jump of the linearized diffusive flux, ie. [K(w_h)*grad(z_h)]*n
   !> (the inner flux only on boundary edges); the normal is scaled by 1/elem%dn(ie).
   !> The flux is assembled for the first solution component only (ndim == 1);
   !> for any further component diffFlux is returned as zero.
   !> jump, diffFlux: values in the Qdof integration nodes of edge ie
   subroutine prepareDualSolutionAndFluxes( elem, ie, Qdof, jump, diffFlux)
     type(element), intent(in) :: elem
     integer, intent(in) :: ie, Qdof
     real, dimension(1:Qdof, 1:ndim), intent(out) :: jump
     real, dimension(1:Qdof, 1:ndim), intent(out) :: diffFlux
     real, allocatable, dimension(:,:,:) :: R_s_inner, R_s_outer, Dwi, Dzi, DzOut, DwOut
     real, allocatable, dimension(:,:) :: Re_1, wi, zi, zOut, wOut
     real, allocatable, dimension(:,:,:,:,:) :: K_sk
     class(element), pointer :: elem1
     integer :: dof , l, ie1, i,k

     allocate( wi(1:Qdof, 1:ndim ), source = 0.0 )
     allocate( Dwi(1:Qdof, 1:ndim, 1:nbDim), source = 0.0 )
     allocate( zi(1:Qdof, 1:ndim ), source = 0.0 )
     allocate( Dzi(1:Qdof, 1:ndim, 1:nbDim), source = 0.0 )
     ! both fluxes start from zero: only component 1 is filled below, so the
     ! remaining components must not stay undefined when diffFlux is formed
     allocate( R_s_inner(1:Qdof, 1:nbDim, 1:ndim), source = 0.0 )
     allocate( R_s_outer(1:Qdof, 1:nbDim, 1:ndim), source = 0.0 )
     allocate( Re_1(1:iRe, 1:Qdof), source = state%model%Re1 )
     ! rows 2:iRe can contain the model parameters
     Re_1(2:iRe, 1:Qdof) = transpose( elem%xi(ie, 1:Qdof, 2+1:2+iRe-1) )

     dof = elem%dof

     ! traces from the inside: w enters the linearization, z enters the jump
     do l = 1,ndim
        ! we assume wST, zST are constant in time
        call Eval_func_Edge(elem, ie, dof, elem%wST(l,1:dof,1), wi(1:Qdof,l), .false.)
        call Eval_func_Edge(elem, ie, dof, elem%zST(l,1:dof,1), zi(1:Qdof,l), .false.)
     enddo

     call Eval_funcDer_Edge(elem, ie, dof, elem%wST(1:ndim,1:dof,1), &
          Dwi(1:Qdof,1:ndim,1:nbDim), .false.)

     call Eval_funcDer_Edge(elem, ie, dof, elem%zST(1:ndim,1:dof,1), &
          Dzi(1:Qdof,1:ndim,1:nbDim), .false.)

     ! The flux of z evaluated directly by Set_R_s_scalar works only for linear
     ! problems, therefore the linearized flux K(w)*grad(z) is used instead.
     allocate( K_sk(1:Qdof,1:nbDim,1:nbDim,1:ndim,1:ndim), source = 0.0 )

     ! The whole K_sk array is passed: the former section (...,1:ndim,ndim) addressed
     ! only the last slab, which coincides with the full array only for ndim == 1.
     ! NOTE(review): the coordinates are taken from elem%xi(0,...), whereas the
     ! superseded Set_R_s_scalar variant used elem%xi(ie,...) -- confirm which node
     ! set Set_K_sk_scalar expects.
     call Set_K_sk_scalar(ndim, nbDim, iRe, Qdof, wi(1:Qdof,1:ndim), &
          Dwi(1:Qdof,1:ndim,1:nbDim), Re_1(1:iRe,1:Qdof), &
          K_sk(1:Qdof,1:nbDim,1:nbDim,1:ndim,1:ndim), elem%xi(0,1:Qdof, 1:nbDim) )

     ! ndim == 1 only: inner flux  R_i = sum_k K_ik * dz/dx_k
     do i=1,nbDim
        do k=1,nbDim
           R_s_inner(1:Qdof, i, 1) = R_s_inner(1:Qdof, i, 1) &
                + K_sk(1:Qdof, i, k, 1, 1) * Dzi(1:Qdof, 1, k)
        enddo
     enddo

     ! boundary edge
     if (elem%isBoundary(ie)) then

        ! jump
        jump(1:Qdof,1:ndim) = zi(1:Qdof,1:ndim)

        do l=1,ndim
           diffFlux(1:Qdof, l) = matmul( R_s_inner(1:Qdof, 1:nbDim, l), &
                elem%n(ie,1:nbDim) ) / elem%dn(ie)
        end do !l
        ! inner edge - we need also the outer values
     else

        elem1 => grid%elem( elem%face(neigh, ie) )
        ie1 = elem%face(nei_i,ie)
        if(Qdof /= elem1%face(fGdof,ie1)) print*,'## Trouble in cdsas'

        ! compute w, Dw, z and Dz from outside
        allocate( wOut(1:Qdof, 1:ndim ), source = 0.0 )
        allocate( DwOut(1:Qdof, 1:ndim, 1:nbDim), source = 0.0 )

        allocate( zOut(1:Qdof, 1:ndim ), source = 0.0 )
        allocate( DzOut(1:Qdof, 1:ndim, 1:nbDim), source = 0.0 )

        do l = 1,ndim
           ! we assume wST, zST are constant in time
           call Eval_func_Edge(elem1, ie1, elem1%dof, elem1%wST(l,1:elem1%dof,1), &
                wOut(1:Qdof,l), .true.)
           call Eval_func_Edge(elem1, ie1, elem1%dof, elem1%zST(l,1:elem1%dof,1), &
                zOut(1:Qdof,l), .true.)
        enddo

        call Eval_funcDer_Edge(elem1, ie1, elem1%dof, elem1%wST(1:ndim,1:elem1%dof,1), &
             DwOut(1:Qdof,1:ndim,1:nbDim), .true.)

        call Eval_funcDer_Edge(elem1, ie1, elem1%dof, elem1%zST(1:ndim,1:elem1%dof,1), &
             DzOut(1:Qdof,1:ndim,1:nbDim), .true.)

        ! model parameters of the neighbour; its nodes are read in reversed order
        do l=1,Qdof
           Re_1(2:iRe, l) = elem1%xi(ie1, Qdof+1-l, 2+1:2+iRe-1)
        enddo

        ! compute the linearized flux from outside
        call Set_K_sk_scalar(ndim, nbDim, iRe, Qdof, wOut(1:Qdof,1:ndim), &
             DwOut(1:Qdof,1:ndim,1:nbDim), Re_1(1:iRe,1:Qdof), &
             K_sk(1:Qdof,1:nbDim,1:nbDim,1:ndim,1:ndim), &
             elem%xi(0,1:Qdof, 1:nbDim) )   ! opposite ordering of elem1%xi, we use elem%xi

        ! ndim == 1 only: outer flux
        do i=1,nbDim
           do k=1,nbDim
              R_s_outer(1:Qdof, i, 1) = R_s_outer(1:Qdof, i, 1) &
                   + K_sk(1:Qdof, i, k, 1, 1) * DzOut(1:Qdof, 1, k)
           enddo
        enddo

        ! jump [zIn - zOut]
        jump(1:Qdof, 1:ndim) = zi(1:Qdof, 1:ndim) - zOut(1:Qdof, 1:ndim)

        deallocate(zOut, DzOut, wOut, DwOut)

        ! jump of the diffusive flux [\mK grad(z_h)]*n; no factor 0.5 is applied here
        do l=1,ndim
           diffFlux(1:Qdof, l) = matmul( R_s_inner(1:Qdof, 1:nbDim, l) - &
                R_s_outer(1:Qdof, 1:nbDim, l) , elem%n(ie,1:nbDim) ) / &
                elem%dn(ie)
        end do !l

     endif !if (elem%isBoundary(ie)) then

     deallocate( Re_1, R_s_inner, R_s_outer, zi, Dzi, wi, Dwi, K_sk )

   end subroutine prepareDualSolutionAndFluxes

   !> compute both dual edge residuals r^*_B,K and R^*_D,K
   !> Dispatcher: the scalar model goes to DWRAniEstimElemDual_Edges_Scalar, the
   !> Euler case (model 'NSe' with ndim == 4 and Re == 0) goes to
   !> DWRAniEstimElemDual_Edges_Euler; any other problem stops the program.
   subroutine DWRAniEstimElemDual_Edges( elem, J, estimB, estimD )
     type(element), intent(inout) :: elem
     class( Target_functional_t), intent(in) :: J
     real, dimension(1:ndim), intent(out) :: estimB, estimD

     if (state%modelName == 'scalar') then
        call DWRAniEstimElemDual_Edges_Scalar(elem, J, estimB, estimD )

     else if (ndim == 4 .and. state%model%Re == 0.0 .and. state%modelName == 'NSe') then
        call DWRAniEstimElemDual_Edges_Euler(elem, J, estimB, estimD )
     else
        ! the message names this routine so that the failure can be located
        stop "DWRAniEstimElemDual_Edges: unknown kind of problem!"
     end if

   end subroutine DWRAniEstimElemDual_Edges

   !> compute both dual edge residuals r^*_B,K and R^*_D,K for the scalar problem
   !> estimB, estimD: sums over all faces of elem of the values returned by
   !> IntegrateFunctionVecEdgeSquared; the caller DWRAniEstimElemDual takes their
   !> square roots.
   !> On a boundary face that is neither Dirichlet (tBC == 0) nor Neumann
   !> (tBC == -1) a message is printed and the face contributes zero to estimB.
   subroutine DWRAniEstimElemDual_Edges_Scalar( elem, J, estimB, estimD )
     type(element), intent(inout) :: elem
     class( Target_functional_t), intent(in) :: J
     real, dimension(1:ndim), intent(out) :: estimB, estimD
     integer :: ie, Qdof, Qnum, k
     real, allocatable, dimension(:,:) :: jD, jN, jump, BtimesN, diffFlux, estim1, estim2
     real, allocatable, dimension(:) :: penal
     real, dimension(1:ndim) :: estimBEdge, estimDEdge
     logical, parameter :: iprint = .false.   ! switch for the debug output below

     ! edge estims
     estimB(1:ndim) = 0.0
     estimD(1:ndim) = 0.0

     do ie = 1,elem%flen
        Qnum = elem%face(fGnum,ie)
        Qdof = state%space%G_rule(Qnum)%Qdof
        if(Qdof /= elem%face(fGdof,ie)) print*, '## Trouble in DWRAniEstimElemDual_Edges_Scalar'

        allocate( jD(1:Qdof,1:ndim), jN(1:Qdof,1:ndim), source = 0.0 )
        allocate( jump(1:Qdof,1:ndim))
        allocate( diffFlux(1:Qdof,1:ndim) )
        allocate( BtimesN(1:Qdof,1:ndim), source = 0.0 )
        allocate( estim1(1:Qdof,1:ndim), estim2(1:Qdof,1:ndim) )

        call setDWRweightingBoundaryFunctions( elem, ie, J,  Qdof, jD(1:Qdof,1:ndim), jN(1:Qdof,1:ndim) )

        ! penalty; the superseded setPenaltySigma variant did not work for
        ! discontinuous diffusion
        allocate(penal(1:Qdof))
        call setPenaltySigma_New( elem, ie, Qdof, penal(1:Qdof) )

        penal(1:Qdof)  = penal(1:Qdof)  /  elem%dn(ie)

        ! compute [z] and [A*grad(z)]*n
        call prepareDualSolutionAndFluxes( elem, ie, Qdof, jump, diffFlux )

        if(iprint) write(*,'(a12,2i5, 40es12.4)') 'diffFlux:',elem%i, ie, diffFlux(:,1)

        ! put together to estimB, estimD
        ! estimB: -0.5 [A\grad(z_h)]*n - (\sigma + b*n)[z_h] on inner edges
        !         -(\sigma + b*n)(z_h + jD) on Gamma_D
        !         jN - A*grad(zh)*n - b*n z_h on Gamma_N
        !         0 elsewhere
        if ( elem%isBoundary(ie) ) then

           ! Dirichlet
           if (elem%tBC(ie) == 0) then
              ! Nonlinear variant of subroutine computeBtimesN
              call computeBtimesN_nonlinear(elem, ie, Qdof, BtimesN(1:Qdof,1:ndim),1 )

              ! sum of contributions
              do k=1, ndim
                 estim1(1:Qdof,k) = (-1.0)*(BtimesN(1:Qdof,k)+ penal(1:Qdof) ) &
                      * (jump(1:Qdof,k) + jD(1:Qdof,k))
              enddo

              ! Neumann
           else if (elem%tBC(ie) == -1) then

              ! Nonlinear variant of subroutine computeBtimesN
              call computeBtimesN_nonlinear(elem, ie, Qdof, BtimesN(1:Qdof,1:ndim), 0 )

              ! computing of the Neumann edge residual
              if( J%id == 1 .and. J%iWeight == 11) then  ! key NTF
                 ! nonlinear case, J(u) = (jN f'(u).n,  u)_{\gomN}, jump = z
                 estim1(1:Qdof,1:ndim) =  - diffFlux(1:Qdof, 1:ndim) &
                      + BtimesN(1:Qdof,1:ndim)* (jN(1:Qdof,1:ndim) - jump(1:Qdof,1:ndim) )
              else
                 ! linear case, J(u) = (jN, u)_{\gomN}, jump = z
                 estim1(1:Qdof,1:ndim) = jN(1:Qdof,1:ndim) - diffFlux(1:Qdof, 1:ndim) &
                      - BtimesN(1:Qdof,1:ndim)*jump(1:Qdof,1:ndim)
              endif

           else
              ! not Gamma_N and not Gamma_D: no residual is defined here, so the
              ! contribution is zero instead of an integral of undefined values
              print*, 'What to do here? BC not Gamma_N not Gamma_D'
              estim1(1:Qdof,1:ndim) = 0.0
           endif

        else ! inner edge

           ! Nonlinear variant of subroutine computeBtimesN
           call computeBtimesN_nonlinear(elem, ie, Qdof, BtimesN(1:Qdof,1:ndim), 1 )

           do k=1, ndim
              estim1(1:Qdof,k) = (-0.5) * diffFlux(1:Qdof,k) - &
                   (BtimesN(1:Qdof,k) + penal(1:Qdof) )*jump(1:Qdof,k)
           enddo

        endif
        ! integrate the estimate
        call IntegrateFunctionVecEdgeSquared(elem, ie, ndim, estim1(1:Qdof,1:ndim), &
             estimBEdge(1:ndim))

        estimB(1:ndim) = estimB(1:ndim) + estimBEdge(1:ndim)

        ! estimD: 0.5*[z_h] on inner edges
        !         jD + z_h on Gamma_D
        !         0 elsewhere on \partial \Omega
        if (elem%isBoundary(ie) ) then ! Boundary
           if  ( elem%tBC(ie) == 0) then ! Dirichlet
              estim2(1:Qdof,1:ndim) =  jump(1:Qdof,1:ndim) + jD(1:Qdof,1:ndim) ! jump of z
           else !other BC
              estim2(1:Qdof,1:ndim) = 0.0
           endif
        else !inner
           estim2(1:Qdof,1:ndim) = 0.5*jump(1:Qdof,1:ndim) ! jump of z
        end if

        ! integrate the estimate
        call IntegrateFunctionVecEdgeSquared(elem, ie, ndim, estim2(1:Qdof,1:ndim), &
             estimDEdge(1:ndim))
        estimD(1:ndim) = estimD(1:ndim) + estimDEdge(1:ndim)

        deallocate(jD, jN, jump, BtimesN, diffFlux, estim1, estim2, penal)

     end do ! edges

   end subroutine DWRAniEstimElemDual_Edges_Scalar

   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !!! EULER EQUATION !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
   !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

   !> Euler equations: difference between the physical inviscid flux and the
   !> numerical flux in the Qdof integration nodes of face ie,
   !>   fluxError = P(w,n) - numerical flux.
   !> The numerical flux comes from Eval_NFlux_Edge on inner faces, on wall faces
   !> with the mirror treatment (discretizationOfSlipBC_Euler == 3) and on all other
   !> boundary faces; for treatment 2 on wall faces (iBC == 0) it is the product of
   !> the matrix from Set_Ppm_Euler_Slip with the state updated by UpdateMirror.
   !> Treatment 1 on wall faces stops the program.
   subroutine DWREdgeEstim_Euler(elem, ie, Qdof, fluxError)
      class(element), intent(inout) :: elem
      integer, intent(in) :: ie
      integer, intent(in) :: Qdof
      real, dimension(1:Qdof,1:ndim), intent(out) :: fluxError
      real, allocatable, dimension(:, :) :: unitNormal, wEdge
      real, allocatable, dimension(:, :) :: fluxPhys, fluxNum
      real, allocatable, dimension(:, :,:) :: slipMat
      integer :: iNeigh, iq, idim

      fluxError(1:Qdof,1:ndim) = 0.0
      iNeigh = elem%face(neigh,ie)
      if (Qdof /= elem%face(fGdof,ie)) &
        print*, "CONTROL: DWREdgeEstim_Euler Qdof /= elem%face(fGdof,ie) "

      allocate( fluxNum(1:Qdof, 1:ndim), source = 0.0)

      ! physical flux P(w,n)
      allocate( fluxPhys(1:Qdof, 1:ndim), source = 0.0)
      call ComputeElementInviscidPhysicalFlux(elem, ie, Set_f_s_Euler, fluxPhys)

      if (iNeigh > 0) then
        ! inner edge: P(w^(+),n) - H(w^(+),w^(-),n), Vijayasundaram num flux
        call Eval_NFlux_Edge(Set_Ppm_Euler, elem, ie, fluxNum(1:Qdof, 1:ndim))

      else if (elem%iBC(ie) == 0 ) then
        ! W edge (profile)
        if (state%model%discretizationOfSlipBC_Euler == 1) then
            stop "DWREdgeEstim_Euler not working for 1st kind of treatment on Gamma_W BC!"

        else if (state%model%discretizationOfSlipBC_Euler == 2) then
             ! || P(w^+,n) - \mP_W(u_\Gamma(w^+),n) u_\Gamma(w^+)  ||
             allocate(wEdge(1:Qdof,1:ndim))
             call Eval_w_Edge(elem, ie, wEdge, .false.)

             ! UNIT outer normals in integration nodes:
             ! node-wise on the curved edge, constant otherwise
             allocate(unitNormal(1:Qdof, 1:nbDim) )
             if(elem%ibcur > 0  .and. elem%jcur == ie) then
                do idim = 1, 2
                   unitNormal(1:Qdof,idim) = elem%nc(1:Qdof,idim)/ elem%dnc(1:Qdof)
                end do
             else
                do idim = 1, 2
                   unitNormal(1:Qdof,idim) = elem%n(ie,idim) / elem%dn(ie)
                end do
             endif

             ! compute u_Gamma(w) = w - (w*n)n
             call UpdateMirror(ndim,Qdof,wEdge(1:Qdof,1:ndim),unitNormal(1:Qdof, 1:nbDim))

             ! matrix P_W(u_\Gamma(w)) from DolFeist book; only rows 2:3 are passed
             allocate(slipMat(1:Qdof, 1:ndim, 1:ndim), source = 0.0 )
             call Set_Ppm_Euler_Slip(ndim, nbDim, Qdof, wEdge(1:Qdof,1:ndim), unitNormal(1:Qdof,1:nbDim), &
             slipMat(1:Qdof, 2:3, 1:ndim))

             ! P_w(u_\Gamma(w))*u_\Gamma(w), node by node
             do iq = 1, Qdof
                fluxNum(iq,1:ndim) = matmul(slipMat(iq, 1:ndim, 1:ndim), wEdge(iq, 1:ndim) )
             end do

             deallocate( unitNormal, wEdge, slipMat )

        else if (state%model%discretizationOfSlipBC_Euler == 3) then
            ! Mirror BC: Vijayasundaram num flux
            call Eval_NFlux_Edge( Set_Ppm_Euler, elem, ie, &
                    fluxNum(1:Qdof, 1:ndim) )

        end if

      else
         ! IO edge: P - H_VS(w^+,u_RP(w^+),n)
         ! (the variant \mP^-(w^+,n)*(w^+ - u_RP(w^+)) based on the exact Riemann
         !  problem is a TODO for tests)
         call Eval_NFlux_Edge( Set_Ppm_Euler, elem, ie, &
                       fluxNum(1:Qdof, 1:ndim) )

      end if

      fluxError(1:Qdof, 1:ndim) = fluxPhys(1:Qdof, 1:ndim) &
                     - fluxNum(1:Qdof, 1:ndim)

      deallocate( fluxNum, fluxPhys )

   end subroutine DWREdgeEstim_Euler


   !> compute both dual edge residuals r^*_B,K and R^*_D,K (Euler equations)
   !>
   !> For every face `ie` of `elem` a vector `res` is evaluated in the face
   !> integration nodes, passed to IntegrateFunctionVecEdgeSquared and the
   !> per-face results are summed into `estimB`:
   !>  - inner edge (elem%face(neigh,ie) > 0):  res = P^+(<w_h>,n)^T [z_h]
   !>  - Gamma_W   (elem%iBC(ie) == 0):         res = HH^T z_h^+, where HH is
   !>    selected by state%model%discretizationOfSlipBC_Euler (1, 2 or 3;
   !>    for any other value HH stays zero)
   !>  - Gamma_IO  (all remaining faces):       res = P^+(w_h^+,n)^T z_h^+
   !>
   !> @param elem    element whose faces are processed
   !> @param J       target functional; not referenced in this routine
   !> @param estimB  sum over the faces of the values returned by
   !>                IntegrateFunctionVecEdgeSquared, one entry per component
   !> @param estimD  always returned as zero (no diffusive part in Euler eq)
   subroutine DWRAniEstimElemDual_Edges_Euler( elem, J, estimB, estimD )
     type(element), intent(inout) :: elem
     class( Target_functional_t), intent(in) :: J
     real, dimension(1:ndim), intent(out) :: estimB, estimD
     integer :: ie, Qdof, Qnum, k, l, kk
     real, allocatable, dimension(:) :: ppm_loc
     real, allocatable, dimension(:,:) :: res, wAver, zJump, wi, nc, nUnit
     real, allocatable, dimension(:,:,:) :: HH
     real, allocatable, dimension(:,:,:,:) :: Ppm
     real, dimension(1:ndim) :: estimBEdge

     estimB = 0.0
     ! estimD = 0 in Euler eq
     estimD = 0.0

     do ie = 1, elem%flen
        estimBEdge(1:ndim) = 0.0
        Qnum = elem%face(fGnum,ie)
        Qdof = state%space%G_rule(Qnum)%Qdof
        ! inconsistent quadrature size is only reported, the edge is still processed
        if(Qdof /= elem%face(fGdof,ie)) &
             print*, '## Trouble in DWRAniEstimElemDual_Edges_Euler'

        allocate( res(1:Qdof, 1:ndim), source = 0.0 )

        ! outer normal in the integration nodes as stored on the element,
        ! i.e. NOT divided by elem%dn / elem%dnc
        allocate(nc(1:Qdof, 1:nbDim) )
        if(elem%ibcur > 0  .and. elem%jcur == ie) then  ! curved edge
           nc(1:Qdof,1:nbDim) = elem%nc(1:Qdof,1:nbDim)
        else
           nc(1:Qdof,1) = elem%n(ie,1)
           nc(1:Qdof,2) = elem%n(ie,2)
        endif

        kk = elem%face(neigh,ie)

        if (kk > 0) then
          ! ---------------- inner edge ----------------
          ! - \mP^+(<w>,n)^T [z_h]
          ! <w_h>
          allocate(wAver(1:Qdof, 1:ndim), source = 0.0 )
          call Eval_aver_w_Edge(elem, grid%elem(kk), ie, Qdof, wAver(1:Qdof, 1:ndim) )
          ! \mP+-
          allocate( Ppm(1:Qdof,1:2, 1:ndim, 1:ndim), source = 0.0 )
          call Set_Ppm_Euler(ndim, nbDim, Qdof, wAver(1:Qdof,1:ndim), &
            nc(1:Qdof,1:nbDim), elem%xi(ie, 1:Qdof, 1:nbDim),  &
            Ppm(1:Qdof,1:2, 1:ndim, 1:ndim), 1./elem%area, elem )

          ! compute the jump [z_h]
          allocate( zJump(1:Qdof,1:ndim), source = 0.0 )
          call ElementDualSolEdgeJump(elem, ie, zJump(1:Qdof,1:ndim))

          do l =1,Qdof
            ! (P^+)^T [z_h], 1 == +   (vector*matrix == matrix^T * vector)
            res(l,1:ndim) = matmul( zJump(l,1:ndim), Ppm(l,1, 1:ndim, 1:ndim) )
          end do

          deallocate(Ppm, zJump, wAver)

        else if (elem%iBC(ie) == 0 ) then
          ! ---------------- Gamma_W - profile ----------------
          allocate(wi(1:Qdof,1:ndim))
          call Eval_w_Edge(elem, ie, wi, .false.)

          allocate(HH(1:Qdof, 1:ndim, 1:ndim), source = 0.0 )

          select case (state%model%discretizationOfSlipBC_Euler)
          case (1)
            ! evaluation of matrix
            ! HH = \mH_W^{1,L}(w^+,n); only rows 2:3 are filled
            call Set_Ppm_Euler_Slip(ndim, nbDim, Qdof, wi(1:Qdof,1:ndim), &
                nc(1:Qdof,1:nbDim), HH(1:Qdof, 2:3, 1:ndim))

          case (2)
            ! compute u_Gamma(w) = w - (w*n)n ! length of n is not important here
            call UpdateMirror(ndim, Qdof, wi(1:Qdof,1:ndim), nc(1:Qdof, 1:nbDim))
            ! evaluation of matrix
            ! HH = \mH_W^{1,L}(u_\Gamma(w^+),n)
            call Set_Ppm_Euler_Slip(ndim, nbDim, Qdof, wi(1:Qdof,1:ndim), &
                nc(1:Qdof,1:nbDim), HH(1:Qdof, 2:3, 1:ndim))

            ! \mP_W * \mU_\Gamma with \mU_\Gamma = I - n n^T acting on
            ! components 2:3.  This is the Jacobian of u_Gamma only for a
            ! UNIT normal, so the normalised normal is used here (the same
            ! one that BC version 3 uses for its mirror operator).
            ! NOTE(review): previously the non-normalised nc entered this
            ! product; confirm that the primal discretisation of BC version 2
            ! is assembled with the unit normal as well.
            allocate( nUnit(1:Qdof, 1:nbDim), source = 0.0 )
            call unitNormalOnFace( ie, Qdof, nUnit )

            allocate( ppm_loc(1:Qdof), source = 0.0)
            do k=2,3
              ! keep the old column 2, it is still needed for column 3
              ppm_loc(1:Qdof) = HH(1:Qdof, k, 2)

              ! column 2
              HH(1:Qdof, k, 2) = HH(1:Qdof,k, 2) &
                   - HH(1:Qdof, k, 2) * nUnit(1:Qdof,1) * nUnit(1:Qdof,1) &
                   - HH(1:Qdof, k, 3) * nUnit(1:Qdof,1) * nUnit(1:Qdof,2)

              ! column 3 (uses the saved, not yet overwritten, column 2)
              HH(1:Qdof, k, 3) = HH(1:Qdof, k, 3) &
                   - ppm_loc(1:Qdof) * nUnit(1:Qdof,2) * nUnit(1:Qdof,1) &
                   - HH(1:Qdof, k, 3) * nUnit(1:Qdof,2) * nUnit(1:Qdof,2)
            enddo
            deallocate(ppm_loc, nUnit)

          case (3)
            ! u_Gamma(w) = w - (w*n)n = (w + Mir(w)) / 2
            call UpdateMirror(ndim, Qdof, wi, nc(1:Qdof,1:nbDim))

            allocate(Ppm(1:Qdof, 1:nbDim, 1:ndim, 1:ndim) )
            call Set_Ppm_Euler(ndim, nbDim, Qdof, wi(1:Qdof,1:ndim), nc(1:Qdof,1:nbDim), &
              elem%xi(ie, 1:Qdof, 1:nbDim),  &
              Ppm(1:Qdof,1:nbDim, 1:ndim, 1:ndim), 1./elem%area, elem )

            ! UNIT normal for the mirror operator \mM = I - 2 n n^T
            allocate( nUnit(1:Qdof, 1:nbDim), source = 0.0 )
            call unitNormalOnFace( ie, Qdof, nUnit )

            HH(1:Qdof, 1:ndim, 1:ndim) = Ppm(1:Qdof, 1, 1:ndim, 1:ndim) &
               + Ppm(1:Qdof, 2, 1:ndim, 1:ndim)

            ! (P^+) + (P^- * \mM) = (P^+) + (P^- ) - P^-(2*n*n^T)
            do k=1,ndim
              ! column 2
              HH(1:Qdof, k, 2) = HH(1:Qdof, k, 2) &
                   - 2 * Ppm(1:Qdof, 2, k, 2) * nUnit(1:Qdof,1) * nUnit(1:Qdof,1) &
                   - 2 * Ppm(1:Qdof, 2, k, 3) * nUnit(1:Qdof,1) * nUnit(1:Qdof,2)
              ! column 3
              HH(1:Qdof, k, 3) = HH(1:Qdof, k, 3) &
                   - 2 * Ppm(1:Qdof, 2, k, 2) * nUnit(1:Qdof,2) * nUnit(1:Qdof,1) &
                   - 2 * Ppm(1:Qdof, 2, k, 3) * nUnit(1:Qdof,2) * nUnit(1:Qdof,2)
            enddo

            deallocate(Ppm, nUnit)

          case default
            ! unknown BC version: HH stays zero, as before
          end select

          ! HH^T * (theta - \zh^+)
          ! compute zh^+
          allocate( zJump(1:Qdof,1:ndim), source = 0.0 )
          call Eval_z_Edge(elem, ie, zJump(1:Qdof,1:ndim), .false.)
          ! HH^T z_h^+, 1 == +
          do l =1,Qdof
            res(l,1:ndim) = matmul( zJump(l,1:ndim), HH(l, 1:ndim, 1:ndim) )
          end do
          deallocate(zJump, wi, HH)

        else
          ! ---------------- Gamma_IO - input/output ----------------
          ! - \mP^+(wh^+,n)^T z_h^+
          allocate(wAver(1:Qdof, 1:ndim), source = 0.0 )
          call Eval_w_Edge( elem, ie, wAver(1:Qdof, 1:ndim), .false.)

          ! \mP+-
          allocate( Ppm(1:Qdof,1:2, 1:ndim, 1:ndim), source = 0.0 )
          call Set_Ppm_Euler(ndim, nbDim, Qdof, wAver(1:Qdof,1:ndim), &
            nc(1:Qdof,1:nbDim), elem%xi(ie, 1:Qdof, 1:nbDim),  &
            Ppm(1:Qdof,1:2, 1:ndim, 1:ndim), 1./elem%area, elem )

          ! z_h^+ on the edge (there is no jump on the boundary)
          allocate( zJump(1:Qdof,1:ndim), source = 0.0 )
          call Eval_z_Edge(elem, ie, zJump(1:Qdof,1:ndim), .false.)

          do l =1,Qdof
            ! (P^+)^T z_h^+, 1 == +
            res(l,1:ndim) = matmul( zJump(l,1:ndim), Ppm(l,1, 1:ndim, 1:ndim) )
          end do

          deallocate(Ppm, zJump, wAver)

        end if

        call IntegrateFunctionVecEdgeSquared( elem, ie, ndim, res(1:Qdof,1:ndim), estimBEdge(1:ndim) )

        deallocate(res, nc)
        estimB(1:ndim) = estimB(1:ndim) + estimBEdge(1:ndim)
     end do

   contains

     !> Unit outer normal of face `iFace` of the host's `elem` in its `nq`
     !> integration nodes: elem%nc/elem%dnc on the curved edge, elem%n/elem%dn
     !> otherwise.  Only the first two columns of `nHat` are set.
     subroutine unitNormalOnFace( iFace, nq, nHat )
       integer, intent(in) :: iFace, nq
       real, dimension(:,:), intent(inout) :: nHat

       if(elem%ibcur > 0  .and. elem%jcur == iFace) then  ! curved edge
          nHat(1:nq,1) = elem%nc(1:nq,1)/ elem%dnc(1:nq)
          nHat(1:nq,2) = elem%nc(1:nq,2)/ elem%dnc(1:nq)
       else
          nHat(1:nq,1) = elem%n(iFace,1) / elem%dn(iFace)
          nHat(1:nq,2) = elem%n(iFace,2) / elem%dn(iFace)
       endif
     end subroutine unitNormalOnFace

   end subroutine DWRAniEstimElemDual_Edges_Euler

     !> DWR estimation of the VOLUME DUAL residual over one element (Euler equations)
   !> STATIONARY PROBLEMS ONLY
   !> compute || sum_{s=1}^{nbDim} A_s(w_h)^T * dz_h/dx_s ||_K
   !>
   !> The routine is available only for ndim == 4, state%model%Re == 0.0 and
   !> state%modelName == 'NSe', and only when elem%zST has a single entry in
   !> its third dimension (q = 0); otherwise the run is aborted.
   !>
   !> @param elem   element; its primal solution and elem%zST are evaluated
   !>               in the volume integration nodes
   !> @param J      target functional; not referenced in this routine
   !> @return estim output of IntegrateSquareVectorFunction2 applied to the
   !>               residual, one entry per solution component
   function DWRAniEstimElemDual_Volume_Euler(elem, J) result( estim )
      type(element), intent(in) :: elem
      class( Target_functional_t ),intent(in) :: J ! target functional !only for volume J
      real, dimension(1:ndim) :: estim

      integer :: Qnum, dof, Qdof, k, l
      real, dimension(1:elem%Qdof,1:ndim) :: wi
      real, dimension(1:elem%Qdof, 1:ndim, 1:nbDim) :: Dzi
      real, dimension(1:elem%Qdof,1:nbDim) :: x

      real, allocatable, dimension(:,:,:,:) :: A_s
      real, allocatable, dimension(:,:) :: res

      ! --- guards: abort with an error status before any work is done ---
      if ( .not. (ndim == 4 .and. state%model%Re == 0.0 &
                  .and. state%modelName == 'NSe') ) then
         error stop "DWRAniEstimElemDual_Volume_Euler is not siuted fot this kind od problem!"
      end if

      ! only for stationary problems ie q = 0
      if (size(elem%zST,3) > 1) then
         error stop "DWRAniEstimElemDual_Volume_Euler only for q = 0"
      end if

      ! defined result even if the integration routine only accumulates
      estim(1:ndim) = 0.0

      Qnum = elem%Qnum
      dof = elem%dof
      Qdof = elem%Qdof

      allocate( res(1:Qdof, 1:ndim), source = 0.0)
      allocate( A_s(1:Qdof,1:nbDim,1:ndim,1:ndim), source = 0.0)

      ! w_h in the volume integration nodes
      x(1:Qdof, 1:nbDim) = state%space%V_rule(Qnum)%lambda(1:Qdof,1:nbDim)
      call Eval_w_Elem(elem, wi(1:Qdof,1:ndim) )

      ! Jacobi matrices A_s(w_h), s = 1,..,nbDim
      call Set_A_s_Euler(ndim, nbDim, Qdof, wi(1:Qdof,1:ndim), &
           A_s(1:Qdof,1:nbDim,1:ndim,1:ndim), x(1:Qdof,1:nbDim), 0)
           ! last argument is not used

      ! compute grad(z_h) from the single stored level of the dual solution
      call Eval_DVecTransposed_Elem(elem, dof, elem%zST(1:ndim,1:dof,1), &
           Dzi(1:Qdof,1:ndim,1:nbDim) )

      ! res = sum_s A_s(w_h)^T Dz/Dx_s
      ! (vector*matrix in matmul == matrix^T * vector)
      do k = 1, Qdof
         do l = 1, nbDim
            res(k, 1:ndim) = res(k, 1:ndim) + &
                 matmul( Dzi(k,1:ndim, l), A_s(k,l,1:ndim,1:ndim) )
         end do
      end do

      ! compute the L2 norm squared
      call IntegrateSquareVectorFunction2( elem, res(1:Qdof, 1:ndim), &
               estim(1:ndim) )
      deallocate( A_s, res )

    end function DWRAniEstimElemDual_Volume_Euler


 end module dwr_res
