{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE RankNTypes #-}

module Phylomemy.TemporalMatching where

-- import Data.Traversable (traverse)
-- import Debug.Pretty.Simple (pTraceShow, pTraceShowId)
import Control.Monad (Monad (..), foldM, forM_, unless)
import Control.Monad.ST qualified as ST
import Data.Bool (otherwise)
import Data.Either (fromLeft)
import Data.Eq (Eq (..))
import Data.Foldable (any, foldMap', toList)
import Data.Function (($), (&), (.))
import Data.Functor (Functor (..), (<$>), (<&>))
import Data.Int (Int)
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Maybe (Maybe (..))
import Data.Ord (Ord (..))
import Data.Ord qualified as Ord
import Data.Ratio (Rational, (%))
import Data.Scientific (toBoundedRealFloat)
import Data.Semigroup (Semigroup (..))
import Data.Sequence (Seq)
import Data.Sequence qualified as Seq
import Data.Set (Set)
import Data.Set qualified as Set
import Data.Tree qualified as Tree
import Data.Tuple (uncurry)
import GHC.Stack (HasCallStack)
import Logic hiding ((/))
import Numeric.Decimal qualified as Decimal
import Numeric.Probability
import Text.Show (Show)
import Prelude (Double, Num (..), fromIntegral, pi, tan, toRational, (/), (^))

import Clustering.FrequentItemSet.BruteForce qualified as Clustering
import Clustering.UnionFind.ST qualified as UnionFind
import Phylomemy.Indexation

type Similarity = Probability

cardinal :: Num i => Set a -> i
cardinal = fromIntegral . Set.size

-- TODO: implement biased similarity from the paper
similarityJaccard :: Ord.Ord a => Set a -> Set a -> Similarity
similarityJaccard x y =
  cardinal (Set.intersection x y) % cardinal (Set.union x y)
    & probability
    & Decimal.arithError

-- | `AllSimilarities` maps each @(range, cluster)@ node
-- to *all* the other @(range, cluster)@ nodes from a different @(range)@.
--
-- Note that the destination nodes are first ordered by their range
-- which enable to find the closest from the source's range.
type AllSimilarities range cluster =
  {-source-} range :-> cluster :-> {-destination-} Direction (range :-> Similarity :-> Seq cluster)

-- | A phylomemy can be viewed in two directions.
data Direction a = Direction
  { directionUpstream :: a
  -- ^ Lookup for the closest similar enough destinations
  -- in ranges lower than its source's range.
  , directionDownstream :: a
  -- ^ Lookup for the closest similar enough destinations
  -- in ranges greater than its source's range.
  }
  deriving (Functor, Show)

-- | @(`allSimilarities` similarityMeasure rangeToClusterToDocuments)@
-- computes all the temporal similarities for the given documents.
allSimilarities ::
  Ord range =>
  {-similarityMeasure :::-} (cluster -> cluster -> Similarity) ->
  {-clusters :::-} range :-> cluster :-> Seq (Clustering.Transaction Root (Document pos)) ->
  AllSimilarities range cluster
allSimilarities similarity rangeToClusterToDocs =
  rangeToClusterToDocs
    & Map.mapWithKey \srcR ->
      Map.mapWithKey \srcC _docs ->
        let
          (rangeToClusterToDocsLT, rangeToClusterToDocsGT) = Map.split srcR rangeToClusterToDocs
          goStream = Map.mapMaybeWithKey \_rangeNE dstCD ->
            deleteEmptyMap $
              Map.fromListWith
                (Seq.><)
                [ (simil, Seq.singleton dstC)
                | dstC <- dstCD & Map.keys
                , let simil = similarity srcC dstC
                , proba0 < simil
                ]
        in
          Direction
            { directionUpstream = goStream rangeToClusterToDocsLT
            , directionDownstream = goStream rangeToClusterToDocsGT
            }
  where
    deleteEmptyMap m
      | Map.null m = Nothing
      | otherwise = Just m

-- -- | @(`similarities` phy)@
-- --  returns the set of `Similarity`s used by the given `Phylomemy`.
-- similaritiesLevels :: AllSimilarities range cluster -> Set Similarity
-- similaritiesLevels =
--   Map.foldMapWithKey \_ ->
--     Map.foldMapWithKey \_ dir ->
--       Map.foldMapWithKey (\_ -> Set.fromList . Map.keys) (directionDownstream dir)

-- -- | @(`phylomemyRaise` minSimil phy)@
-- -- filter-in only edges whose similarity is greater or equal to the given @(minSimil)@.
-- phylomemyRaise :: Similarity -> AllSimilarities range cluster -> AllSimilarities range cluster
-- phylomemyRaise minSimil =
--   Map.map $
--     Map.map $
--       fmap @Direction $
--         Map.mapMaybe $
--           (deleteEmptyMap .) $ \childSC ->
--             let (_lt, eqM, gt) = Map.splitLookup minSimil childSC
--             in maybe gt (\eq -> Map.insert minSimil eq gt) eqM
--   where
--     deleteEmptyMap m
--       | Map.null m = Nothing
--       | otherwise = Just m

-- | A `MaximalSpanningTree` is one (possibly out of many)
-- maximal (similarity) spanning tree (MST),
-- mapping each source @(range, cluster)@ to many destination @(range, cluster)@,
-- from the lowest to the greatest `Similarity`.
--
-- ExplanationNote: https://en.wikipedia.org/wiki/Minimum_spanning_tree
--
-- Viewing a phylomemy as a MST is the crux of understanding how it is computed:
--
-- - it makes it easy to prune edges with low similarity
-- and know how many `MaximalSpanningTree`es this creates.
--
-- - it explains what the "scale of a phylomemy" is:
-- considering clusters of the same range and same MST as a single node.
--
-- The order in which edges may be deleted is known in advance (aka. "offline"):
-- it goes from the lowest similarity (0) to the highest similarity (1)
-- (tie breaking with an order on the @(range, cluster)@ nodes)
-- therefore computing a maximal spanning tree (MaximalSpanningTree) wrt. similarities
-- gives:
--
-- - the value of the next `Similarity` causing a split:
-- it is the minimal `Similarity` inside this `MaximalSpanningTree`.
--
-- - the resulting `MaximalSpanningTree`s after removing the edges
-- with that minimal `Similarity`.
--
-- TODO: "Inadequacies of Minimum Spanning Trees in Molecular Epidemiology"
-- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3187300/
--
-- TODO: "Divide-and-conquer based all spanning tree generation algorithm of a simple connected graph"
-- https://www.sciencedirect.com/science/article/abs/pii/S0304397521006952
--
-- WarningNote: "The tree of one percent"
-- https://link.springer.com/content/pdf/10.1186/gb-2006-7-10-118.pdf
type MaximalSpanningTree range cluster = Tree.Tree (MSTNode range cluster)

data MSTNode range cluster = MSTNode
  { mstNodeSimilarity :: Similarity
  , mstNodeRangeCluster :: (range, cluster)
  -- , nodeMSTMinSimilarity :: Min Similarity
  }
  deriving (Show)

-- | A (disjoint) forest of `MaximalSpanningTree`s,
-- indexed by the minimal @(range, cluster)@ node
-- of each `MaximalSpanningTree`.
type MaximalSpanningTrees range cluster =
  [MaximalSpanningTree range cluster]

-- | @(`maximalSpanningTrees` allSimils)@
-- uses the Kruskal algorithm to find the maximal spanning trees
-- of the given `AllSimilarities`.
--
-- ExplanationNote: https://en.wikipedia.org/wiki/Kruskal's_algorithm
maximalSpanningTrees ::
  forall range cluster.
  Ord range =>
  Ord cluster =>
  AllSimilarities range cluster ->
  MaximalSpanningTrees range cluster
maximalSpanningTrees allSimils = ST.runST do
  -- DescriptionNote: create a `Point` for each given node
  -- each Point containing a root node and a maximal spanning tree.
  rangeToClusterToPoint :: (range :-> cluster :-> UnionFind.Point s (MaximalSpanningTree range cluster)) <-
    allSimils
      & Map.traverseWithKey \srcR ->
        Map.traverseWithKey \srcC _similToEdges ->
          UnionFind.fresh $
            Tree.Node
              MSTNode
                { mstNodeSimilarity = proba1
                , mstNodeRangeCluster = (srcR, srcC)
                }
              []
  -- DescriptionNote: iterate from the greatest to the lowest similarity edge.
  forM_ (allEdges & Map.toDescList) \(simil, edges) -> do
    -- Iterate through all the edges of that `Similarity`.
    forM_ (edges & toList) \((srcR, srcC), (dstR, dstC)) -> do
      let srcPoint = rangeToClusterToPoint Map.! srcR Map.! srcC
      let dstPoint = rangeToClusterToPoint Map.! dstR Map.! dstC
      alreadyInSameMST <- UnionFind.equivalent srcPoint dstPoint
      unless alreadyInSameMST do
        -- DescriptionNote: the nodes of this edge (src -> dst) belong to the same `MaximalSpanningTree`.
        UnionFind.unionWith srcPoint dstPoint \(Tree.Node srcRoot srcForest) (Tree.Node dstRoot dstForest) ->
          return $
            Tree.Node srcRoot $
              Tree.Node
                MSTNode
                  { mstNodeSimilarity = simil
                  , mstNodeRangeCluster = mstNodeRangeCluster dstRoot
                  }
                dstForest
                : srcForest
  rootTrees :: [MaximalSpanningTree range cluster] <-
    rangeToClusterToPoint
      -- DescriptionNote: collect all the Points.
      & Map.elems
      & List.concatMap Map.elems
      -- DescriptionNote: keep only the `MaximalSpanningTree`s
      -- contained in non-redundant `Point`s.
      & (`foldM` []) \acc p -> do
        isRedundant <- UnionFind.redundant p
        if isRedundant
          then return acc
          else UnionFind.descriptor p <&> (: acc)
  return rootTrees
  where
    -- Order `allSimils` by `Similarity`, ie.
    -- from: {-source-} range :-> cluster :-> {-destination-} Direction (range :-> Similarity :-> Seq cluster)
    -- to: Similarity :-> Seq ((range, cluster), (range, cluster))
    -- which will enable to add edges to the spanning tree in decreasing `Similarity`
    -- and hence build *a* maximal spanning tree.
    allEdges :: Similarity :-> Seq ((range, cluster), (range, cluster))
    allEdges =
      Map.unionsWith
        (Seq.><)
        [ dstSCs & Map.map (fmap \dstC -> (src, (dstR, dstC)))
        | (srcR, srcCRSC) <- allSimils & Map.toList
        , (srcC, dstRSC) <- srcCRSC & Map.toList
        , let src = (srcR, srcC)
        , -- ExplanationNote: it does not matter whether `directionDownstream` or `directionUpstream` is used,
        -- as they contain the very same edges and `Similarity`, just not in the same order,
        -- hence using any `Direction` would produce the same splitting similarities.
        (dstR, dstSCs) <- directionDownstream dstRSC & Map.toList
        ]

-- | "sea-level rise algorithm"
--
-- See: in `Phylomemy.References.RefDrawMeScience`,
-- « C.5 The sea-level rise algorithm and its implementation in Gargantext »
splitMaximalSpanningTrees ::
  HasCallStack =>
  forall range roots predictionMeasure.
  Show range =>
  Ord range =>
  predictionMeasure ::: (Set (range, Cluster) -> Set (range, Cluster) -> Decimal.Arith Similarity) ->
  roots ::: Set Root ->
  MaximalSpanningTrees range Cluster ->
  MaximalSpanningTrees range Cluster
splitMaximalSpanningTrees predictionMeasure roots =
  loop (return proba0) []
  where
    loop previousQuality doneBranches currentBranches =
      -- pTraceShow (["splitMaximalSpanningTrees", "loop"], ("previousQuality", previousQuality), ("doneBranches", List.length doneBranches), ("todoBranches", List.length currentBranches)) $
      case currentBranches of
        [] -> doneBranches
        currentBranch : todoBranches ->
          let splitBranches = splitMaximalSpanningTree currentBranch
          in -- pTraceShow (["splitMaximalSpanningTrees", "loop", "splitBranches"], ("size", Map.size splitBranches)) $
             if List.length splitBranches <= 1
              then loop previousQuality (doneBranches <> splitBranches) todoBranches
              else letName (fmap maximalSpanningTreeToNodes $ (doneBranches <> splitBranches) <> todoBranches) \branchToNodes ->
                let splitQuality = globalQuality predictionMeasure roots branchToNodes
                in if previousQuality < splitQuality
                    then loop splitQuality doneBranches (splitBranches <> todoBranches)
                    else loop previousQuality (doneBranches <> splitBranches) todoBranches

splitMaximalSpanningTree ::
  forall range cluster.
  HasCallStack =>
  Show range =>
  Show cluster =>
  Ord range =>
  Ord cluster =>
  MaximalSpanningTree range cluster ->
  MaximalSpanningTrees range cluster
splitMaximalSpanningTree mst =
  case minimumSimilarity mst of
    Nothing -> [mst]
    Just minSimil -> cutMerge mst
      where
        cutMerge = uncurry (:) . cut
        cut :: MaximalSpanningTree range cluster -> (MaximalSpanningTree range cluster, [MaximalSpanningTree range cluster])
        cut (Tree.Node node children) =
          let (keptChildren, cutChildren) =
                children & List.partition \tree ->
                  minSimil < mstNodeSimilarity (Tree.rootLabel tree)
          in let cutChildrenRoots = cutChildren & List.concatMap cutMerge
             in let (keptChildrenTrees, cutChildrenTree) = List.unzip (cut <$> keptChildren)
                in let cutChildrenTreeRoots = cutChildrenTree & List.concat & List.concatMap cutMerge
                   in ( Tree.Node node keptChildrenTrees
                      , (cutChildrenRoots List.++ cutChildrenTreeRoots)
                      -- WarningNote: the root nodes of those new `MaximalSpanningTree`s
                      -- keep their `mstNodeSimilarity` to their cutting value,
                      -- which is lower than their `minimumSimilarity`.
                      )

minimumSimilarity :: MaximalSpanningTree range cluster -> Maybe Similarity
minimumSimilarity (Tree.Node _rootNode rootBranches)
  | List.null rootBranches = Nothing
  | otherwise =
      -- ExplanationNote: the root node of a `MaximalSpanningTree`,
      -- being a root node, does not have parent,
      -- hence its `mstNodeSimilarity` must be ignored.
      Just $
        List.minimum $
          rootBranches
            <&> Tree.foldTree \node accs ->
              List.minimum $ (mstNodeSimilarity node) : accs

-- | @(`maximalSpanningTreeToNodes` branch)@ returns the nodes of the given @(branch)@.
maximalSpanningTreeToNodes ::
  HasCallStack =>
  Ord range =>
  Ord cluster =>
  MaximalSpanningTree range cluster ->
  Set (range, cluster)
maximalSpanningTreeToNodes = foldMap' (Set.singleton . mstNodeRangeCluster)

-- | @(splittingSimilarities maxSpanTrees)@
-- returns the `Similarity`s causing the given `MaximalSpanningTrees`
-- to split into further more `MaximalSpanningTrees`.
-- splittingSimilarities :: HasCallStack => MaximalSpanningTrees range cluster -> Set Similarity
-- splittingSimilarities = foldMap' $ Map.foldMapWithKey \simil _ -> Set.singleton simil

-- | The global quality of a `Phylomemy`.
--
-- DescriptionNote: `globalQuality` counts the `(range, Cluster)` nodes of a branch
-- but that a `(range, Cluster)` in itself can gather several documents.
globalQuality ::
  HasCallStack =>
  Ord range =>
  predictionMeasure ::: (Set (range, Cluster) -> Set (range, Cluster) -> Decimal.Arith Similarity) ->
  roots ::: Set Root ->
  branchToNodes ::: [Set (range, Cluster)] ->
  Decimal.Arith Similarity
globalQuality (Named predictionMeasure) (Named roots) (Named branchToNodes) =
  List.sum
    [ probability (1 % cardinal roots)
      * List.sum
        [ probability (cardinal retrievedNodes % nodesTotal)
          * predictionMeasure relevantNodes retrievedNodes
        | retrievedNodes <- branches
        ]
    | root <- roots & toList
    , -- CorrectnessNote: ignore branches not containing any requested `root`
    let branches = branchToNodes & List.filter (any (\(_r, c) -> Set.member root c))
    , let nodesTotal = branches & List.map (cardinal @Int) & List.sum & fromIntegral
    , let relevantNodes = branches & foldMap' (Set.filter (\(_r, c) -> Set.member root c))
    ]

-- | L'idée de la F-mesure est de s'assurer qu'un classificateur fait de bonnes
-- prédictions de la classe pertinente (bonne précision)
-- en suffisamment grand nombre (bon rappel)
-- sur un jeu de données cible.
--
-- Tout comme la précision et le rappel,
-- la F-mesure varie de 0 (plus mauvaise valeur)
-- à 1 (meilleure valeur possible).
--
-- ExplanationNote: https://en.wikipedia.org/wiki/F-score
predictionMeasureF ::
  HasCallStack =>
  Ord range =>
  {-lambda :::-}

  -- | Trade-off between `precision` and `recall`.
  -- See https://en.wikipedia.org/wiki/Precision_and_recall
  --
  -- > [It] predetermine[s] the desired shape of the phylomemy: a continent
  -- > (i.e., one large branch) or an archipelago of elements of knowledge
  -- > (i.e., many small branches). The estimation of `lambda` is left to the researcher’s
  -- > discretion in light of her own expertise and research questions, which makes
  -- > any phylomemy an artifact of the researcher’s perception
  --
  -- For @(lambda == 0)@, only the `precision` counts, whereas for @(lambda == 1)@, only the `recall` counts.
  Similarity ->
  Set (range, Cluster) ->
  Set (range, Cluster) ->
  Decimal.Arith Similarity
predictionMeasureF lambda relevantNodes retrievedNodes
  | lambda == proba0 = probability precision
  | lambda == proba1 = probability recall
  | otherwise = probability $ precision * recall * (1 + betaSquare) / (recall + betaSquare * precision)
  where
    precision = cardinal relevantRetrievedNodes % cardinal retrievedNodes
    recall = cardinal relevantRetrievedNodes % cardinal relevantNodes
    relevantRetrievedNodes = Set.intersection relevantNodes retrievedNodes
    lambdaDouble = lambda & Decimal.toScientificDecimal & toBoundedRealFloat @Double & fromLeft 1
    -- ExplanationNote: the `tan` is just to spread `lambda`
    -- Two commonly used values for β are:
    -- - 2, which weighs recall higher than precision,
    -- - and 0.5, which weighs recall lower than precision.
    beta = tan (lambdaDouble * pi / 2)
    betaSquare :: Rational
    betaSquare = beta ^ (2 :: Int) & toRational