module Phylomemy.TemporalMatching where

-- import Debug.Pretty.Simple (pTraceShow)
import Control.Monad (Monad (..), foldM, forM_, unless)
import Control.Monad.ST qualified as ST
import Data.Bool (otherwise)
import Data.Either (fromLeft)
import Data.Eq (Eq (..))
import Data.Foldable (any, foldMap', toList)
import Data.Function (($), (&), (.))
import Data.Functor (Functor (..), (<$>), (<&>))
import Data.Int (Int)
import Data.List qualified as List
import Data.Map.Strict qualified as Map
import Data.Maybe (Maybe (..))
import Data.Ord (Ord (..))
import Data.Ord qualified as Ord
import Data.Ratio (Rational, (%))
import Data.Scientific (toBoundedRealFloat)
import Data.Semigroup (Semigroup (..))
import Data.Sequence (Seq)
import Data.Sequence qualified as Seq
import Data.Set (Set)
import Data.Set qualified as Set
import Data.Tree qualified as Tree
import Data.Tuple (uncurry)
import GHC.Stack (HasCallStack)
import Logic hiding ((/))
import Numeric.Decimal qualified as Decimal
import Numeric.Probability
import Text.Show (Show)
import Prelude (Double, Num (..), fromIntegral, pi, tan, toRational, (/), (^))

import Clustering.FrequentItemSet.BruteForce qualified as Clustering
import Clustering.UnionFind.ST qualified as UnionFind
import Phylomemy.Indexation

type Similarity = Probability

cardinal :: Num i => Set a -> i
cardinal = fromIntegral . Set.size

-- TODO: implement biased similarity from the paper
similarityJaccard :: Ord.Ord a => Set a -> Set a -> Similarity
similarityJaccard x y =
  cardinal (Set.intersection x y) % cardinal (Set.union x y)
    & probability
    & Decimal.arithError

-- | A `MaximalSpanningTree` (MST) is one (possibly out of many)
-- tree spanning accross all the given `(range, cluster)` nodes,
-- with the maximal sum of edges' `Similarity` between them.
--
-- ExplanationNote: https://en.wikipedia.org/wiki/Minimum_spanning_tree
--
-- Viewing a phylomemy as a `MaximalSpanningTree`
-- is the crux of understanding how it is computed:
--
-- - the `mstMinimalSimilarity` is the next `Similarity`
-- that will split the `MaximalSpanningTree` into two or more `MaximalSpanningTree`s.
--
-- - it explains what the "scale of a phylomemy" is:
-- merging clusters of the same `range`
-- when they still belong to the same `MaximalSpanningTree`.
--
-- ImplementationNote: using a `Tree.Tree` to represent a `MaximalSpanningTree`
-- (instead of an adjacency edge map for instance)
-- is motivated by the need to implement `mstSplit`,
-- which needs to gather the `(range, cluster)` nodes
-- of each `MaximalSpanningTree` resulting from the cut,
-- because knowing that is required by `msfGlobalQuality`.
--
-- TODO: "Inadequacies of Minimum Spanning Trees in Molecular Epidemiology"
-- https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3187300/
--
-- TODO: "Divide-and-conquer based all spanning tree generation algorithm of a simple connected graph"
-- https://www.sciencedirect.com/science/article/abs/pii/S0304397521006952
--
-- WarningNote: "The tree of one percent"
-- https://link.springer.com/content/pdf/10.1186/gb-2006-7-10-118.pdf
type MaximalSpanningTree range cluster = Tree.Tree (MSTNode range cluster)

data MSTNode range cluster = MSTNode
  { mstNodeSimilarity :: Similarity
  -- ^ The `similarity` of the parent edge of this node
  -- ie. of the only edge getting closer to the root node of the `MaximalSpanningTree`.
  --
  -- ImplementationNote: `maximalSpanningForest` puts a `Similarity` of `1` for the root node.
  -- and `mstSplit` leaves the `mstMinimalSimilarity` of the edge before splitting out the `MaximalSpanningTree`.
  , mstNodeRangeCluster :: (range, cluster)
  }
  deriving (Show)

-- | A (disjoint) forest of `MaximalSpanningTree`s,
-- indexed by the minimal @(range, cluster)@ node
-- of each `MaximalSpanningTree`.
type MaximalSpanningForest range cluster =
  [MaximalSpanningTree range cluster]

-- | @(`maximalSpanningForest` similarity rangeToClusterToDocs)@
-- uses the Kruskal algorithm to find the trees spanning all
-- the given `(range, cluster)` nodes in `rangeToClusterToDocs`
-- with the maximal total `similarity` between them.
--
-- ExplanationNote: https://en.wikipedia.org/wiki/Kruskal's_algorithm
maximalSpanningForest ::
  forall range cluster doc.
  HasCallStack =>
  Ord range =>
  Ord cluster =>
  {-similarityMeasure :::-} (cluster -> cluster -> Similarity) ->
  {-clusters :::-} range :-> cluster :-> Seq (Clustering.Transaction Root doc) ->
  MaximalSpanningForest range cluster
maximalSpanningForest similarity rangeToClusterToDocs = ST.runST do
  -- DescriptionNote: create a `Point` for each given node
  -- each Point containing a root node and a maximal spanning tree.
  rangeToClusterToPoint :: range :-> cluster :-> UnionFind.Point s (MaximalSpanningTree range cluster) <-
    rangeToClusterToDocs
      & Map.traverseWithKey \srcR ->
        Map.traverseWithKey \srcC _docs ->
          UnionFind.fresh $
            Tree.Node
              MSTNode
                { mstNodeSimilarity = proba1
                , mstNodeRangeCluster = (srcR, srcC)
                }
              []
  -- DescriptionNote: iterate from the greatest to the lowest similarity edge.
  forM_ (allEdges & Map.toDescList) \(simil, edges) -> do
    -- Iterate through all the edges of that `Similarity`.
    forM_ (edges & toList) \((srcR, srcC), (dstR, dstC)) -> do
      let srcPoint = rangeToClusterToPoint Map.! srcR Map.! srcC
      let dstPoint = rangeToClusterToPoint Map.! dstR Map.! dstC
      alreadyInSameMST <- UnionFind.equivalent srcPoint dstPoint
      unless alreadyInSameMST do
        -- DescriptionNote: the nodes of this edge (src -> dst) belong to the same `MaximalSpanningTree`.
        UnionFind.unionWith srcPoint dstPoint \(Tree.Node srcRoot srcForest) (Tree.Node dstRoot dstForest) ->
          return $
            Tree.Node srcRoot $
              Tree.Node
                MSTNode
                  { mstNodeSimilarity = simil
                  , mstNodeRangeCluster = mstNodeRangeCluster dstRoot
                  }
                dstForest
                : srcForest
  rootForest :: [MaximalSpanningTree range cluster] <-
    rangeToClusterToPoint
      -- DescriptionNote: collect all the Points.
      & Map.elems
      & List.concatMap Map.elems
      -- DescriptionNote: keep only the `MaximalSpanningTree`s
      -- contained in non-redundant `Point`s.
      & (`foldM` []) \acc p -> do
        isRedundant <- UnionFind.redundant p
        if isRedundant
          then return acc
          else UnionFind.descriptor p <&> (: acc)
  return rootForest
  where
    -- Order `rangeToClusterToDocs` by `Similarity`,
    -- which will enable to add edges to the spanning tree in decreasing `Similarity`
    -- and hence build *a* maximal spanning tree.
    allEdges :: Similarity :-> Seq ((range, cluster), (range, cluster))
    allEdges =
      Map.unionsWith
        (Seq.><)
        [ Map.singleton (similarity srcC dstC) (Seq.singleton (src, dst))
        | (srcR, srcClusterToDocuments) <- rangeToClusterToDocs & Map.toList
        , (srcC, _docs) <- srcClusterToDocuments & Map.toList
        , let src = (srcR, srcC)
        , -- ExplanationNote: it does not matter whether lower or greater ranges are used for the destination nodes,
        -- as they contain the very same undirected edges and thus `Similarity`,
        -- hence would produce the same splitting `Similarity`s,
        -- though not necessarily the same `MaximalSpanningTree`.
        let (_, dstRangeToClusterToDocs) = Map.split srcR rangeToClusterToDocs
        , (dstR, dstClusterToDocs) <- dstRangeToClusterToDocs & Map.toList
        , (dstC, _docs) <- dstClusterToDocs & Map.toList
        , let dst = (dstR, dstC)
        ]

-- | "sea-level rise algorithm"
--
-- See: in `Phylomemy.References.RefDrawMeScience`,
-- « C.5 The sea-level rise algorithm and its implementation in Gargantext »
msfPrune ::
  forall range roots predictionMeasure.
  HasCallStack =>
  Show range =>
  Ord range =>
  predictionMeasure ::: (Set (range, Cluster) -> Set (range, Cluster) -> Decimal.Arith Similarity) ->
  roots ::: Set Root ->
  MaximalSpanningForest range Cluster ->
  MaximalSpanningForest range Cluster
msfPrune predictionMeasure roots =
  loop (return proba0) []
  where
    loop ::
      Decimal.Arith Probability ->
      MaximalSpanningForest range Cluster ->
      MaximalSpanningForest range Cluster ->
      MaximalSpanningForest range Cluster
    loop previousQuality doneBranches currentBranches =
      -- pTraceShow (["msfPrune", "loop"], ("previousQuality", previousQuality), ("doneBranches", List.length doneBranches), ("todoBranches", List.length currentBranches)) $
      case currentBranches of
        [] -> doneBranches
        currentBranch : todoBranches ->
          let splitBranches = mstSplit currentBranch
          in -- pTraceShow (["msfPrune", "loop", "splitBranches"], ("size", Map.size splitBranches)) $
             if List.length splitBranches <= 1
              then loop previousQuality (doneBranches <> splitBranches) todoBranches
              else letName (fmap mstNodes $ (doneBranches <> splitBranches) <> todoBranches) \mstToNodes ->
                let splitQuality = msfGlobalQuality predictionMeasure roots mstToNodes
                in if Decimal.arithError previousQuality < Decimal.arithError splitQuality
                    then -- CorrectnessFixMe: these `Decimal.arithError` may raise "arithmetic overflow",
                    -- it has to be understood.
                      loop splitQuality doneBranches (splitBranches <> todoBranches)
                    else loop previousQuality (doneBranches <> splitBranches) todoBranches

-- | @(`mstSplit` mst)@ returns the `MaximalSpanningTree`s
-- resulting from pruning out the given `MaximalSpanningTree`
-- of all the edges having the minimal `Similarity` of that `MaximalSpanningTree`.
mstSplit ::
  forall range cluster.
  HasCallStack =>
  Ord range =>
  Ord cluster =>
  MaximalSpanningTree range cluster ->
  MaximalSpanningForest range cluster
mstSplit mst =
  -- TODO: take minSimil as argument to avoid recomputing it when already known
  case mstMinimalSimilarity mst of
    Nothing -> [mst]
    Just minSimil -> cutMerge mst
      where
        cutMerge = uncurry (:) . cut
        cut ::
          MaximalSpanningTree range cluster ->
          (MaximalSpanningTree range cluster, [MaximalSpanningTree range cluster])
        cut (Tree.Node node children) =
          let (keptChildren, cutChildren) =
                children & List.partition \tree ->
                  minSimil < mstNodeSimilarity (Tree.rootLabel tree)
          in let cutChildrenRoots = cutChildren & List.concatMap cutMerge
             in let (keptChildrenForest, cutChildrenTree) = List.unzip (cut <$> keptChildren)
                in let cutChildrenTreeRoots = cutChildrenTree & List.concat & List.concatMap cutMerge
                   in ( Tree.Node node keptChildrenForest
                      , (cutChildrenRoots List.++ cutChildrenTreeRoots)
                      -- WarningNote: the root nodes of those new `MaximalSpanningTree`s
                      -- keep their `mstNodeSimilarity` to their cutting value,
                      -- which is lower than their `mstMinimalSimilarity`.
                      )

mstMinimalSimilarity :: MaximalSpanningTree range cluster -> Maybe Similarity
mstMinimalSimilarity (Tree.Node _rootNode rootBranches)
  | List.null rootBranches = Nothing
  | otherwise =
      -- ExplanationNote: the root node of a `MaximalSpanningTree`,
      -- being a root node, does not have a parent,
      -- hence its `mstNodeSimilarity` must be ignored.
      Just $
        List.minimum $
          rootBranches
            <&> Tree.foldTree \node accs ->
              List.minimum $ (mstNodeSimilarity node) : accs

-- | @(`mstSplittingSimilarities` mst)@
-- returns the `Similarity`s causing the given `MaximalSpanningForest`
-- to split into further more `MaximalSpanningForest`.
mstSplittingSimilarities :: MaximalSpanningTree range cluster -> Set Similarity
mstSplittingSimilarities (Tree.Node _rootNode rootBranches)
  | List.null rootBranches = Set.empty
  | otherwise =
      rootBranches
        & foldMap'
          ( Tree.foldTree \node accs ->
              Set.unions (Set.singleton (mstNodeSimilarity node) : accs)
          )

type Scale = Int -- > 0

-- | @(`mstScales` msf)@
-- returns the successive results of calling `mstSplit` on the given `MaximalSpanningTree`s in @(msf)@
-- until each one of them is a single node.
-- Even though the greatest `Scale`s may be different for each `MaximalSpanningTree` of @(msf)@,
-- the result are grouped by `Scale` then by `MaximalSpanningTree`,
-- which enabled to have a maximal `Scale` for the whole `msf`.
mstScales ::
  forall range cluster.
  Ord range =>
  Ord cluster =>
  MaximalSpanningForest range cluster ->
  Scale :-> {-mst-} Int :-> MaximalSpanningForest range cluster
mstScales initMSF =
  Map.unionsWith
    (Map.unionWith (List.++))
    [ Map.fromDistinctAscList
      [ (scale, Map.singleton mstI msf)
      | (scale, msf) <- scaleToMsf
      ]
    | (mstI, mst) <- initMSF & List.zip [1 :: Int ..]
    , let scaleToMsf :: [(Scale, MaximalSpanningForest range cluster)] =
            -- scanl :: (acc -> scale -> acc) -> acc -> [scale] -> [acc]
            splitSimils
              & Map.keys
              & List.scanl'
                (\(scale, msf) _minSimil -> (scale + 1, msf >>= mstSplit))
                (1, [mst])
    ]
  where
    splitSimils :: Similarity :-> () =
      Map.unions
        [ mst
          & Tree.foldTree \node accs ->
            Map.unionsWith
              (\() () -> ())
              (Map.singleton (mstNodeSimilarity node) () : accs)
        | mst <- initMSF
        ]

-- | @(`mstNodes` mst)@ returns the nodes of the given @(mst)@.
mstNodes ::
  HasCallStack =>
  Ord range =>
  Ord cluster =>
  MaximalSpanningTree range cluster ->
  Set (range, cluster)
mstNodes = foldMap' (Set.singleton . mstNodeRangeCluster)

-- | The global quality of a `Phylomemy`.
--
-- DescriptionNote: `msfGlobalQuality` counts the `(range, Cluster)` nodes of a branch
-- but that a `(range, Cluster)` in itself can gather several documents.
msfGlobalQuality ::
  HasCallStack =>
  Ord range =>
  predictionMeasure ::: (Set (range, Cluster) -> Set (range, Cluster) -> Decimal.Arith Similarity) ->
  roots ::: Set Root ->
  mstToNodes ::: [Set (range, Cluster)] ->
  Decimal.Arith Similarity
msfGlobalQuality (Named predictionMeasure) (Named roots) (Named mstToNodes) =
  List.sum
    [ probability (1 % cardinal roots)
      * List.sum
        [ probability (cardinal retrievedNodes % nodesTotal)
          * predictionMeasure relevantNodes retrievedNodes
        | retrievedNodes <- branches
        ]
    | root <- roots & toList
    , -- CorrectnessNote: ignore branches not containing any requested `root`
    let branches = mstToNodes & List.filter (any (\(_r, c) -> Set.member root c))
    , let nodesTotal = branches & List.map (cardinal @Int) & List.sum & fromIntegral
    , let relevantNodes = branches & foldMap' (Set.filter (\(_r, c) -> Set.member root c))
    ]

-- | L'idée de la F-mesure est de s'assurer qu'un classificateur fait de bonnes
-- prédictions de la classe pertinente (bonne précision)
-- en suffisamment grand nombre (bon rappel)
-- sur un jeu de données cible.
--
-- Tout comme la précision et le rappel,
-- la F-mesure varie de 0 (plus mauvaise valeur)
-- à 1 (meilleure valeur possible).
--
-- ExplanationNote: https://en.wikipedia.org/wiki/F-score
predictionMeasureF ::
  HasCallStack =>
  Ord range =>
  {-lambda :::-}

  -- | Trade-off between `precision` and `recall`.
  -- See https://en.wikipedia.org/wiki/Precision_and_recall
  --
  -- > [It] predetermine[s] the desired shape of the phylomemy: a continent
  -- > (i.e., one large branch) or an archipelago of elements of knowledge
  -- > (i.e., many small branches). The estimation of `lambda` is left to the researcher’s
  -- > discretion in light of her own expertise and research questions, which makes
  -- > any phylomemy an artifact of the researcher’s perception
  --
  -- For @(lambda == 0)@, only the `precision` counts, whereas for @(lambda == 1)@, only the `recall` counts.
  Similarity ->
  Set (range, Cluster) ->
  Set (range, Cluster) ->
  Decimal.Arith Similarity
predictionMeasureF lambda relevantNodes retrievedNodes
  | lambda == proba0 = probability precision
  | lambda == proba1 = probability recall
  | otherwise = probability $ precision * recall * (1 + betaSquare) / (recall + betaSquare * precision)
  where
    precision = cardinal relevantRetrievedNodes % cardinal retrievedNodes
    recall = cardinal relevantRetrievedNodes % cardinal relevantNodes
    relevantRetrievedNodes = Set.intersection relevantNodes retrievedNodes
    lambdaDouble = lambda & Decimal.toScientificDecimal & toBoundedRealFloat @Double & fromLeft 1
    -- ExplanationNote: the `tan` is just to spread the `lambda ∈ [0,1]` from -∞ to +∞
    -- Two commonly used values for β are:
    -- - 2, which weighs recall higher than precision,
    -- - and 0.5, which weighs recall lower than precision.
    beta = tan (lambdaDouble * pi / 2)
    betaSquare :: Rational
    betaSquare = beta ^ (2 :: Int) & toRational