1 {-# LANGUAGE OverloadedLists #-}
 
   2 {-# LANGUAGE OverloadedStrings #-}
 
   3 {-# LANGUAGE ParallelListComp #-}
 
   4 {-# OPTIONS_GHC -Wno-orphans #-}
 
   6 module Phylomemy.TemporalMatchingSpec where
 
   8 import Control.Monad (Monad (..), foldM, foldM_, forM_, void, when)
 
   9 import Data.Eq (Eq (..))
 
  10 import Data.Function (($), (&), (.))
 
  11 import Data.Functor (Functor (..), (<$>), (<&>))
 
  12 import Data.GenValidity
 
  13 import Data.GenValidity.Map ()
 
  14 import Data.GenValidity.Sequence ()
 
  15 import Data.GenValidity.Set ()
 
  16 import Data.GenValidity.Text ()
 
  17 import Data.GenValidity.Time ()
 
  19 import Data.List qualified as List
 
  20 import Data.Map.Strict qualified as Map
 
  21 import Data.Maybe (Maybe (..), fromJust)
 
  23 import Data.Sequence qualified as Seq
 
  24 import Data.Set qualified as Set
 
  25 import Data.Text.Short (ShortText)
 
  26 import Data.Text.Short qualified as ShortText
 
  27 import Data.Time (UTCTime)
 
  28 import Data.Tree qualified as Tree
 
  29 import Data.Validity.Map ()
 
  30 import Data.Validity.Set ()
 
  31 import Data.Validity.Text ()
 
  32 import Debug.Pretty.Simple (pTraceShow, pTraceShowId)
 
  33 import GHC.IsList (toList)
 
  34 import GHC.Stack (HasCallStack)
 
  36 import Numeric (showFFloat)
 
  37 import Numeric.Natural (Natural)
 
  38 import System.FilePath ((<.>))
 
  40 import Test.Syd.Validity
 
  41 import Text.Show (Show (..))
 
  42 import Prelude (Double, Integral (..), Num (..), Rational, fromRational)
 
  45 import Phylomemy.IndexationSpec (Pos (..), Rang)
 
  49 rangeToDocs0 :: Pos :-> Seq.Seq (Document Pos)
 
  55             { documentPosition = Pos (2 * rangeIndex + 3 * docIndex)
 
  56             , documentRoots = Map.fromList [(r, ()) | r <- roots]
 
  63         [ [["a", "b", "c"], ["a", "d", "e"], ["e", "f", "g"]]
 
  64         , [["a", "b"], ["d", "f"], ["a", "d"]]
 
  65         , [["f"], ["d", "f"], ["f", "g", "a"]]
 
  66         , [["b", "c", "e"], ["a", "d", "e"], ["a", "b", "c"]]
 
  67         , [["d", "f", "g"], ["b", "f"], ["a", "c", "d"], ["a", "f"]]
 
  68         , [["c", "d", "g"], ["b", "c", "g"], ["a", "b", "c"], ["e", "g"]]
 
  70     | rangeIndex <- [1 ..]
 
  74 rangeToDocs0 :: Pos :-> Seq.Seq (Document Pos)
 
  78     [ (Pos (pos - (pos `rem` 2)), Seq.singleton doc)
 
  79     | doc <- docs0 & toList
 
  80     , let Pos pos = documentPosition doc
 
  84 spec :: HasCallStack => Spec
 
  87   describe "splitMaximalSpanningTree" do
 
  88     let mst0 :: MaximalSpanningTree Int Int =
 
  89           Tree.Node MSTNode{mstNodeRangeCluster=(2,1), mstNodeSimilarity=proba1}
 
  90             [ Tree.Node MSTNode{mstNodeRangeCluster=(1,1), mstNodeSimilarity=proba0} []
 
  91             , Tree.Node MSTNode{mstNodeRangeCluster=(1,2), mstNodeSimilarity=assertProbability 0.2} []
 
  92             , Tree.Node MSTNode{mstNodeRangeCluster=(1,3), mstNodeSimilarity=assertProbability 0.3}
 
  93               [ Tree.Node MSTNode{mstNodeRangeCluster=(2,3), mstNodeSimilarity=assertProbability 0.3} []
 
  94               , Tree.Node MSTNode{mstNodeRangeCluster=(2,4), mstNodeSimilarity=proba0}
 
  95                  [ Tree.Node MSTNode{mstNodeRangeCluster=(1,4), mstNodeSimilarity=assertProbability 0.2}
 
  96                    [ Tree.Node MSTNode{mstNodeRangeCluster=(2,2), mstNodeSimilarity=assertProbability 0.3} []
 
  97                    , Tree.Node MSTNode{mstNodeRangeCluster=(2,5), mstNodeSimilarity=assertProbability 0.3} []
 
 102     goldenBuilder ("mst=0" <.> "split=0") $
 
 103       dotMaximalSpanningForest [mst0]
 
 104     goldenBuilder ("mst=0" <.> "split=1") $
 
 105       dotMaximalSpanningForest $
 
 106         splitMaximalSpanningTree mst0
 
 107     goldenBuilder ("mst=0" <.> "split=2") $
 
 108       dotMaximalSpanningForest $
 
 110           & splitMaximalSpanningTree
 
 111           >>= splitMaximalSpanningTree
 
 112     goldenBuilder ("mst=0" <.> "split=3") $
 
 113       dotMaximalSpanningForest $
 
 115           & splitMaximalSpanningTree
 
 116           >>= splitMaximalSpanningTree
 
 117           >>= splitMaximalSpanningTree
 
 119   letName rangeToDocs0 $ \rangeToDocs ->
 
 120     letName ["a", "b", "c", "d", "e", "f", "g"] $ \roots ->
 
 121       forM_ ([1 .. 1] :: [Int]) \minSupp ->
 
 122         forM_ ([2 .. 2] :: [Int]) \minSize -> do
 
 123           let clusters = clusterize roots (assertStrictlyPositive minSupp) (assertStrictlyPositive minSize) rangeToDocs
 
 124           -- let allSimils = allSimilarities similarityJaccard (clusters <&> unName)
 
 125           let msf = maximalSpanningForest similarityJaccard (clusters <&> unName)
 
 126           describe "dotMaximalSpanningForest" do
 
 127             forM_ ([0.3] :: [Rational]) \lambda -> do
 
 128               letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
 
 129                 forM_ (msfPrune predMeasure roots msf & dotMaximalSpanningForest & Map.toList) \(scale, dot) ->
 
 130                   -- when (scale == 3) do
 
 131                   goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "" <.> "scale=" <> show scale) dot
 
 133 -- describe "dotMaximalSpanningForest" do
 
 134 --  ([Map.keysSet similToMST | similToMST <- msf & Map.elems] & Set.unions & toList)
 
 135 --    & (`foldM_` msf) \acc simil -> do
 
 136 --      let similS = showFFloat Nothing (fromRational @Double (runProbability simil)) ""
 
 137 --      let acc' = acc & (`Map.foldrWithKey` Map.empty) \_simil mst ->
 
 138 --                          Map.unionWith (Map.unionWith (Map.unionWith (Seq.><))) $
 
 139 --                          splitMaximalSpanningTree mst
 
 140 --      goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "simil=" <> similS) $
 
 141 --        dotMaximalSpanningForest acc
 
 143 -- describe "splitMaximalSpanningTrees" do
 
 144 --  forM_ ([0] :: [Rational]) \lambda -> do
 
 145 --    letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
 
 146 --      goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $
 
 147 --        dotMaximalSpanningForest $
 
 148 --          splitMaximalSpanningTrees predMeasure roots msf
 
 149 -- describe "dotSimilarities" do
 
 150 -- forM_ ([0] :: [Rational]) \lambda -> do
 
 151 -- --letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
 
 152 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $
 
 154 -- --(splitMaximalSpanningTrees predMeasure roots msf)
 
 157 -- describe "similarities" do
 
 158 --   goldenShow ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize) weights
 
 159 -- describe "phylomemyDOT" do
 
 160 --   forM_ weights \minWeight ->
 
 161 --     goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "minWeight=" <> show minWeight) $
 
 163 --         phylomemyRaise minWeight phy