{-# LANGUAGE OverloadedLists #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE ParallelListComp #-} {-# OPTIONS_GHC -Wno-orphans #-} module Phylomemy.TemporalMatchingSpec where import Control.Monad (Monad (..), foldM, foldM_, forM_, void) import Data.Function (($), (&), (.)) import Data.Functor ((<&>), Functor(..), (<$>)) import Data.GenValidity import Data.GenValidity.Map () import Data.GenValidity.Sequence () import Data.Tree qualified as Tree import Data.GenValidity.Set () import Data.GenValidity.Text () import Data.GenValidity.Time () import Data.Int (Int) import Data.List qualified as List import Data.Map.Strict qualified as Map import Data.Maybe (Maybe (..), fromJust) import Data.Ord (Ord) import Data.Sequence qualified as Seq import Data.Set qualified as Set import Data.Text.Short (ShortText) import Data.Text.Short qualified as ShortText import Data.Time (UTCTime) import Data.Validity.Map () import Data.Validity.Set () import Data.Validity.Text () import Debug.Pretty.Simple (pTraceShow, pTraceShowId) import GHC.IsList (toList) import Logic import Numeric (showFFloat) import Numeric.Natural (Natural) import System.FilePath ((<.>)) import Test.Syd import Test.Syd.Validity import Text.Show (Show (..)) import Prelude (Double, Integral (..), Num (..), Rational, fromRational) import Phylomemy import Phylomemy.IndexationSpec (Pos (..), Rang) import Utils rangeToDocs0 :: Pos :-> Seq.Seq (Document Pos) rangeToDocs0 = Map.fromList [ ( Pos rangeIndex , Seq.fromList [ Document { documentPosition = Pos (2 * rangeIndex + 3 * docIndex) , documentRoots = Map.fromList [(r, ()) | r <- roots] } | roots <- docs | docIndex <- [1 ..] ] ) | docs <- [ [["a", "b", "c"], ["a", "d", "e"], ["e", "f", "g"]] , [["a", "b"], ["d", "f"], ["a", "d"]] , [["f"], ["d", "f"], ["f", "g", "a"]] , [["b", "c", "e"], ["a", "d", "e"], ["a", "b", "c"]] , [["d", "f", "g"], ["b", "f"], ["a", "c", "d"], ["a", "f"]] , [["c", "d", "g"], ["b", "c", "g"], ["a", "b", "c"], ["e", "g"]] ] | rangeIndex <- [1 ..] ] {- rangeToDocs0 :: Pos :-> Seq.Seq (Document Pos) rangeToDocs0 = Map.fromListWith (<>) [ (Pos (pos - (pos `rem` 2)), Seq.singleton doc) | doc <- docs0 & toList , let Pos pos = documentPosition doc ] -} spec :: Spec spec = do {- describe "splitMaximalSpanningTree" do let mst0 :: MaximalSpanningTree Int Int = Tree.Node MSTNode{mstNodeRangeCluster=(2,1), mstNodeSimilarity=proba1} [ Tree.Node MSTNode{mstNodeRangeCluster=(1,1), mstNodeSimilarity=proba0} [] , Tree.Node MSTNode{mstNodeRangeCluster=(1,2), mstNodeSimilarity=assertProbability 0.2} [] , Tree.Node MSTNode{mstNodeRangeCluster=(1,3), mstNodeSimilarity=assertProbability 0.3} [ Tree.Node MSTNode{mstNodeRangeCluster=(2,3), mstNodeSimilarity=assertProbability 0.3} [] , Tree.Node MSTNode{mstNodeRangeCluster=(2,4), mstNodeSimilarity=proba0} [ Tree.Node MSTNode{mstNodeRangeCluster=(1,4), mstNodeSimilarity=assertProbability 0.2} [ Tree.Node MSTNode{mstNodeRangeCluster=(2,2), mstNodeSimilarity=assertProbability 0.3} [] , Tree.Node MSTNode{mstNodeRangeCluster=(2,5), mstNodeSimilarity=assertProbability 0.3} [] ] ] ] ] goldenBuilder ("mst=0" <.> "split=0") $ dotMaximalSpanningTrees [mst0] goldenBuilder ("mst=0" <.> "split=1") $ dotMaximalSpanningTrees $ splitMaximalSpanningTree mst0 goldenBuilder ("mst=0" <.> "split=2") $ dotMaximalSpanningTrees $ mst0 & splitMaximalSpanningTree >>= splitMaximalSpanningTree goldenBuilder ("mst=0" <.> "split=3") $ dotMaximalSpanningTrees $ mst0 & splitMaximalSpanningTree >>= splitMaximalSpanningTree >>= splitMaximalSpanningTree -} letName rangeToDocs0 $ \rangeToDocs -> letName ["a", "b", "c", "d", "e", "f", "g"] $ \roots -> forM_ ([1 .. 1] :: [Int]) \minSupp -> forM_ ([2 .. 2] :: [Int]) \minSize -> do let clusters = clusterize roots (assertStrictlyPositive minSupp) (assertStrictlyPositive minSize) rangeToDocs let allSimils = allSimilarities similarityJaccard (clusters <&> unName) let msf = maximalSpanningTrees allSimils describe "dotMaximalSpanningTrees" do forM_ ([0, 0.3, 1] :: [Rational]) \lambda -> do letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $ dotSimilarities allSimils $ splitMaximalSpanningTrees predMeasure roots msf -- describe "dotMaximalSpanningTrees" do -- ([Map.keysSet similToMST | similToMST <- msf & Map.elems] & Set.unions & toList) -- & (`foldM_` msf) \acc simil -> do -- let similS = showFFloat Nothing (fromRational @Double (runProbability simil)) "" -- let acc' = acc & (`Map.foldrWithKey` Map.empty) \_simil mst -> -- Map.unionWith (Map.unionWith (Map.unionWith (Seq.><))) $ -- splitMaximalSpanningTree mst -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "simil=" <> similS) $ -- dotMaximalSpanningTrees acc -- return acc' -- describe "splitMaximalSpanningTrees" do -- forM_ ([0] :: [Rational]) \lambda -> do -- letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $ -- dotMaximalSpanningTrees $ -- splitMaximalSpanningTrees predMeasure roots msf -- describe "dotSimilarities" do -- forM_ ([0] :: [Rational]) \lambda -> do -- --letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $ -- dotSimilarities -- --(splitMaximalSpanningTrees predMeasure roots msf) -- msf -- allSimils -- describe "similarities" do -- goldenShow ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize) weights -- describe "phylomemyDOT" do -- forM_ weights \minWeight -> -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "minWeight=" <> show minWeight) $ -- phylomemyDOT $ -- phylomemyRaise minWeight phy