1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE ParallelListComp #-}
4 {-# OPTIONS_GHC -Wno-orphans #-}
6 module Phylomemy.TemporalMatchingSpec where
8 import Control.Monad (Monad (..), foldM, foldM_, forM_, void, when)
9 import Data.Eq (Eq (..))
10 import Data.Function (($), (&), (.))
11 import Data.Functor (Functor (..), (<$>), (<&>))
12 import Data.GenValidity
13 import Data.GenValidity.Map ()
14 import Data.GenValidity.Sequence ()
15 import Data.GenValidity.Set ()
16 import Data.GenValidity.Text ()
17 import Data.GenValidity.Time ()
19 import Data.List qualified as List
20 import Data.Map.Strict qualified as Map
21 import Data.Maybe (Maybe (..), fromJust)
23 import Data.Sequence qualified as Seq
24 import Data.Set qualified as Set
25 import Data.Text.Short (ShortText)
26 import Data.Text.Short qualified as ShortText
27 import Data.Time (UTCTime)
28 import Data.Tree qualified as Tree
29 import Data.Validity.Map ()
30 import Data.Validity.Set ()
31 import Data.Validity.Text ()
32 import Debug.Pretty.Simple (pTraceShow, pTraceShowId)
33 import GHC.IsList (toList)
34 import GHC.Stack (HasCallStack)
36 import Numeric (showFFloat)
37 import Numeric.Natural (Natural)
38 import System.FilePath ((<.>))
40 import Test.Syd.Validity
41 import Text.Show (Show (..))
42 import Prelude (Double, Integral (..), Num (..), Rational, fromRational)
45 import Phylomemy.IndexationSpec (Pos (..), Rang)
49 rangeToDocs0 :: Pos :-> Seq.Seq (Document Pos)
55 { documentPosition = Pos (2 * rangeIndex + 3 * docIndex)
56 , documentRoots = Map.fromList [(r, ()) | r <- roots]
63 [ [["a", "b", "c"], ["a", "d", "e"], ["e", "f", "g"]]
64 , [["a", "b"], ["d", "f"], ["a", "d"]]
65 , [["f"], ["d", "f"], ["f", "g", "a"]]
66 , [["b", "c", "e"], ["a", "d", "e"], ["a", "b", "c"]]
67 , [["d", "f", "g"], ["b", "f"], ["a", "c", "d"], ["a", "f"]]
68 , [["c", "d", "g"], ["b", "c", "g"], ["a", "b", "c"], ["e", "g"]]
70 | rangeIndex <- [1 ..]
74 rangeToDocs0 :: Pos :-> Seq.Seq (Document Pos)
78 [ (Pos (pos - (pos `rem` 2)), Seq.singleton doc)
79 | doc <- docs0 & toList
80 , let Pos pos = documentPosition doc
84 spec :: HasCallStack => Spec
87 describe "splitMaximalSpanningTree" do
88 let mst0 :: MaximalSpanningTree Int Int =
89 Tree.Node MSTNode{mstNodeRangeCluster=(2,1), mstNodeSimilarity=proba1}
90 [ Tree.Node MSTNode{mstNodeRangeCluster=(1,1), mstNodeSimilarity=proba0} []
91 , Tree.Node MSTNode{mstNodeRangeCluster=(1,2), mstNodeSimilarity=assertProbability 0.2} []
92 , Tree.Node MSTNode{mstNodeRangeCluster=(1,3), mstNodeSimilarity=assertProbability 0.3}
93 [ Tree.Node MSTNode{mstNodeRangeCluster=(2,3), mstNodeSimilarity=assertProbability 0.3} []
94 , Tree.Node MSTNode{mstNodeRangeCluster=(2,4), mstNodeSimilarity=proba0}
95 [ Tree.Node MSTNode{mstNodeRangeCluster=(1,4), mstNodeSimilarity=assertProbability 0.2}
96 [ Tree.Node MSTNode{mstNodeRangeCluster=(2,2), mstNodeSimilarity=assertProbability 0.3} []
97 , Tree.Node MSTNode{mstNodeRangeCluster=(2,5), mstNodeSimilarity=assertProbability 0.3} []
102 goldenBuilder ("mst=0" <.> "split=0") $
103 dotMaximalSpanningForest [mst0]
104 goldenBuilder ("mst=0" <.> "split=1") $
105 dotMaximalSpanningForest $
106 splitMaximalSpanningTree mst0
107 goldenBuilder ("mst=0" <.> "split=2") $
108 dotMaximalSpanningForest $
110 & splitMaximalSpanningTree
111 >>= splitMaximalSpanningTree
112 goldenBuilder ("mst=0" <.> "split=3") $
113 dotMaximalSpanningForest $
115 & splitMaximalSpanningTree
116 >>= splitMaximalSpanningTree
117 >>= splitMaximalSpanningTree
119 letName rangeToDocs0 $ \rangeToDocs ->
120 letName ["a", "b", "c", "d", "e", "f", "g"] $ \roots ->
121 forM_ ([1 .. 1] :: [Int]) \minSupp ->
122 forM_ ([2 .. 2] :: [Int]) \minSize -> do
123 let clusters = clusterize roots (assertStrictlyPositive minSupp) (assertStrictlyPositive minSize) rangeToDocs
124 -- let allSimils = allSimilarities similarityJaccard (clusters <&> unName)
125 let msf = maximalSpanningForest similarityJaccard (clusters <&> unName)
126 describe "dotMaximalSpanningForest" do
127 forM_ ([0.3] :: [Rational]) \lambda -> do
128 letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
129 forM_ (msfPrune predMeasure roots msf & dotMaximalSpanningForest & Map.toList) \(scale, dot) ->
130 -- when (scale == 3) do
131 goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "" <.> "scale=" <> show scale) dot
133 -- describe "dotMaximalSpanningForest" do
134 -- ([Map.keysSet similToMST | similToMST <- msf & Map.elems] & Set.unions & toList)
135 -- & (`foldM_` msf) \acc simil -> do
136 -- let similS = showFFloat Nothing (fromRational @Double (runProbability simil)) ""
137 -- let acc' = acc & (`Map.foldrWithKey` Map.empty) \_simil mst ->
138 -- Map.unionWith (Map.unionWith (Map.unionWith (Seq.><))) $
139 -- splitMaximalSpanningTree mst
140 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "simil=" <> similS) $
141 -- dotMaximalSpanningForest acc
143 -- describe "splitMaximalSpanningTrees" do
144 -- forM_ ([0] :: [Rational]) \lambda -> do
145 -- letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
146 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $
147 -- dotMaximalSpanningForest $
148 -- splitMaximalSpanningTrees predMeasure roots msf
149 -- describe "dotSimilarities" do
150 -- forM_ ([0] :: [Rational]) \lambda -> do
151 -- --letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
152 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $
154 -- --(splitMaximalSpanningTrees predMeasure roots msf)
157 -- describe "similarities" do
158 -- goldenShow ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize) weights
159 -- describe "phylomemyDOT" do
160 -- forM_ weights \minWeight ->
161 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "minWeight=" <> show minWeight) $
163 -- phylomemyRaise minWeight phy