1 {-# LANGUAGE OverloadedLists #-}
2 {-# LANGUAGE OverloadedStrings #-}
3 {-# LANGUAGE ParallelListComp #-}
4 {-# OPTIONS_GHC -Wno-orphans #-}
6 module Phylomemy.TemporalMatchingSpec where
8 import Control.Monad (Monad (..), foldM, foldM_, forM_, void)
9 import Data.Function (($), (&), (.))
10 import Data.Functor (Functor (..), (<$>), (<&>))
11 import Data.GenValidity
12 import Data.GenValidity.Map ()
13 import Data.GenValidity.Sequence ()
14 import Data.GenValidity.Set ()
15 import Data.GenValidity.Text ()
16 import Data.GenValidity.Time ()
18 import Data.List qualified as List
19 import Data.Map.Strict qualified as Map
20 import Data.Maybe (Maybe (..), fromJust)
22 import Data.Sequence qualified as Seq
23 import Data.Set qualified as Set
24 import Data.Text.Short (ShortText)
25 import Data.Text.Short qualified as ShortText
26 import Data.Time (UTCTime)
27 import Data.Tree qualified as Tree
28 import Data.Validity.Map ()
29 import Data.Validity.Set ()
30 import Data.Validity.Text ()
31 import Debug.Pretty.Simple (pTraceShow, pTraceShowId)
32 import GHC.IsList (toList)
34 import Numeric (showFFloat)
35 import Numeric.Natural (Natural)
36 import System.FilePath ((<.>))
38 import Test.Syd.Validity
39 import Text.Show (Show (..))
40 import Prelude (Double, Integral (..), Num (..), Rational, fromRational)
43 import Phylomemy.IndexationSpec (Pos (..), Rang)
47 rangeToDocs0 :: Pos :-> Seq.Seq (Document Pos)
53 { documentPosition = Pos (2 * rangeIndex + 3 * docIndex)
54 , documentRoots = Map.fromList [(r, ()) | r <- roots]
61 [ [["a", "b", "c"], ["a", "d", "e"], ["e", "f", "g"]]
62 , [["a", "b"], ["d", "f"], ["a", "d"]]
63 , [["f"], ["d", "f"], ["f", "g", "a"]]
64 , [["b", "c", "e"], ["a", "d", "e"], ["a", "b", "c"]]
65 , [["d", "f", "g"], ["b", "f"], ["a", "c", "d"], ["a", "f"]]
66 , [["c", "d", "g"], ["b", "c", "g"], ["a", "b", "c"], ["e", "g"]]
68 | rangeIndex <- [1 ..]
72 rangeToDocs0 :: Pos :-> Seq.Seq (Document Pos)
76 [ (Pos (pos - (pos `rem` 2)), Seq.singleton doc)
77 | doc <- docs0 & toList
78 , let Pos pos = documentPosition doc
85 describe "splitMaximalSpanningTree" do
86 let mst0 :: MaximalSpanningTree Int Int =
87 Tree.Node MSTNode{mstNodeRangeCluster=(2,1), mstNodeSimilarity=proba1}
88 [ Tree.Node MSTNode{mstNodeRangeCluster=(1,1), mstNodeSimilarity=proba0} []
89 , Tree.Node MSTNode{mstNodeRangeCluster=(1,2), mstNodeSimilarity=assertProbability 0.2} []
90 , Tree.Node MSTNode{mstNodeRangeCluster=(1,3), mstNodeSimilarity=assertProbability 0.3}
91 [ Tree.Node MSTNode{mstNodeRangeCluster=(2,3), mstNodeSimilarity=assertProbability 0.3} []
92 , Tree.Node MSTNode{mstNodeRangeCluster=(2,4), mstNodeSimilarity=proba0}
93 [ Tree.Node MSTNode{mstNodeRangeCluster=(1,4), mstNodeSimilarity=assertProbability 0.2}
94 [ Tree.Node MSTNode{mstNodeRangeCluster=(2,2), mstNodeSimilarity=assertProbability 0.3} []
95 , Tree.Node MSTNode{mstNodeRangeCluster=(2,5), mstNodeSimilarity=assertProbability 0.3} []
100 goldenBuilder ("mst=0" <.> "split=0") $
101 dotMaximalSpanningTrees [mst0]
102 goldenBuilder ("mst=0" <.> "split=1") $
103 dotMaximalSpanningTrees $
104 splitMaximalSpanningTree mst0
105 goldenBuilder ("mst=0" <.> "split=2") $
106 dotMaximalSpanningTrees $
108 & splitMaximalSpanningTree
109 >>= splitMaximalSpanningTree
110 goldenBuilder ("mst=0" <.> "split=3") $
111 dotMaximalSpanningTrees $
113 & splitMaximalSpanningTree
114 >>= splitMaximalSpanningTree
115 >>= splitMaximalSpanningTree
117 letName rangeToDocs0 $ \rangeToDocs ->
118 letName ["a", "b", "c", "d", "e", "f", "g"] $ \roots ->
119 forM_ ([1 .. 1] :: [Int]) \minSupp ->
120 forM_ ([2 .. 2] :: [Int]) \minSize -> do
121 let clusters = clusterize roots (assertStrictlyPositive minSupp) (assertStrictlyPositive minSize) rangeToDocs
122 -- let allSimils = allSimilarities similarityJaccard (clusters <&> unName)
123 let msf = maximalSpanningForest similarityJaccard (clusters <&> unName)
124 describe "dotMaximalSpanningTrees" do
125 forM_ ([0, 0.3, 1] :: [Rational]) \lambda -> do
126 letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
127 goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $
128 dotMaximalSpanningForest $
129 msfSplit predMeasure roots msf
131 -- describe "dotMaximalSpanningTrees" do
132 -- ([Map.keysSet similToMST | similToMST <- msf & Map.elems] & Set.unions & toList)
133 -- & (`foldM_` msf) \acc simil -> do
134 -- let similS = showFFloat Nothing (fromRational @Double (runProbability simil)) ""
135 -- let acc' = acc & (`Map.foldrWithKey` Map.empty) \_simil mst ->
136 -- Map.unionWith (Map.unionWith (Map.unionWith (Seq.><))) $
137 -- splitMaximalSpanningTree mst
138 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "simil=" <> similS) $
139 -- dotMaximalSpanningTrees acc
141 -- describe "splitMaximalSpanningTrees" do
142 -- forM_ ([0] :: [Rational]) \lambda -> do
143 -- letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
144 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $
145 -- dotMaximalSpanningTrees $
146 -- splitMaximalSpanningTrees predMeasure roots msf
147 -- describe "dotSimilarities" do
148 -- forM_ ([0] :: [Rational]) \lambda -> do
149 -- --letName (predictionMeasureF (assertProbability lambda)) \predMeasure -> do
150 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "lambda=" <> showFFloat (Just 2) (fromRational @Double lambda) "") $
152 -- --(splitMaximalSpanningTrees predMeasure roots msf)
155 -- describe "similarities" do
156 -- goldenShow ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize) weights
157 -- describe "phylomemyDOT" do
158 -- forM_ weights \minWeight ->
159 -- goldenBuilder ("docs=docs0" <.> "minSupp=" <> show minSupp <.> "minSize=" <> show minSize <.> "minWeight=" <> show minWeight) $
161 -- phylomemyRaise minWeight phy