]> Git — Sourcephile - literate-phylomemy-example1.git/blob - script.hs
init
[literate-phylomemy-example1.git] / script.hs
1 #!/usr/bin/env -S nix -L shell .#ghc .#ghcid .#xdot --command ghcid --test :main
2 -- Alternatively: #!/usr/bin/env -S nix -L shell .#ghc .#xdot --command runghc
3 -- SPDX-FileCopyrightText: 2024 Jane Doe <jane@example.org>
4 -- SPDX-License-Identifier: CC0-1.0
5
6 {-# LANGUAGE OverloadedLists #-}
7 {-# LANGUAGE OverloadedStrings #-}
8 {-# LANGUAGE ParallelListComp #-}
9
10 import Control.Concurrent.Async qualified as Async
11 import Data.ByteString.Builder qualified as BS
12 import Data.Either (fromRight)
13 import Data.Map.Strict qualified as Map
14 import Data.Sequence qualified as Seq
15 import Logic
16 import Logic.Theory.Arithmetic
17 import Logic.Theory.Ord
18 import Numeric.Probability
19 import Phylomemy qualified as Phylo
20 import System.IO qualified as Sys
21 import System.Process qualified as Sys
22 import Text.Pretty.Simple
23 -- import Turtle
24 -- import GHC.Conc
25
26 assertStrictlyPositive :: Ord a => Zeroable a => a -> () ::: a / () > Zero
27 assertStrictlyPositive i = unitName i Logic./ fromRight undefined (prove (unitName i Logic.Theory.Ord.> zero))
28
29 main = do
30 let minSupp = 1
31 let minSize = 2
32
33 let clusters =
34 Logic.letName rangeToDocs0 $ \rangeToDocs ->
35 Logic.letName ["a", "b", "c", "d", "e", "F", "g"] $ \roots ->
36 Phylo.clusterize roots (assertStrictlyPositive minSupp) (assertStrictlyPositive minSize) rangeToDocs
37
38 let phy = Phylo.phylomemy Phylo.jaccardSimilarity (unName <$> clusters)
39 let dot = Phylo.phylomemyDOT phy
40 let msf :: Map.Map (Pos, Phylo.Cluster) (Phylo.MST Pos)
41 = Phylo.branchesMSF phy
42
43 pPrint ("msf", Map.size msf)
44 Sys.withFile "phylomemy.dot" Sys.WriteMode (`BS.hPutBuilder` dot)
45 Sys.callProcess "xdot" ["phylomemy.dot"]
46
47 phy :: Phylo.PhylomemyDownstream Int
48 phy = [
49 ]
50
51 newtype Pos = Pos Int deriving (Eq, Ord, Show, Num)
52
53 rangeToDocs0 :: Pos Phylo.:-> Seq.Seq (Phylo.Document Pos)
54 rangeToDocs0 =
55 Map.fromList
56 [ ( Pos rangeIndex
57 , Seq.fromList
58 [ Phylo.Document
59 { Phylo.documentPosition = Pos (2 * rangeIndex + 3 * docIndex)
60 , Phylo.documentRoots = Map.fromList [(r, ()) | r <- roots]
61 }
62 | roots <- docs
63 | docIndex <- [1 ..]
64 ]
65 )
66 | docs <-
67 [ [["a", "b", "c"], ["a", "d", "e"], ["e", "f", "g"]]
68 , [["a", "b"], ["d", "f"]]
69 , [["f"], ["d", "f"], ["f", "g", "a"]]
70 , [["b", "c", "e"], ["a", "d", "e"], ["a", "b", "c"]]
71 , [["d", "f", "g"], ["b", "f"], ["a", "c", "d"], ["a", "f"]]
72 , [["c", "d", "g"], ["b", "c", "g"], ["a", "b", "c"], ["e", "g"]]
73 ]
74 | rangeIndex <- [1 ..]
75 ]