]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[TextFlow] ngrams size (WIP)
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Viz.Graph.Tools
14 where
15
16 import Debug.Trace (trace)
17 import Data.Graph.Clustering.Louvain.Utils (LouvainNode(..))
18 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
19 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
20 import Data.Map (Map)
21 import qualified Data.Set as Set
22 import Data.Text (Text)
23 import Gargantext.Prelude
24 import Gargantext.Core.Statistics
25 import Gargantext.Core.Viz.Graph
26 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness)
27 import Gargantext.Core.Viz.Graph.Distances (Distance(..), measure)
28 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index)
29 import Gargantext.Core.Viz.Graph.IGraph (mkGraphUfromEdges)
30 import Gargantext.Core.Viz.Graph.Proxemy (confluence)
31 import GHC.Float (sin, cos)
32 import qualified IGraph as Igraph
33 import qualified IGraph.Algorithms.Layout as Layout
34 import qualified Data.Vector.Storable as Vec
35 import qualified Data.Map as Map
36 import qualified Data.List as List
37
38 type Threshold = Double
39
40
41 cooc2graph' :: Ord t => Distance
42 -> Double
43 -> Map (t, t) Int
44 -> Map (Index, Index) Double
45 cooc2graph' distance threshold myCooc = distanceMap
46 where
47 (ti, _) = createIndices myCooc
48 myCooc' = toIndex ti myCooc
49 matCooc = map2mat 0 (Map.size ti) $ Map.filter (> 1) myCooc'
50 distanceMat = measure distance matCooc
51 distanceMap = Map.filter (> threshold) $ mat2map distanceMat
52
53
54 cooc2graph :: Distance
55 -> Threshold
56 -> (Map (Text, Text) Int)
57 -> IO Graph
58 cooc2graph distance threshold myCooc = do
59 printDebug "cooc2graph" distance
60 let
61 (ti, _) = createIndices myCooc
62 myCooc' = toIndex ti myCooc
63 matCooc = map2mat 0 (Map.size ti)
64 $ Map.filterWithKey (\(a,b) _ -> a /= b)
65 $ Map.filter (> 1) myCooc'
66 distanceMat = measure distance matCooc
67 distanceMap = Map.filter (> threshold) $ mat2map distanceMat
68
69 nodesApprox :: Int
70 nodesApprox = n'
71 where
72 (as, bs) = List.unzip $ Map.keys distanceMap
73 n' = Set.size $ Set.fromList $ as <> bs
74 ClustersParams rivers level = clustersParams nodesApprox
75
76
77 partitions <- if (Map.size distanceMap > 0)
78 -- then iLouvainMap 100 10 distanceMap
79 -- then hLouvain distanceMap
80 then cLouvain level distanceMap
81 else panic "Text.Flow: DistanceMap is empty"
82
83 let
84 -- bridgeness' = distanceMap
85 bridgeness' = trace ("Rivers: " <> show rivers)
86 $ bridgeness rivers partitions distanceMap
87 confluence' = confluence (Map.keys bridgeness') 3 True False
88
89 pure $ data2graph (Map.toList ti) myCooc' bridgeness' confluence' partitions
90
91
92
93 data ClustersParams = ClustersParams { bridgness :: Double
94 , louvain :: Text
95 } deriving (Show)
96
97 clustersParams :: Int -> ClustersParams
98 clustersParams x = ClustersParams (fromIntegral x) "0.00000001" -- y
99 {- where
100 y | x < 100 = "0.000001"
101 | x < 350 = "0.000001"
102 | x < 500 = "0.000001"
103 | x < 1000 = "0.000001"
104 | otherwise = "1"
105 -}
106
107 ----------------------------------------------------------
108 -- | From data to Graph
109 data2graph :: [(Text, Int)]
110 -> Map (Int, Int) Int
111 -> Map (Int, Int) Double
112 -> Map (Int, Int) Double
113 -> [LouvainNode]
114 -> Graph
115 data2graph labels coocs bridge conf partitions = Graph nodes edges Nothing
116 where
117
118 community_id_by_node_id = Map.fromList [ (n, c) | LouvainNode n c <- partitions ]
119
120 nodes = map (setCoord ForceAtlas labels bridge)
121 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) coocs)
122 , node_type = Terms -- or Unknown
123 , node_id = cs (show n)
124 , node_label = l
125 , node_x_coord = 0
126 , node_y_coord = 0
127 , node_attributes =
128 Attributes { clust_default = maybe 0 identity
129 (Map.lookup n community_id_by_node_id) } }
130 )
131 | (l, n) <- labels
132 , Set.member n $ Set.fromList
133 $ List.concat
134 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
135 $ Map.toList bridge
136 ]
137
138 edges = [ Edge { edge_source = cs (show s)
139 , edge_target = cs (show t)
140 , edge_weight = d
141 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
142 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
143 , edge_id = cs (show i) }
144 | (i, ((s,t), d)) <- zip ([0..]::[Integer]) (Map.toList bridge), s /= t, d > 0
145 ]
146
147
148 ------------------------------------------------------------------------
149
150 data Layout = KamadaKawai | ACP | ForceAtlas
151
152
153 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
154 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
155 where
156 (x,y) = f i
157
158
159 -- | ACP
160 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
161 setCoord l labels m (n,node) = node { node_x_coord = x
162 , node_y_coord = y
163 }
164 where
165 (x,y) = getCoord l labels m n
166
167
168 getCoord :: Ord a
169 => Layout
170 -> [(a, Int)]
171 -> Map (Int, Int) Double
172 -> Int
173 -> (Double, Double)
174 getCoord KamadaKawai _ _m _n = undefined -- layout m n
175
176 getCoord ForceAtlas _ _ n = (sin d, cos d)
177 where
178 d = fromIntegral n
179
180 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
181 $ Map.lookup n
182 $ pcaReduceTo (Dimension 2)
183 $ mapArray labels m
184 where
185 to2d :: Vec.Vector Double -> (Double, Double)
186 to2d v = (x',y')
187 where
188 ds = take 2 $ Vec.toList v
189 x' = head' "to2d" ds
190 y' = last' "to2d" ds
191
192 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
193 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
194 where
195 ns = map snd items
196
197 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
198 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
199 ------------------------------------------------------------------------
200
201 -- | KamadaKawai Layout
202 -- TODO TEST: check labels, nodeId and coordinates
203 layout :: Map (Int, Int) Double -> Int -> IO (Double, Double)
204 layout m n = maybe (panic "") identity <$> Map.lookup n <$> coord
205 where
206 coord :: IO (Map Int (Double,Double))
207 coord = Map.fromList <$> List.zip (Igraph.nodes g) <$> (Layout.getLayout g p)
208 --p = Layout.defaultLGL
209 p = Layout.defaultKamadaKawai
210 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
211