]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[searx] custom flow for searx docs insert
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 -- import Data.Graph.Clustering.Louvain (hLouvain, {-iLouvainMap-})
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Text (Text)
21 -- import Debug.Trace (trace)
22 import GHC.Float (sin, cos)
23 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
24 import Gargantext.Core.Methods.Distances.Conditional (conditional)
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
27 import Gargantext.Core.Statistics
28 import Gargantext.Core.Viz.Graph
29 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Types (ClusterNode)
34 import Gargantext.Prelude
35 -- import qualified Graph.BAC.ProxemyOptim as BAC
36 import IGraph.Random -- (Gen(..))
37 import qualified Data.HashMap.Strict as HashMap
38 import qualified Data.List as List
39 import qualified Data.Map as Map
40 import qualified Data.Set as Set
41 import qualified Data.Vector.Storable as Vec
42 import qualified IGraph as Igraph
43 import qualified IGraph.Algorithms.Layout as Layout
44
45
46 -------------------------------------------------------------
47 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
48 -- defaultClustering x = pure $ BAC.defaultClustering x
49 defaultClustering x = spinglass 1 x
50
51 -------------------------------------------------------------
52 type Threshold = Double
53
54
55 cooc2graph' :: Ord t => Distance
56 -> Double
57 -> Map (t, t) Int
58 -> Map (Index, Index) Double
59 cooc2graph' distance threshold myCooc
60 = Map.filter (> threshold)
61 $ mat2map
62 $ measure distance
63 $ case distance of
64 Conditional -> map2mat Triangle 0 tiSize
65 Distributional -> map2mat Square 0 tiSize
66 $ Map.filter (> 1) myCooc'
67
68 where
69 (ti, _) = createIndices myCooc
70 tiSize = Map.size ti
71 myCooc' = toIndex ti myCooc
72
73
74 data PartitionMethod = Louvain | Spinglass -- | Bac
75
76 -- | coocurrences graph computation
77 cooc2graphWith :: PartitionMethod
78 -> Distance
79 -> Threshold
80 -> HashMap (NgramsTerm, NgramsTerm) Int
81 -> IO Graph
82 cooc2graphWith Louvain = undefined
83 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
84 -- cooc2graphWith Bac = cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
85
86
87 cooc2graphWith' :: ToComId a
88 => Partitions a
89 -> Distance
90 -> Threshold
91 -> HashMap (NgramsTerm, NgramsTerm) Int
92 -> IO Graph
93 cooc2graphWith' doPartitions distance threshold myCooc = do
94 let
95 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
96
97 nodesApprox :: Int
98 nodesApprox = n'
99 where
100 (as, bs) = List.unzip $ Map.keys distanceMap
101 n' = Set.size $ Set.fromList $ as <> bs
102
103 {- -- Debug
104 saveAsFileDebug "debug/distanceMap" distanceMap
105 printDebug "similarities" similarities
106 -}
107
108 partitions <- if (Map.size distanceMap > 0)
109 then doPartitions distanceMap
110 else panic "Text.Flow: DistanceMap is empty"
111
112 let
113 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
114
115 confluence' = confluence (Map.keys bridgeness') 3 True False
116
117 pure $ data2graph (Map.toList $ Map.mapKeys unNgramsTerm ti)
118 diag bridgeness' confluence' partitions
119
120
121 doDistanceMap :: Distance
122 -> Threshold
123 -> HashMap (NgramsTerm, NgramsTerm) Int
124 -> ( Map (Int,Int) Double
125 , Map (Index, Index) Int
126 , Map NgramsTerm Index
127 )
128 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
129 where
130 -- TODO remove below
131 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
132 $ Map.fromList
133 $ HashMap.toList myCooc
134
135 (ti, _it) = createIndices theMatrix
136 tiSize = Map.size ti
137
138 {-
139 matCooc = case distance of -- Shape of the Matrix
140 Conditional -> map2mat Triangle 0 tiSize
141 Distributional -> map2mat Square 0 tiSize
142 $ toIndex ti theMatrix
143 similarities = measure distance matCooc
144 -}
145
146 similarities = measure Distributional
147 $ map2mat Square 0 tiSize
148 $ toIndex ti theMatrix
149
150 links = round (let n :: Double = fromIntegral tiSize in n * log n)
151
152 distanceMap = Map.fromList
153 $ List.take links
154 $ List.sortOn snd
155 $ Map.toList
156 $ edgesFilter
157 $ Map.filter (> threshold)
158 $ mat2map similarities
159
160 doDistanceMap Conditional _threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
161 where
162 myCooc' = Map.fromList $ HashMap.toList myCooc
163 (ti, _it) = createIndices myCooc'
164 -- tiSize = Map.size ti
165
166 -- links = round (let n :: Double = fromIntegral tiSize in n * log n)
167
168 distanceMap = toIndex ti
169 $ Map.fromList
170 -- $ List.take links
171 -- $ List.sortOn snd
172 $ HashMap.toList
173 -- $ HashMap.filter (> threshold)
174 $ conditional myCooc
175
176
177
178 ----------------------------------------------------------
179 -- | From data to Graph
180
181 type Occurrences = Map (Int, Int) Int
182
183 data2graph :: ToComId a
184 => [(Text, Int)]
185 -> Occurrences
186 -> Map (Int, Int) Double
187 -> Map (Int, Int) Double
188 -> [a]
189 -> Graph
190 data2graph labels occurences bridge conf partitions = Graph { _graph_nodes = nodes
191 , _graph_edges = edges
192 , _graph_metadata = Nothing
193 }
194 where
195
196 community_id_by_node_id = Map.fromList
197 $ map nodeId2comId partitions
198
199 nodes = map (setCoord ForceAtlas labels bridge)
200 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
201 , node_type = Terms -- or Unknown
202 , node_id = cs (show n)
203 , node_label = l
204 , node_x_coord = 0
205 , node_y_coord = 0
206 , node_attributes =
207 Attributes { clust_default = maybe 0 identity
208 (Map.lookup n community_id_by_node_id) }
209 , node_children = [] }
210 )
211 | (l, n) <- labels
212 , Set.member n $ Set.fromList
213 $ List.concat
214 $ map (\((s,t),d) -> if d > 0 && s /=t then [s,t] else [])
215 $ Map.toList bridge
216 ]
217
218 edges = [ Edge { edge_source = cs (show s)
219 , edge_target = cs (show t)
220 , edge_weight = weight
221 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
222 -- , edge_confluence = maybe (panic "E: data2graph edges") identity $ Map.lookup (s,t) conf
223 , edge_id = cs (show i)
224 }
225 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] )
226 (Map.toList bridge)
227 , s /= t
228 , weight > 0
229 ]
230
231
232 ------------------------------------------------------------------------
233
234 data Layout = KamadaKawai | ACP | ForceAtlas
235
236
237 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
238 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
239 where
240 (x,y) = f i
241
242
243 -- | ACP
244 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
245 setCoord l labels m (n,node) = node { node_x_coord = x
246 , node_y_coord = y
247 }
248 where
249 (x,y) = getCoord l labels m n
250
251
252 getCoord :: Ord a
253 => Layout
254 -> [(a, Int)]
255 -> Map (Int, Int) Double
256 -> Int
257 -> (Double, Double)
258 getCoord KamadaKawai _ _m _n = undefined -- layout m n
259
260 getCoord ForceAtlas _ _ n = (sin d, cos d)
261 where
262 d = fromIntegral n
263
264 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
265 $ Map.lookup n
266 $ pcaReduceTo (Dimension 2)
267 $ mapArray labels m
268 where
269 to2d :: Vec.Vector Double -> (Double, Double)
270 to2d v = (x',y')
271 where
272 ds = take 2 $ Vec.toList v
273 x' = head' "to2d" ds
274 y' = last' "to2d" ds
275
276 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
277 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
278 where
279 ns = map snd items
280
281 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
282 toVec n' ns' m' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m') ns')
283 ------------------------------------------------------------------------
284
285 -- | KamadaKawai Layout
286 -- TODO TEST: check labels, nodeId and coordinates
287 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
288 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
289 where
290 coord :: (Map Int (Double,Double))
291 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
292 --p = Layout.defaultLGL
293 p = Layout.kamadaKawai
294 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
295
296 -----------------------------------------------------------------------------
297 -- MISC Tools
298 cooc2graph'' :: Ord t => Distance
299 -> Double
300 -> Map (t, t) Int
301 -> Map (Index, Index) Double
302 cooc2graph'' distance threshold myCooc = neighbourMap
303 where
304 (ti, _) = createIndices myCooc
305 myCooc' = toIndex ti myCooc
306 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
307 distanceMat = measure distance matCooc
308 neighbourMap = filterByNeighbours threshold
309 $ mat2map distanceMat
310
311 -- Quentin
312 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
313 filterByNeighbours threshold distanceMap = filteredMap
314 where
315 indexes :: [Index]
316 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
317 filteredMap :: Map (Index, Index) Double
318 filteredMap = Map.fromList
319 $ List.concat
320 $ map (\idx ->
321 let selected = List.reverse
322 $ List.sortOn snd
323 $ Map.toList
324 $ Map.filter (> 0)
325 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
326 in List.take (round threshold) selected
327 ) indexes
328
329
330
331
332