]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[FIX] reindex ngrams with all documents and with all ngrams
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Vector.Storable as Vec
44 import qualified Graph.BAC.ProxemyOptim as BAC
45 import qualified IGraph as Igraph
46 import qualified IGraph.Algorithms.Layout as Layout
47
48
49 data PartitionMethod = Spinglass | Confluence
50 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
51 instance FromJSON PartitionMethod
52 instance ToJSON PartitionMethod
53 instance ToSchema PartitionMethod
54 instance Arbitrary PartitionMethod where
55 arbitrary = elements [ minBound .. maxBound ]
56
57
58 -------------------------------------------------------------
59 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
60 -- defaultClustering x = pure $ BAC.defaultClustering x
61 defaultClustering x = spinglass 1 x
62
63 -------------------------------------------------------------
64 type Threshold = Double
65
66
67 cooc2graph' :: Ord t => Distance
68 -> Double
69 -> Map (t, t) Int
70 -> Map (Index, Index) Double
71 cooc2graph' distance threshold myCooc
72 = Map.filter (> threshold)
73 $ mat2map
74 $ measure distance
75 $ case distance of
76 Conditional -> map2mat Triangle 0 tiSize
77 Distributional -> map2mat Square 0 tiSize
78 $ Map.filter (> 1) myCooc'
79
80 where
81 (ti, _) = createIndices myCooc
82 tiSize = Map.size ti
83 myCooc' = toIndex ti myCooc
84
85
86
87 -- coocurrences graph computation
88 cooc2graphWith :: PartitionMethod
89 -> Distance
90 -> Threshold
91 -> HashMap (NgramsTerm, NgramsTerm) Int
92 -> IO Graph
93 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
94 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
95
96
97 cooc2graphWith' :: ToComId a
98 => Partitions a
99 -> Distance
100 -> Threshold
101 -> HashMap (NgramsTerm, NgramsTerm) Int
102 -> IO Graph
103 cooc2graphWith' doPartitions distance threshold myCooc = do
104 let
105 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
106
107 --{- -- Debug
108 saveAsFileDebug "/tmp/distanceMap" distanceMap
109 saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
110 -- printDebug "similarities" similarities
111 --}
112
113 partitions <- if (Map.size distanceMap > 0)
114 then doPartitions distanceMap
115 else panic "Text.Flow: DistanceMap is empty"
116
117 let
118 nodesApprox :: Int
119 nodesApprox = n'
120 where
121 (as, bs) = List.unzip $ Map.keys distanceMap
122 n' = Set.size $ Set.fromList $ as <> bs
123 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
124 confluence' = confluence (Map.keys bridgeness') 3 True False
125
126 pure $ data2graph ti diag bridgeness' confluence' partitions
127
128
129 doDistanceMap :: Distance
130 -> Threshold
131 -> HashMap (NgramsTerm, NgramsTerm) Int
132 -> ( Map (Int,Int) Double
133 , Map (Index, Index) Int
134 , Map NgramsTerm Index
135 )
136 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
137 where
138 -- TODO remove below
139 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
140 $ Map.fromList
141 $ HashMap.toList myCooc
142
143 (ti, _it) = createIndices theMatrix
144 tiSize = Map.size ti
145
146 {-
147 matCooc = case distance of -- Shape of the Matrix
148 Conditional -> map2mat Triangle 0 tiSize
149 Distributional -> map2mat Square 0 tiSize
150 $ toIndex ti theMatrix
151 similarities = measure distance matCooc
152 -}
153
154 similarities = measure Distributional
155 $ map2mat Square 0 tiSize
156 $ toIndex ti theMatrix
157
158 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
159
160 distanceMap = Map.fromList
161 $ List.take links
162 $ List.reverse
163 $ List.sortOn snd
164 $ Map.toList
165 $ edgesFilter
166 $ Map.filter (> threshold)
167 $ mat2map similarities
168
169 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
170 where
171 myCooc' = Map.fromList $ HashMap.toList myCooc
172 (ti, _it) = createIndices myCooc'
173 -- tiSize = Map.size ti
174
175 -- links = round (let n :: Double = fromIntegral tiSize in n * log n)
176
177 distanceMap = toIndex ti
178 $ Map.fromList
179 -- $ List.take links
180 $ List.sortOn snd
181 $ HashMap.toList
182 $ HashMap.filter (> threshold)
183 $ conditional myCooc
184
185 ----------------------------------------------------------
186 -- | From data to Graph
187
188 type Occurrences = Int
189
190 data2graph :: ToComId a
191 => Map NgramsTerm Int
192 -> Map (Int, Int) Occurrences
193 -> Map (Int, Int) Double
194 -> Map (Int, Int) Double
195 -> [a]
196 -> Graph
197 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
198 , _graph_edges = edges
199 , _graph_metadata = Nothing
200 }
201 where
202
203 nodes = map (setCoord ForceAtlas labels bridge)
204 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
205 , node_type = Terms -- or Unknown
206 , node_id = cs (show n)
207 , node_label = unNgramsTerm l
208 , node_x_coord = 0
209 , node_y_coord = 0
210 , node_attributes = Attributes { clust_default = fromMaybe 0
211 (Map.lookup n community_id_by_node_id)
212 }
213 , node_children = []
214 }
215 )
216 | (l, n) <- labels
217 , Set.member n nodesWithScores
218 ]
219
220 edges = [ Edge { edge_source = cs (show s)
221 , edge_target = cs (show t)
222 , edge_weight = weight
223 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
224 , edge_id = cs (show i)
225 }
226 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
227 , s /= t
228 , weight > 0
229 ]
230
231 community_id_by_node_id = Map.fromList
232 $ map nodeId2comId partitions
233
234 labels = Map.toList labels'
235
236 nodesWithScores = Set.fromList
237 $ List.concat
238 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
239 $ Map.toList bridge
240
241
242 ------------------------------------------------------------------------
243
244 data Layout = KamadaKawai | ACP | ForceAtlas
245
246
247 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
248 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
249 where
250 (x,y) = f i
251
252
253 -- | ACP
254 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
255 setCoord l labels m (n,node) = node { node_x_coord = x
256 , node_y_coord = y
257 }
258 where
259 (x,y) = getCoord l labels m n
260
261
262 getCoord :: Ord a
263 => Layout
264 -> [(a, Int)]
265 -> Map (Int, Int) Double
266 -> Int
267 -> (Double, Double)
268 getCoord KamadaKawai _ _m _n = undefined -- layout m n
269
270 getCoord ForceAtlas _ _ n = (sin d, cos d)
271 where
272 d = fromIntegral n
273
274 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
275 $ Map.lookup n
276 $ pcaReduceTo (Dimension 2)
277 $ mapArray labels m
278 where
279 to2d :: Vec.Vector Double -> (Double, Double)
280 to2d v = (x',y')
281 where
282 ds = take 2 $ Vec.toList v
283 x' = head' "to2d" ds
284 y' = last' "to2d" ds
285
286 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
287 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
288 where
289 ns = map snd items
290
291 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
292 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
293 ------------------------------------------------------------------------
294
295 -- | KamadaKawai Layout
296 -- TODO TEST: check labels, nodeId and coordinates
297 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
298 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
299 where
300 coord :: (Map Int (Double,Double))
301 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
302 --p = Layout.defaultLGL
303 p = Layout.kamadaKawai
304 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
305
306 -----------------------------------------------------------------------------
307 -- MISC Tools
308 cooc2graph'' :: Ord t => Distance
309 -> Double
310 -> Map (t, t) Int
311 -> Map (Index, Index) Double
312 cooc2graph'' distance threshold myCooc = neighbourMap
313 where
314 (ti, _) = createIndices myCooc
315 myCooc' = toIndex ti myCooc
316 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
317 distanceMat = measure distance matCooc
318 neighbourMap = filterByNeighbours threshold
319 $ mat2map distanceMat
320
321 -- Quentin
322 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
323 filterByNeighbours threshold distanceMap = filteredMap
324 where
325 indexes :: [Index]
326 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
327 filteredMap :: Map (Index, Index) Double
328 filteredMap = Map.fromList
329 $ List.concat
330 $ map (\idx ->
331 let selected = List.reverse
332 $ List.sortOn snd
333 $ Map.toList
334 $ Map.filter (> 0)
335 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
336 in List.take (round threshold) selected
337 ) indexes
338
339
340
341
342