]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Graph/Tools.hs
[FIX] Regex error with Duckling
[gargantext.git] / src / Gargantext / Core / Viz / Graph / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Graph.Tools
3 Description : Tools to build Graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE ScopedTypeVariables #-}
13
14 module Gargantext.Core.Viz.Graph.Tools
15 where
16
17 import Data.Aeson
18 import Data.HashMap.Strict (HashMap)
19 import Data.Map (Map)
20 import Data.Maybe (fromMaybe)
21 import Data.Swagger hiding (items)
22 import GHC.Float (sin, cos)
23 import GHC.Generics (Generic)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.Core.Methods.Distances (Distance(..), measure)
26 import Gargantext.Core.Methods.Distances.Conditional (conditional)
27 import Gargantext.Core.Methods.Graph.BAC.Proxemy (confluence)
28 import Gargantext.Core.Statistics
29 import Gargantext.Core.Viz.Graph
30 import Gargantext.Core.Viz.Graph.Bridgeness (bridgeness, Partitions, ToComId(..))
31 import Gargantext.Core.Viz.Graph.Index (createIndices, toIndex, map2mat, mat2map, Index, MatrixShape(..))
32 import Gargantext.Core.Viz.Graph.Tools.IGraph (mkGraphUfromEdges, spinglass)
33 import Gargantext.Core.Viz.Graph.Utils (edgesFilter)
34 import Gargantext.Prelude
35 import Graph.Types (ClusterNode)
36 import IGraph.Random -- (Gen(..))
37 import Test.QuickCheck (elements)
38 import Test.QuickCheck.Arbitrary
39 import qualified Data.HashMap.Strict as HashMap
40 import qualified Data.List as List
41 import qualified Data.Map as Map
42 import qualified Data.Set as Set
43 import qualified Data.Vector.Storable as Vec
44 import qualified Graph.BAC.ProxemyOptim as BAC
45 import qualified IGraph as Igraph
46 import qualified IGraph.Algorithms.Layout as Layout
47
48
49 data PartitionMethod = Spinglass | Confluence
50 deriving (Generic, Eq, Ord, Enum, Bounded, Show)
51 instance FromJSON PartitionMethod
52 instance ToJSON PartitionMethod
53 instance ToSchema PartitionMethod
54 instance Arbitrary PartitionMethod where
55 arbitrary = elements [ minBound .. maxBound ]
56
57
58 -------------------------------------------------------------
59 defaultClustering :: Map (Int, Int) Double -> IO [ClusterNode]
60 -- defaultClustering x = pure $ BAC.defaultClustering x
61 defaultClustering x = spinglass 1 x
62
63 -------------------------------------------------------------
64 type Threshold = Double
65
66
67 cooc2graph' :: Ord t => Distance
68 -> Double
69 -> Map (t, t) Int
70 -> Map (Index, Index) Double
71 cooc2graph' distance threshold myCooc
72 = Map.filter (> threshold)
73 $ mat2map
74 $ measure distance
75 $ case distance of
76 Conditional -> map2mat Triangle 0 tiSize
77 Distributional -> map2mat Square 0 tiSize
78 $ Map.filter (> 1) myCooc'
79
80 where
81 (ti, _) = createIndices myCooc
82 tiSize = Map.size ti
83 myCooc' = toIndex ti myCooc
84
85
86
87 -- coocurrences graph computation
88 cooc2graphWith :: PartitionMethod
89 -> Distance
90 -> Threshold
91 -> HashMap (NgramsTerm, NgramsTerm) Int
92 -> IO Graph
93 cooc2graphWith Spinglass = cooc2graphWith' (spinglass 1)
94 cooc2graphWith Confluence= cooc2graphWith' (\x -> pure $ BAC.defaultClustering x)
95
96
97 cooc2graphWith' :: ToComId a
98 => Partitions a
99 -> Distance
100 -> Threshold
101 -> HashMap (NgramsTerm, NgramsTerm) Int
102 -> IO Graph
103 cooc2graphWith' doPartitions distance threshold myCooc = do
104 let
105 (distanceMap, diag, ti) = doDistanceMap distance threshold myCooc
106
107 --{- -- Debug
108 saveAsFileDebug "/tmp/distanceMap" distanceMap
109 saveAsFileDebug "/tmp/distanceMap.keys" (List.length $ Map.keys distanceMap)
110 -- printDebug "similarities" similarities
111 --}
112
113 partitions <- if (Map.size distanceMap > 0)
114 then doPartitions distanceMap
115 else panic "Text.Flow: DistanceMap is empty"
116
117 let
118 nodesApprox :: Int
119 nodesApprox = n'
120 where
121 (as, bs) = List.unzip $ Map.keys distanceMap
122 n' = Set.size $ Set.fromList $ as <> bs
123 bridgeness' = bridgeness (fromIntegral nodesApprox) partitions distanceMap
124 confluence' = confluence (Map.keys bridgeness') 3 True False
125
126 pure $ data2graph ti diag bridgeness' confluence' partitions
127
128
129 doDistanceMap :: Distance
130 -> Threshold
131 -> HashMap (NgramsTerm, NgramsTerm) Int
132 -> ( Map (Int,Int) Double
133 , Map (Index, Index) Int
134 , Map NgramsTerm Index
135 )
136 doDistanceMap Distributional threshold myCooc = (distanceMap, toIndex ti diag, ti)
137 where
138 -- TODO remove below
139 (diag, theMatrix) = Map.partitionWithKey (\(x,y) _ -> x == y)
140 $ Map.fromList
141 $ HashMap.toList myCooc
142
143 (ti, _it) = createIndices theMatrix
144 tiSize = Map.size ti
145
146 similarities = measure Distributional
147 $ map2mat Square 0 tiSize
148 $ toIndex ti theMatrix
149
150 links = round (let n :: Double = fromIntegral tiSize in n * (log n)^(2::Int))
151
152 distanceMap = Map.fromList
153 $ List.take links
154 $ List.reverse
155 $ List.sortOn snd
156 $ Map.toList
157 $ edgesFilter
158 $ Map.filter (> threshold)
159 $ mat2map similarities
160
161 doDistanceMap Conditional threshold myCooc = (distanceMap, toIndex ti myCooc', ti)
162 where
163 myCooc' = Map.fromList $ HashMap.toList myCooc
164 (ti, _it) = createIndices myCooc'
165
166 links = round (let n :: Double = fromIntegral (Map.size ti) in n * log n)
167
168 distanceMap = toIndex ti
169 $ Map.fromList
170 $ List.take links
171 $ List.sortOn snd
172 $ HashMap.toList
173 $ HashMap.filter (> threshold)
174 $ conditional myCooc
175
176 ----------------------------------------------------------
177 -- | From data to Graph
178
179 type Occurrences = Int
180
181 data2graph :: ToComId a
182 => Map NgramsTerm Int
183 -> Map (Int, Int) Occurrences
184 -> Map (Int, Int) Double
185 -> Map (Int, Int) Double
186 -> [a]
187 -> Graph
188 data2graph labels' occurences bridge conf partitions = Graph { _graph_nodes = nodes
189 , _graph_edges = edges
190 , _graph_metadata = Nothing
191 }
192 where
193
194 nodes = map (setCoord ForceAtlas labels bridge)
195 [ (n, Node { node_size = maybe 0 identity (Map.lookup (n,n) occurences)
196 , node_type = Terms -- or Unknown
197 , node_id = cs (show n)
198 , node_label = unNgramsTerm l
199 , node_x_coord = 0
200 , node_y_coord = 0
201 , node_attributes = Attributes { clust_default = fromMaybe 0
202 (Map.lookup n community_id_by_node_id)
203 }
204 , node_children = []
205 }
206 )
207 | (l, n) <- labels
208 , Set.member n nodesWithScores
209 ]
210
211 edges = [ Edge { edge_source = cs (show s)
212 , edge_target = cs (show t)
213 , edge_weight = weight
214 , edge_confluence = maybe 0 identity $ Map.lookup (s,t) conf
215 , edge_id = cs (show i)
216 }
217 | (i, ((s,t), weight)) <- zip ([0..]::[Integer] ) $ Map.toList bridge
218 , s /= t
219 , weight > 0
220 ]
221
222 community_id_by_node_id = Map.fromList
223 $ map nodeId2comId partitions
224
225 labels = Map.toList labels'
226
227 nodesWithScores = Set.fromList
228 $ List.concat
229 $ map (\((s,t),d) -> if d > 0 && s/=t then [s,t] else [])
230 $ Map.toList bridge
231
232
233 ------------------------------------------------------------------------
234
235 data Layout = KamadaKawai | ACP | ForceAtlas
236
237
238 setCoord' :: (Int -> (Double, Double)) -> (Int, Node) -> Node
239 setCoord' f (i,n) = n { node_x_coord = x, node_y_coord = y }
240 where
241 (x,y) = f i
242
243
244 -- | ACP
245 setCoord :: Ord a => Layout -> [(a, Int)] -> Map (Int, Int) Double -> (Int, Node) -> Node
246 setCoord l labels m (n,node) = node { node_x_coord = x
247 , node_y_coord = y
248 }
249 where
250 (x,y) = getCoord l labels m n
251
252
253 getCoord :: Ord a
254 => Layout
255 -> [(a, Int)]
256 -> Map (Int, Int) Double
257 -> Int
258 -> (Double, Double)
259 getCoord KamadaKawai _ _m _n = undefined -- layout m n
260
261 getCoord ForceAtlas _ _ n = (sin d, cos d)
262 where
263 d = fromIntegral n
264
265 getCoord ACP labels m n = to2d $ maybe (panic "Graph.Tools no coordinate") identity
266 $ Map.lookup n
267 $ pcaReduceTo (Dimension 2)
268 $ mapArray labels m
269 where
270 to2d :: Vec.Vector Double -> (Double, Double)
271 to2d v = (x',y')
272 where
273 ds = take 2 $ Vec.toList v
274 x' = head' "to2d" ds
275 y' = last' "to2d" ds
276
277 mapArray :: Ord a => [(a, Int)] -> Map (Int, Int) Double -> Map Int (Vec.Vector Double)
278 mapArray items m' = Map.fromList [ toVec n' ns m' | n' <- ns ]
279 where
280 ns = map snd items
281
282 toVec :: Int -> [Int] -> Map (Int,Int) Double -> (Int, Vec.Vector Double)
283 toVec n' ns' m'' = (n', Vec.fromList $ map (\n'' -> maybe 0 identity $ Map.lookup (n',n'') m'') ns')
284 ------------------------------------------------------------------------
285
286 -- | KamadaKawai Layout
287 -- TODO TEST: check labels, nodeId and coordinates
288 layout :: Map (Int, Int) Double -> Int -> Gen -> (Double, Double)
289 layout m n gen = maybe (panic "") identity $ Map.lookup n $ coord
290 where
291 coord :: (Map Int (Double,Double))
292 coord = Map.fromList $ List.zip (Igraph.nodes g) $ (Layout.layout g p gen)
293 --p = Layout.defaultLGL
294 p = Layout.kamadaKawai
295 g = mkGraphUfromEdges $ map fst $ List.filter (\e -> snd e > 0) $ Map.toList m
296
297 -----------------------------------------------------------------------------
298 -- MISC Tools
299 cooc2graph'' :: Ord t => Distance
300 -> Double
301 -> Map (t, t) Int
302 -> Map (Index, Index) Double
303 cooc2graph'' distance threshold myCooc = neighbourMap
304 where
305 (ti, _) = createIndices myCooc
306 myCooc' = toIndex ti myCooc
307 matCooc = map2mat Triangle 0 (Map.size ti) $ Map.filter (> 1) myCooc'
308 distanceMat = measure distance matCooc
309 neighbourMap = filterByNeighbours threshold
310 $ mat2map distanceMat
311
312 -- Quentin
313 filterByNeighbours :: Double -> Map (Index, Index) Double -> Map (Index, Index) Double
314 filterByNeighbours threshold distanceMap = filteredMap
315 where
316 indexes :: [Index]
317 indexes = List.nub $ List.concat $ map (\(idx,idx') -> [idx,idx'] ) $ Map.keys distanceMap
318 filteredMap :: Map (Index, Index) Double
319 filteredMap = Map.fromList
320 $ List.concat
321 $ map (\idx ->
322 let selected = List.reverse
323 $ List.sortOn snd
324 $ Map.toList
325 $ Map.filter (> 0)
326 $ Map.filterWithKey (\(from,_) _ -> idx == from) distanceMap
327 in List.take (round threshold) selected
328 ) indexes
329
330
331
332
333