]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
[BIN/PHYLO] first draft, needs phylo connection.
[gargantext.git] / src / Gargantext / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE BangPatterns #-}
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE OverloadedStrings #-}
17
18 module Gargantext.Text.Metrics
19 where
20
21 --import Data.Array.Accelerate ((:.)(..), Z(..))
22 --import Math.KMeans (kmeans, euclidSq, elements)
23
24 --import GHC.Float (exp)
25
26 import Data.Map (Map)
27 import Data.List.Extra (sortOn)
28 import GHC.Real (round)
29 import Gargantext.Prelude
30 import Gargantext.Viz.Graph.Distances.Matrice
31 import Gargantext.Viz.Graph.Index
32 import qualified Data.Array.Accelerate as DAA
33 import qualified Data.Array.Accelerate.Interpreter as DAA
34 import qualified Data.List as List
35 import qualified Data.Map as Map
36
37 import Numeric.Statistics.PCA (pcaReduceN)
38 import qualified Data.Vector.Storable as Vec
39 import Data.Array.IArray (Array, listArray, elems)
40
41 type GraphListSize = Int
42 type InclusionSize = Int
43
44
45
46 toScored :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
47 toScored = map2scored
48 . (reduceTo (Dimension 2))
49 . (Map.filter (\v -> Vec.length v > 1))
50 . (Map.unionsWith (<>))
51
52
53 scored :: Ord t => Map (t,t) Int -> [Scored t]
54 scored = map2scored . (reduceTo (Dimension 2)) . scored2map
55
56 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
57 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
58
59 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
60 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
61
62 -- TODO change type with (x,y)
63 data Scored ts = Scored
64 { _scored_terms :: !ts
65 , _scored_incExc :: !InclusionExclusion
66 , _scored_speGen :: !SpecificityGenericity
67 } deriving (Show)
68
69 data Dimension = Dimension Int
70
71 reduceTo :: Ord t
72 => Dimension
73 -> Map t (Vec.Vector Double)
74 -> Map t (Vec.Vector Double)
75 reduceTo (Dimension d) ss = Map.fromList $ zip txts $ elems $ pcaReduceN ss'' d
76 where
77 ss'' :: Array Int (Vec.Vector Double)
78 ss'' = listArray (1, List.length ss') ss'
79
80 (txts,ss') = List.unzip $ Map.toList ss
81
82
83 localMetrics :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
84 localMetrics m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
85 (Map.toList fi)
86 scores
87 where
88 (ti, fi) = createIndices m
89 (is, ss) = incExcSpeGen $ cooc2mat ti m
90 scores = DAA.toList
91 $ DAA.run
92 $ DAA.zip (DAA.use is) (DAA.use ss)
93
94
95
96
97 -- TODO Code to be remove below
98 -- TODO in the textflow we end up needing these indices , it might be
99 -- better to compute them earlier and pass them around.
100 scored' :: Ord t => Map (t,t) Int -> [Scored t]
101 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
102 where
103 (ti, fi) = createIndices m
104 (is, ss) = incExcSpeGen $ cooc2mat ti m
105 scores = DAA.toList
106 $ DAA.run
107 $ DAA.zip (DAA.use is) (DAA.use ss)
108
109
110
111
112
113
114 takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> [t]
115 takeScored listSize incSize = map _scored_terms
116 . linearTakes listSize incSize _scored_speGen
117 _scored_incExc
118 . scored
119
120
121 -- | Filter Scored data
122 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
123 -- [(3,8),(6,5)]
124 linearTakes :: (Ord b1, Ord b2)
125 => GraphListSize -> InclusionSize
126 -> (a -> b2) -> (a -> b1) -> [a] -> [a]
127 linearTakes gls incSize speGen incExc = take gls
128 . List.concat
129 . map (take $ round
130 $ (fromIntegral gls :: Double)
131 / (fromIntegral incSize :: Double)
132 )
133 . map (sortOn incExc)
134 . splitEvery incSize
135 . sortOn speGen
136
137