]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Metrics.hs
Merge branch 'dev-doc-annotation-issue' of ssh://gitlab.iscpif.fr:20022/gargantext...
[gargantext.git] / src / Gargantext / Core / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Core.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE BangPatterns #-}
15
16 module Gargantext.Core.Text.Metrics
17 where
18
19 --import Data.Array.Accelerate ((:.)(..), Z(..))
20 --import Math.KMeans (kmeans, euclidSq, elements)
21
22 --import GHC.Float (exp)
23 import Data.Tuple.Extra (both)
24 import Data.Map (Map)
25 import Data.List.Extra (sortOn)
26 import GHC.Real (round)
27 import Gargantext.Prelude
28 import Gargantext.Core.Viz.Graph.Distances.Matrice
29 import Gargantext.Core.Viz.Graph.Index
30 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
31 import qualified Data.Array.Accelerate as DAA
32 import qualified Data.Array.Accelerate.Interpreter as DAA
33 import qualified Data.List as List
34 import qualified Data.Map as Map
35
36 import qualified Data.Vector.Storable as Vec
37
38 type GraphListSize = Int
39 type InclusionSize = Int
40
41 {-
42 toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
43 toScored' = map2scored
44 . (pcaReduceTo (Dimension 2))
45 . (Map.filter (\v -> Vec.length v > 1))
46 . (Map.unionsWith (<>))
47 -}
48
49 scored :: Ord t => Map (t,t) Int -> [Scored t]
50 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
51 where
52 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
53 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
54
55 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
56 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
57
58 -- TODO change type with (x,y)
59 data Scored ts = Scored
60 { _scored_terms :: !ts
61 , _scored_incExc :: !InclusionExclusion
62 , _scored_speGen :: !SpecificityGenericity
63 } deriving (Show)
64
65 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
66 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
67 (Map.toList fi)
68 scores
69 where
70 (ti, fi) = createIndices m
71 (is, ss) = incExcSpeGen $ cooc2mat ti m
72 scores = DAA.toList
73 $ DAA.run
74 $ DAA.zip (DAA.use is) (DAA.use ss)
75
76
77 -- TODO Code to be remove below
78 -- TODO in the textflow we end up needing these indices , it might be
79 -- better to compute them earlier and pass them around.
80 scored' :: Ord t => Map (t,t) Int -> [Scored t]
81 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
82 where
83 (ti, fi) = createIndices m
84 (is, ss) = incExcSpeGen $ cooc2mat ti m
85 scores = DAA.toList
86 $ DAA.run
87 $ DAA.zip (DAA.use is) (DAA.use ss)
88
89
90 takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
91 takeScored listSize incSize = both (map _scored_terms)
92 . linearTakes listSize incSize _scored_speGen
93 _scored_incExc
94 . scored
95
96
97 -- | Filter Scored data
98 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
99 -- [(3,8),(6,5)]
100 linearTakes :: (Ord b1, Ord b2)
101 => GraphListSize -> InclusionSize
102 -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
103 linearTakes mls incSize speGen incExc = (List.splitAt mls)
104 . List.concat
105 . map (take $ round
106 $ (fromIntegral mls :: Double)
107 / (fromIntegral incSize :: Double)
108 )
109 . map (sortOn speGen)
110 . splitEvery incSize
111 . take 5000
112 . takePercent (0.70)
113 . sortOn incExc
114
115 takePercent :: Double -> [a] -> [a]
116 takePercent l xs = List.take l' xs
117 where
118 l' = round $ l * (fromIntegral $ List.length xs)
119
120 splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
121 splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
122 where
123 (mpa, ca) = List.splitAt a $ List.filter af xs
124 (mpb, cb) = List.splitAt b $ List.filter bf xs
125