]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
[API][FLOW] Upload file for CsvHalFormat (WIP)
[gargantext.git] / src / Gargantext / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Text.Metrics
3 Description : All parsers of Gargantext in one file.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Mainly reexport functions in @Data.Text.Metrics@
11
12 -}
13
14 {-# LANGUAGE BangPatterns #-}
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE OverloadedStrings #-}
17
18 module Gargantext.Text.Metrics
19 where
20
21 --import Data.Array.Accelerate ((:.)(..), Z(..))
22 --import Math.KMeans (kmeans, euclidSq, elements)
23
24 --import GHC.Float (exp)
25 import Data.Tuple.Extra (both)
26 import Data.Map (Map)
27 import Data.List.Extra (sortOn)
28 import GHC.Real (round)
29 import Gargantext.Prelude
30 import Gargantext.Viz.Graph.Distances.Matrice
31 import Gargantext.Viz.Graph.Index
32 import Gargantext.Core.Statistics (pcaReduceTo, Dimension(..))
33 import qualified Data.Array.Accelerate as DAA
34 import qualified Data.Array.Accelerate.Interpreter as DAA
35 import qualified Data.List as List
36 import qualified Data.Map as Map
37
38 import qualified Data.Vector.Storable as Vec
39
40 type GraphListSize = Int
41 type InclusionSize = Int
42
43 {-
44 toScored' :: Ord t => [Map t (Vec.Vector Double)] -> [Scored t]
45 toScored' = map2scored
46 . (pcaReduceTo (Dimension 2))
47 . (Map.filter (\v -> Vec.length v > 1))
48 . (Map.unionsWith (<>))
49 -}
50
51 scored :: Ord t => Map (t,t) Int -> [Scored t]
52 scored = map2scored . (pcaReduceTo (Dimension 2)) . scored2map
53 where
54 scored2map :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
55 scored2map m = Map.fromList $ map (\(Scored t i s) -> (t, Vec.fromList [i,s])) $ scored' m
56
57 map2scored :: Ord t => Map t (Vec.Vector Double) -> [Scored t]
58 map2scored = map (\(t, ds) -> Scored t (Vec.head ds) (Vec.last ds)) . Map.toList
59
60 -- TODO change type with (x,y)
61 data Scored ts = Scored
62 { _scored_terms :: !ts
63 , _scored_incExc :: !InclusionExclusion
64 , _scored_speGen :: !SpecificityGenericity
65 } deriving (Show)
66
67 localMetrics' :: Ord t => Map (t,t) Int -> Map t (Vec.Vector Double)
68 localMetrics' m = Map.fromList $ zipWith (\(_,t) (inc,spe) -> (t, Vec.fromList [inc,spe]))
69 (Map.toList fi)
70 scores
71 where
72 (ti, fi) = createIndices m
73 (is, ss) = incExcSpeGen $ cooc2mat ti m
74 scores = DAA.toList
75 $ DAA.run
76 $ DAA.zip (DAA.use is) (DAA.use ss)
77
78
79 -- TODO Code to be remove below
80 -- TODO in the textflow we end up needing these indices , it might be
81 -- better to compute them earlier and pass them around.
82 scored' :: Ord t => Map (t,t) Int -> [Scored t]
83 scored' m = zipWith (\(_,t) (inc,spe) -> Scored t (inc) (spe)) (Map.toList fi) scores
84 where
85 (ti, fi) = createIndices m
86 (is, ss) = incExcSpeGen $ cooc2mat ti m
87 scores = DAA.toList
88 $ DAA.run
89 $ DAA.zip (DAA.use is) (DAA.use ss)
90
91
92 takeScored :: Ord t => GraphListSize -> InclusionSize -> Map (t,t) Int -> ([t],[t])
93 takeScored listSize incSize = both (map _scored_terms)
94 . linearTakes listSize incSize _scored_speGen
95 _scored_incExc
96 . scored
97
98
99 -- | Filter Scored data
100 -- >>> linearTakes 2 3 fst snd $ Prelude.zip ([1..10] :: [Int]) (reverse $ [1..10] :: [Int])
101 -- [(3,8),(6,5)]
102 linearTakes :: (Ord b1, Ord b2)
103 => GraphListSize -> InclusionSize
104 -> (a -> b2) -> (a -> b1) -> [a] -> ([a],[a])
105 linearTakes mls incSize speGen incExc = (List.splitAt mls)
106 . List.concat
107 . map (take $ round
108 $ (fromIntegral mls :: Double)
109 / (fromIntegral incSize :: Double)
110 )
111 . map (sortOn speGen)
112 . splitEvery incSize
113 . take 5000
114 . takePercent (0.70)
115 . sortOn incExc
116
117 takePercent :: Double -> [a] -> [a]
118 takePercent l xs = List.take l' xs
119 where
120 l' = round $ l * (fromIntegral $ List.length xs)
121
122 splitTake :: (Int, a -> Bool) -> (Int, a -> Bool) -> [a] -> ([a], [a])
123 splitTake (a, af) (b, bf) xs = (mpa <> mpb, ca <> cb)
124 where
125 (mpa, ca) = List.splitAt a $ List.filter af xs
126 (mpb, cb) = List.splitAt b $ List.filter bf xs
127