]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
[SCORE] spegen quality tested, need to add test in comments.
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Pipeline
16 where
17
18 import Data.Text.IO (readFile)
19
20 import Control.Arrow ((***))
21 import Data.Map.Strict (Map)
22 import qualified Data.Map.Strict as M
23 import qualified Data.Set as S
24 import qualified Data.List as L
25 import Data.Tuple.Extra (both)
26 ----------------------------------------------
27 import Gargantext.Core (Lang(FR))
28 import Gargantext.Prelude
29
30 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
31 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
32 import Gargantext.Viz.Graph.Index (Index)
33 import Gargantext.Text.Metrics.Count (cooc, removeApax)
34 import Gargantext.Text.Metrics (incExcSpeGen)
35 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
36 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
37
38 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
39
40 --filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
41 --filterCooc m =
42 ---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
43 ----(ti, fi) = createIndices m
44 -- . fromIndex fi $ filterMat $ cooc2mat ti m
45
46
47 import Data.Array.Accelerate (Matrix)
48
49 filterMat :: Matrix Int -> [(Index, Index)]
50 filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
51 where
52 (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
53 n = nIe + nSg
54 nIe = 30
55 nSg = 70
56
57
58 pipeline path = do
59 -- Text <- IO Text <- FilePath
60 text <- readFile path
61 let contexts = splitBy (Sentences 5) text
62 myterms <- extractTerms Multi FR contexts
63
64 -- TODO filter (\t -> not . elem t stopList) myterms
65 -- TODO groupBy (Stem | GroupList)
66
67 let myCooc = removeApax $ cooc myterms
68 --let (ti, fi) = createIndices myCooc
69 pure $ incExcSpeGen myCooc
70 -- Cooc -> Matrix
71
72 -- -- filter by spec/gen (dynmaic programming)
73 -- let theScores = M.filter (>0) $ score conditional myCoocFiltered
74 ----
75 ------ -- Matrix -> Clustering
76 ------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
77 -- partitions <- cLouvain theScores
78 -- pure partitions
79 ---- | Building : -> Graph -> JSON
80
81