Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
[FEAT] Cooc -> Matrix conversions tools.
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Text pipeline (text file -> terms -> cooccurrences -> matrix)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Pipeline
16 where
17
18 import Data.Text (unpack)
19 import qualified Data.Text as DT
20
21 import Data.Text.IO (readFile)
22
23 ----------------------------------------------
24 ----------------------------------------------
25
26 import Gargantext.Core
27 import Gargantext.Core.Types
28 import Gargantext.Prelude
29
30 import Gargantext.Viz.Graph.Index (map', createIndexes)
31 import Gargantext.Viz.Graph.Distances.Matrice (distributional, int2double)
32 import Gargantext.Text.Metrics.Occurrences
33 import Gargantext.Text.Terms
34 import Gargantext.Text.Context
35
36 import Data.Array.Accelerate as A
37
-- | Run the text pipeline on the file at @pth@: read its contents, split
-- them into contexts with 'splitBy', extract terms from every context with
-- 'terms', build the co-occurrence map ('cooc', filtered by 'removeApax'),
-- and finally apply 'int2double' to it through @map'@.
--
-- NOTE(review): there is no type signature; the result type is whatever
-- @map' int2double@ yields -- confirm against "Gargantext.Viz.Graph.Index".
pipeline pth = do
  corpus <- readFile pth
  -- NOTE(review): the meaning of the literal 4 is defined by 'splitBy' -- confirm.
  let chunks = splitBy Sentences 4 corpus
  extracted <- mapM (terms Multi FR) chunks
  -- TODO: filter stop words.
  let coocs = removeApax (cooc extracted)
  -- Intermediate values that can be returned instead while debugging:
  --   pure coocs
  --   pure $ createIndexes coocs
  -- Cooc map -> Matrix
  pure (map' int2double coocs)
  -- Next step (not implemented here): Matrix -> Graph
49
50
51
52
53