]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
Merge branch 'pipeline'
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Text pipeline (file -> contexts -> terms -> cooccurrences -> scored matrix)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Pipeline
16 where
17
18 import Data.Text.IO (readFile)
19
20 ----------------------------------------------
21 ----------------------------------------------
22 import Gargantext.Core
23 import Gargantext.Prelude
24
25 import Gargantext.Viz.Graph.Index (score)
26 import Gargantext.Viz.Graph.Distances.Matrice (distributional)
27 import Gargantext.Text.Metrics.Occurrences
28 import Gargantext.Text.Terms
29 import Gargantext.Text.Context
30
31
-- | Run the text-processing pipeline on the file found at @path@.
--
-- Steps, in order:
--
--   1. read the file as text ('readFile' from "Data.Text.IO");
--   2. cut the text into contexts with @splitBy (Sentences 3)@;
--   3. extract terms from those contexts with @extractTerms Multi FR@;
--   4. compute co-occurrences with 'cooc' and pass them through 'removeApax';
--   5. return @score distributional@ applied to that result.
--
-- NOTE(review): 'removeApax' presumably discards entries seen only once
-- (hapax legomena) -- confirm against its definition.
--
-- Not done yet: stop-list filtering, term grouping, and the
-- Matrix -> Clustering -> Graph -> JSON stages (see the TODOs below).
pipeline path = do
  -- FilePath -> IO Text -> Text
  corpus <- readFile path
  terms <- extractTerms Multi FR $ splitBy (Sentences 3) corpus
  -- TODO filter (\t -> not . elem t stopList) terms
  -- TODO groupBy (Stem | GroupList)
  -- Cooc -> Matrix
  pure $ score distributional $ removeApax $ cooc terms
  -- Matrix -> Clustering -> Graph -> JSON
43