]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
[CLEAN] imports for pipeline.
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Text pipeline (file -> terms -> cooccurrences -> scored matrix)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Pipeline
16 where
17
18 import Data.Text.IO (readFile)
19 ----------------------------------------------
20 import Gargantext.Core (Lang(FR))
21 import Gargantext.Prelude
22
23 import Gargantext.Viz.Graph.Index (score)
24 import Gargantext.Viz.Graph.Distances.Matrice (distributional)
25 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
26 import Gargantext.Text.Terms (TermType(Multi), extractTerms)
27 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
28
29
-- | Run the text-processing pipeline on the file at @path@.
--
-- Steps, in order: read the file as text, split it with
-- @splitBy (Sentences 3)@, extract terms with @extractTerms Multi FR@,
-- build co-occurrences with 'cooc', filter them through 'removeApax',
-- and finally apply @score distributional@ to the result, which is
-- returned in the same monad as the file read.
--
-- NOTE(review): the language ('FR') and the 'Sentences' argument (3) are
-- hard-coded here; presumably they should become parameters -- confirm
-- with callers before changing.
pipeline path = do
  -- FilePath -> IO Text -> Text
  corpus <- readFile path
  termLists <- extractTerms Multi FR (splitBy (Sentences 3) corpus)
  -- TODO filter (\t -> not . elem t stopList) termLists
  -- TODO groupBy (Stem | GroupList)
  let prunedCooc = removeApax (cooc termLists)
  -- Cooc -> Matrix
  pure (score distributional prunedCooc)
  -- Remaining stages (not implemented here): Matrix -> Clustering -> Graph -> JSON
41