2 Module : Gargantext.Pipeline
3 Description : Text processing pipeline (contexts -> terms -> cooccurrences -> scores)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
15 module Gargantext.Pipeline
18 import Data.Text.IO (readFile)
19 ----------------------------------------------
20 import Gargantext.Core (Lang(FR))
21 import Gargantext.Prelude
23 import Gargantext.Viz.Graph.Index (score)
24 import Gargantext.Viz.Graph.Distances.Matrice (distributional)
25 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
26 import Gargantext.Text.Terms (TermType(Multi), extractTerms)
27 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
31 -- Text <- IO Text <- FilePath
33 let contexts = splitBy (Sentences 3) text
34 myterms <- extractTerms Multi FR contexts
35 -- TODO filter (\t -> not (elem t stopList)) myterms
36 -- TODO groupBy (Stem | GroupList)
37 let myCooc = removeApax $ cooc myterms
39 pure $ score distributional myCooc
40 -- Matrix -> Clustering -> Graph -> JSON