2 Module : Gargantext.Pipeline
3 Description : Text pipeline (term extraction, co-occurrence scoring, Louvain clustering)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
15 module Gargantext.Pipeline
18 import Data.Text.IO (readFile)
19 import qualified Data.Map.Strict as M
20 ----------------------------------------------
21 import Gargantext.Core (Lang(FR))
22 import Gargantext.Prelude
24 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex)
25 import Gargantext.Viz.Graph.Distances.Matrice (conditional)
26 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
27 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
28 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
30 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
-- NOTE(review): these lines are the body of a do-block whose header is not
-- visible in this excerpt. `text` is bound outside the visible lines
-- (presumably via `readFile` on a FilePath, as the next comment suggests --
-- TODO confirm).
33 -- Text <- IO Text <- FilePath
-- Split the input text into contexts using the `Sentences 3` split context.
35 let contexts = splitBy (Sentences 3) text
-- Effectful step (bound with `<-`): extract terms from the contexts using the
-- `Multi` term type and the `FR` language tag.
36 myterms <- extractTerms Multi FR contexts
38 -- TODO filter (\t -> not . elem t stopList) myterms
39 -- TODO groupBy (Stem | GroupList)
-- Build the co-occurrences of the extracted terms, then pass them through
-- `removeApax` (presumably drops entries seen only once -- TODO confirm).
41 let myCooc = removeApax $ cooc myterms
-- Score the co-occurrences with `conditional`, keep only strictly positive
-- scores, then keep at most 350 entries.
-- NOTE(review): `M.take` keeps the first 350 entries in ascending key order,
-- not the 350 highest scores -- confirm this is the intended selection.
43 let theScores = M.take 350 $ M.filter (>0) $ score conditional myCooc
-- Build indices from the retained scores; only the first component of the
-- returned pair is used here.
44 let (ti, _) = createIndices theScores
46 ---- -- Matrix -> Clustering -> Graph -> JSON
47 ---- pure $ bestpartition False $ map2graph $ toIndex ti theScores
-- Effectful step: convert the scores to their indexed form with `ti` and run
-- the Louvain clustering binding (`cLouvain`) on the result.
-- NOTE(review): the statement that returns `partitions` is not visible in
-- this excerpt.
48 partitions <- cLouvain $ toIndex ti theScores