2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
15 module Gargantext.Pipeline
18 import Data.Text.IO (readFile)
19 import qualified Data.Map.Strict as M
20 ----------------------------------------------
21 import Gargantext.Core (Lang(FR))
22 import Gargantext.Prelude
24 import Gargantext.Viz.Graph.Index (score, createIndexes, toIndex)
25 import Gargantext.Viz.Graph.Distances.Matrice (distributional)
26 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
27 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
28 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
30 import Data.Graph.Clustering.Louvain (bestpartition)
31 import Data.Graph.Clustering.Louvain.Utils (map2graph)
-- Body of the pipeline: text -> contexts -> terms -> co-occurrences ->
-- scores -> indexes -> partition. The enclosing definition's header is not
-- among the lines shown here.
-- NOTE(review): `text` is used below but its binding is not visible in this
-- span; given the comment that follows and the imported `readFile`, it is
-- presumably read from a FilePath -- confirm.
34 -- Text <- IO Text <- FilePath
-- Split the text into contexts using `Sentences 3` (presumably windows of
-- three sentences -- confirm against Gargantext.Text.Context).
36 let contexts = splitBy (Sentences 3) text
-- Monadic step: extract terms from the contexts with term type `Multi` and
-- language `FR`.
37 myterms <- extractTerms Multi FR contexts
39 -- TODO filter (\t -> not . elem t stopList) myterms
40 -- TODO groupBy (Stem | GroupList)
-- Compute co-occurrences of the extracted terms, then apply `removeApax`
-- (presumably drops terms occurring only once -- confirm).
42 let myCooc = removeApax $ cooc myterms
-- Score the co-occurrences with `distributional` and keep only the entries
-- whose value is non-zero.
45 let theScores = M.filter (/=0) $ score distributional myCooc
-- `createIndexes` yields a pair; only its first component is needed here.
46 let (ti, _) = createIndexes theScores
48 -- Matrix -> Clustering -> Graph -> JSON
-- Re-key the scores through `ti`, convert to a graph and return its best
-- partition.
-- NOTE(review): the meaning of the `False` flag is not visible here, and no
-- JSON conversion happens in these lines despite the comment above -- confirm.
49 pure $ bestpartition False $ map2graph $ toIndex ti theScores