]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
[PIPELINE] adding clustering louvain.
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Pipeline
16 where
17
18 import Data.Text.IO (readFile)
19 import qualified Data.Map.Strict as M
20 ----------------------------------------------
21 import Gargantext.Core (Lang(FR))
22 import Gargantext.Prelude
23
24 import Gargantext.Viz.Graph.Index (score, createIndexes, toIndex)
25 import Gargantext.Viz.Graph.Distances.Matrice (distributional)
26 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
27 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
28 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
29
30 import Data.Graph.Clustering.Louvain (bestpartition)
31 import Data.Graph.Clustering.Louvain.Utils (map2graph)
32
-- | Run the text-to-clusters pipeline on the file at @path@.
--
-- Steps, in order:
--
--   1. read the whole file as 'Text' ('Data.Text.IO.readFile');
--   2. cut it into contexts with @splitBy (Sentences 3)@;
--   3. extract terms with @extractTerms Multi FR@;
--   4. build the co-occurrence structure with 'cooc' and pass it through
--      'removeApax';
--   5. score it with @score distributional@ and drop every entry whose
--      score equals zero;
--   6. build an index from the remaining scores ('createIndexes'),
--      re-key the scores with it ('toIndex'), turn the result into a
--      graph ('map2graph') and partition it with @bestpartition False@.
--
-- The partition is returned in the same monad the file is read in.
--
-- NOTE(review): the meaning of the 'False' flag given to 'bestpartition'
-- and the exact unit of @Sentences 3@ are not visible from this module;
-- confirm against the respective libraries.
pipeline path = do
  -- FilePath -> IO Text -> Text
  corpus    <- readFile path
  extracted <- extractTerms Multi FR (splitBy (Sentences 3) corpus)

  -- TODO filter (\t -> not . elem t stopList) myterms
  -- TODO groupBy (Stem | GroupList)

  -- Cooc -> Matrix (zero scores are discarded before indexing)
  let coocs          = removeApax (cooc extracted)
      scores         = M.filter (/= 0) (score distributional coocs)
      (termIndex, _) = createIndexes scores

  -- Matrix -> Clustering -> Graph -> JSON
  pure (bestpartition False (map2graph (toIndex termIndex scores)))
50