Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
[Pipeline] clustering with C++ Louvain bindings, ok.
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Text processing pipeline (terms, co-occurrences, clustering)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Pipeline
16 where
17
18 import Data.Text.IO (readFile)
19 import qualified Data.Map.Strict as M
20 ----------------------------------------------
21 import Gargantext.Core (Lang(FR))
22 import Gargantext.Prelude
23
24 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex)
25 import Gargantext.Viz.Graph.Distances.Matrice (conditional)
26 import Gargantext.Text.Metrics.Occurrences (cooc, removeApax)
27 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
28 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
29
30 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
31
-- | Run the whole text-to-clusters chain on the file at @path@.
--
-- Steps, in order:
--
--   1. read the file as 'Text' ('Data.Text.IO.readFile');
--   2. cut it into contexts with @splitBy (Sentences 3)@;
--   3. extract terms with @extractTerms Multi FR@ (language fixed to 'FR');
--   4. build the co-occurrences with 'cooc' and pass them through 'removeApax';
--   5. score them with @score conditional@, keep the strictly positive
--      entries and then at most 350 of those;
--   6. index the kept scores ('createIndices' / 'toIndex') and hand the
--      result to 'cLouvain'.
--
-- The value produced is whatever 'cLouvain' returns, unchanged.
pipeline path = do
  -- FilePath -> IO Text -> Text
  rawText <- readFile path
  extracted <- extractTerms Multi FR (splitBy (Sentences 3) rawText)

  -- TODO filter (\t -> not . elem t stopList) myterms
  -- TODO groupBy (Stem | GroupList)

  let coocs = removeApax (cooc extracted)
      -- Cooc -> Matrix
      -- NOTE(review): M.take keeps the first 350 entries in key order,
      -- not the 350 largest scores -- confirm this is the intent.
      keptScores = (M.take 350 . M.filter (>0)) (score conditional coocs)
      -- Only the first component of the indices is needed below.
      (termIndex, _) = createIndices keptScores

  -- Matrix -> Clustering (-> Graph -> JSON is not done here).
  -- Former pure-Haskell variant, kept for reference:
  --   pure $ bestpartition False $ map2graph $ toIndex ti theScores
  cLouvain (toIndex termIndex keptScores)
50