2 Module : Gargantext.Pipeline
3 Description : Text processing pipeline (contexts, terms, co-occurrences)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
14 module Gargantext.Pipeline
17 import Data.Text.IO (readFile)
19 import Control.Arrow ((***))
20 import Data.Map.Strict (Map)
21 import qualified Data.Map.Strict as M
22 import qualified Data.List as L
23 import Data.Tuple.Extra (both)
24 ----------------------------------------------
25 import Gargantext.Core (Lang(FR))
26 import Gargantext.Prelude
28 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
29 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
30 import Gargantext.Viz.Graph.Index (Index)
31 import Gargantext.Text.Metrics.Count (cooc, removeApax)
32 import Gargantext.Text.Metrics
33 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
34 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
36 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
41 / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
42 | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
43 | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
44 \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
-- | Text-processing workflow: splits a text into contexts, extracts terms
-- from those contexts for the given @lang@, and derives @myCooc@ from the
-- extracted terms.
--
-- NOTE(review): in the lines visible here @path@ is never used and @text@ is
-- never bound; presumably @text@ is read from @path@ (see the comment just
-- below and the 'readFile' import) -- confirm against the full file.
-- NOTE(review): no final expression of the @do@ block is visible here, so the
-- value returned by 'workflow' cannot be stated from this view.
49 workflow lang path = do
50 -- Text <- IO Text <- FilePath
-- Split the text with 'splitBy' using the @Sentences 5@ context
-- (presumably groups of 5 sentences -- TODO confirm in Gargantext.Text.Context).
52 let contexts = splitBy (Sentences 5) text
-- Monadic term extraction in 'Multi' mode for the requested language.
53 myterms <- extractTerms Multi lang contexts
55 -- TODO filter (\t -> not . elem t stopList) myterms
56 -- TODO groupBy (Stem | GroupList)
-- Applied right to left: 'cooc' on the terms, then 'removeApax', then
-- 'filterCooc' (see the FIXME below about the memory use of 'filterCooc').
58 let myCooc = filterCooc $ removeApax $ cooc myterms
60 --let (ti, fi) = createIndices myCooc
61 -- @np FIXME optimization issue of filterCooc (too much memory consumed)
-- The remaining steps (index creation, clustering, graph/JSON building) are
-- commented out and therefore not executed.
63 -- Matrix -> Clustering
64 -- pure $ bestpartition False $ map2graph $ toIndex ti myCooc
65 --partitions <- cLouvain $ toIndex ti $ M.map (\v -> (fromIntegral v) :: Double) myCooc
67 ---- | Building : -> Graph -> JSON