{-|
Module      : Gargantext.Pipeline
Description : Text processing pipeline (term extraction, co-occurrences, Louvain clustering)
Copyright   : (c) CNRS, 2017-Present
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental
-}
11 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
14 module Gargantext.Pipeline
17 import Data.Text.IO (readFile)
19 import Control.Arrow ((***))
20 import Data.Map.Strict (Map)
21 import qualified Data.Map.Strict as M
22 import qualified Data.List as L
23 import Data.Tuple.Extra (both)
24 ----------------------------------------------
25 import Gargantext.Core (Lang(FR))
26 import Gargantext.Prelude
28 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
29 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
30 import Gargantext.Viz.Graph.Index (Index)
31 import Gargantext.Viz.Graph (Graph)
32 import Gargantext.Text.Metrics.Count (cooc, removeApax)
33 import Gargantext.Text.Metrics
34 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
35 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
37 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, LouvainNode)
42 / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
43 | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
44 | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
45 \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
50 -----------------------------------------------------------
-- | Placeholder for the final pipeline stage that assembles a 'Graph'.
--
-- Not implemented yet: the body is 'undefined', so evaluating it throws.
-- The argument descriptions below follow the commented-out call
-- @data2graph myCooc distance partitions@ in 'workflow'.
data2graph
  :: Map (Int, Int) Int     -- ^ co-occurrence counts (@myCooc@ at the call site)
  -> Map (Int, Int) Double  -- ^ scored distances (@distance@ at the call site)
  -> [LouvainNode]          -- ^ clustering result (@partitions@ at the call site)
  -> Graph
data2graph = undefined
53 -----------------------------------------------------------
-- | Run the text pipeline on the file at @path@ for the language @lang@.
--
-- Steps, in order:
--
--   1. read the file contents with 'readFile' (imported from "Data.Text.IO");
--   2. split the text with @splitBy (Sentences 5)@;
--   3. extract terms with @extractTerms Multi lang@;
--   4. build the co-occurrences with 'cooc', then pass them through
--      'removeApax' and 'filterCooc';
--   5. create indices for the co-occurrences and score them with
--      'conditional';
--   6. cluster the scored distances with 'cLouvain'.
--
-- Returns the partitions produced by 'cLouvain'. Building the final graph
-- ('data2graph') is not wired in yet; see the commented-out line at the end.
workflow lang path = do
  -- Text <- IO Text <- FilePath
  text <- readFile path
  let contexts = splitBy (Sentences 5) text
  myterms <- extractTerms Multi lang contexts

  -- TODO filter (\t -> not (elem t stopList)) myterms
  -- TODO groupBy (Stem | GroupList)

  -- @np FIXME optimization issue of filterCooc (too much memory consumed)
  let myCooc = filterCooc $ removeApax $ cooc myterms

  -- Cooc -> Matrix
  let (ti, _) = createIndices myCooc
  -- Matrix -> Clustering
  let distance = score conditional $ toIndex ti myCooc
  partitions <- cLouvain distance

  ---- | Building : -> Graph -> JSON
  -- A do block must end with an expression: return the clustering result
  -- until 'data2graph' is implemented.
  pure partitions
  --pure $ data2graph myCooc distance partitions