Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
[WORKFLOW] cleaned, adding data2graph.
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Text processing pipeline (terms, co-occurrences, Louvain clustering)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13
14 module Gargantext.Pipeline
15 where
16
17 import Data.Text.IO (readFile)
18
19 import Control.Arrow ((***))
20 import Data.Map.Strict (Map)
21 import qualified Data.Map.Strict as M
22 import qualified Data.List as L
23 import Data.Tuple.Extra (both)
24 ----------------------------------------------
25 import Gargantext.Core (Lang(FR))
26 import Gargantext.Prelude
27
28 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
29 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
30 import Gargantext.Viz.Graph.Index (Index)
31 import Gargantext.Viz.Graph (Graph)
32 import Gargantext.Text.Metrics.Count (cooc, removeApax)
33 import Gargantext.Text.Metrics
34 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
35 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
36
37 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain, LouvainNode)
38
39
40 {-
41 ____ _ _
42 / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
43 | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
44 | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
45 \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
46 |___/
47
48 -}
49
50 -----------------------------------------------------------
-- | Placeholder for the step that should assemble a 'Graph' from the
-- pipeline's intermediate results.
--
-- Not implemented yet: the binding is 'undefined', so evaluating it fails at
-- runtime.
--
-- NOTE(review): the only call site, currently commented out at the end of
-- 'workflow', passes the co-occurrence data, the scored distances and the
-- Louvain partitions, in that order. Presumably that is the intended meaning
-- of the three arguments; confirm when implementing.
51 data2graph :: Map (Int, Int) Int -> Map (Int, Int) Double -> [LouvainNode] -> Graph
52 data2graph = undefined
53 -----------------------------------------------------------
54
55
-- | Run the text pipeline on the file found at @path@.
--
-- Steps, in order: read the file as text, split it with
-- @splitBy (Sentences 5)@, extract terms for @lang@ using the @Multi@ term
-- type, build the co-occurrence data (@cooc@, then @removeApax@, then
-- @filterCooc@), index it, score it with @conditional@ and pass the result
-- to 'cLouvain'.
--
-- The value produced is whatever 'cLouvain' yields; the graph-building step
-- ('data2graph') is not enabled yet.
workflow lang path = do
  -- FilePath -> IO Text -> Text
  corpus <- readFile path
  terms  <- extractTerms Multi lang $ splitBy (Sentences 5) corpus

  -- TODO filter (\t -> not . elem t stopList) terms
  -- TODO groupBy (Stem | GroupList)

  -- @np FIXME optimization issue of filterCooc (too much memory consumed)
  let coocs        = filterCooc $ removeApax $ cooc terms
      -- Cooc -> Matrix (only the first component of the indices is used)
      (indices, _) = createIndices coocs
      -- Matrix -> Clustering
      scores       = score conditional $ toIndex indices coocs

  ---- | Building : -> Graph -> JSON (next step, not enabled yet):
  --   partitions <- cLouvain scores
  --   pure $ data2graph coocs scores partitions
  cLouvain scores
77