Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
[COOC FILTERING] workflow done but optimization issue.
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13
14 module Gargantext.Pipeline
15 where
16
17 import Data.Text.IO (readFile)
18
19 import Control.Arrow ((***))
20 import Data.Map.Strict (Map)
21 import qualified Data.Map.Strict as M
22 import qualified Data.List as L
23 import Data.Tuple.Extra (both)
24 ----------------------------------------------
25 import Gargantext.Core (Lang(FR))
26 import Gargantext.Prelude
27
28 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
29 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
30 import Gargantext.Viz.Graph.Index (Index)
31 import Gargantext.Text.Metrics.Count (cooc, removeApax)
32 import Gargantext.Text.Metrics
33 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
34 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
35
36 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
37
38
{-
  ____                             _            _
 / ___| __ _ _ __ __ _  __ _ _ __ | |_ _____  _| |_
| |  _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
| |_| | (_| | | | (_| | (_| | | | | ||  __/>  <| |_
 \____|\__,_|_|  \__, |\__,_|_| |_|\__\___/_/\_\\__|
                 |___/

-}
48
-- | Run the text-to-cooccurrence part of the pipeline on the file at @path@.
--
-- Steps, in order: read the file as text, cut it into contexts with
-- @splitBy (Sentences 5)@, extract terms with @extractTerms Multi lang@,
-- compute @cooc@ over those terms, then pass the result through
-- @removeApax@ and @filterCooc@. The filtered value is what gets returned.
--
-- The later stages (indexing, matrix conversion, clustering, graph/JSON
-- output) are not wired in yet; they are kept as notes at the end of the body.
--
-- NOTE(review): @filterCooc@ is presumably provided by the unqualified
-- import of Gargantext.Text.Metrics; confirm.
workflow lang path = do
  -- FilePath -> IO Text -> Text
  corpus <- readFile path

  -- Contexts are built inline and handed straight to the term extractor.
  extracted <- extractTerms Multi lang (splitBy (Sentences 5) corpus)

  -- TODO filter (\t -> not . elem t stopList) extracted
  -- TODO groupBy (Stem | GroupList)

  -- @np FIXME optimization issue of filterCooc (too much memory consumed)
  pure (filterCooc (removeApax (cooc extracted)))

  -- Next stages, still disabled (coocs = the value returned above):
  --   Cooc -> Matrix
  --     let (ti, fi) = createIndices coocs
  --   Matrix -> Clustering
  --     pure $ bestpartition False $ map2graph $ toIndex ti coocs
  --     partitions <- cLouvain $ toIndex ti $ M.map (\v -> (fromIntegral v) :: Double) coocs
  --     pure partitions
  --   Building : -> Graph -> JSON
68