]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
[Scored filtering] adding doc.
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9 -}
10
11 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
13
14 module Gargantext.Pipeline
15 where
16
17 import Data.Text.IO (readFile)
18
19 import Control.Arrow ((***))
20 import Data.Map.Strict (Map)
21 import qualified Data.Map.Strict as M
22 import qualified Data.List as L
23 import Data.Tuple.Extra (both)
24 ----------------------------------------------
25 import Gargantext.Core (Lang(FR))
26 import Gargantext.Prelude
27
28 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
29 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
30 import Gargantext.Viz.Graph.Index (Index)
31 import Gargantext.Text.Metrics.Count (cooc, removeApax)
32 import Gargantext.Text.Metrics
33 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
34 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
35
36 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
37
38
39 {-
40 ____ _ _
41 / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
42 | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
43 | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
44 \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
45 |___/
46
47 -}
48
-- | Work-in-progress text pipeline for the file at @path@.
--
-- Currently it reads the file, splits the text with 'splitBy' and runs
-- 'extractTerms' on the result. A co-occurrence value is bound from the
-- extracted terms but nothing consumes it yet, so the action always
-- yields @True@. The later stages are still commented out after this block.
pipeline path = do
  -- Text <- IO Text <- FilePath
  corpus <- readFile path
  -- Contexts come from the 'Sentences' splitter with parameter 5; terms are
  -- extracted with the 'Multi' term type for the 'FR' language.
  myterms <- extractTerms Multi FR (splitBy (Sentences 5) corpus)

  -- TODO filter (\t -> not . elem t stopList) myterms
  -- TODO groupBy (Stem | GroupList)

  -- Kept for the upcoming stages; not used by the current result.
  let myCooc = removeApax (cooc myterms)
  --let (ti, fi) = createIndices myCooc
  pure True
61 --pure $ incExcSpeGen myCooc
62 -- Cooc -> Matrix
63
64 -- -- filter by spec/gen (dynamic programming)
65 -- let theScores = M.filter (>0) $ score conditional myCoocFiltered
66 ----
67 ------ -- Matrix -> Clustering
68 ------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
69 -- partitions <- cLouvain theScores
70 -- pure partitions
71 ---- | Building : -> Graph -> JSON
72