2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
11 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
12 {-# LANGUAGE NoImplicitPrelude #-}
14 module Gargantext.Pipeline
17 import Data.Text.IO (readFile)
19 import Control.Arrow ((***))
20 import Data.Map.Strict (Map)
21 import qualified Data.Map.Strict as M
22 import qualified Data.List as L
23 import Data.Tuple.Extra (both)
24 ----------------------------------------------
25 import Gargantext.Core (Lang(FR))
26 import Gargantext.Prelude
28 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
29 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
30 import Gargantext.Viz.Graph.Index (Index)
31 import Gargantext.Text.Metrics.Count (cooc, removeApax)
32 import Gargantext.Text.Metrics
33 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
34 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
36 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
41 / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
42 | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
43 | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
44 \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
50 -- Text <- IO Text <- FilePath
52 let contexts = splitBy (Sentences 5) text
53 myterms <- extractTerms Multi FR contexts
55 -- TODO filter (\t -> not (elem t stopList)) myterms
56 -- TODO groupBy (Stem | GroupList)
58 let myCooc = removeApax $ cooc myterms
59 --let (ti, fi) = createIndices myCooc
61 --pure $ incExcSpeGen myCooc
64 -- -- filter by spec/gen (dynamic programming)
65 -- let theScores = M.filter (>0) $ score conditional myCoocFiltered
67 ------ -- Matrix -> Clustering
68 ------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
69 -- partitions <- cLouvain theScores
71 ---- | Building : -> Graph -> JSON