2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
15 module Gargantext.Pipeline
18 import Data.Text.IO (readFile)
20 import Control.Arrow ((***))
21 import Data.Map.Strict (Map)
22 import qualified Data.Map.Strict as M
23 import qualified Data.Set as S
24 import qualified Data.List as L
25 import Data.Tuple.Extra (both)
26 ----------------------------------------------
27 import Gargantext.Core (Lang(FR))
28 import Gargantext.Prelude
30 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
31 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
32 import Gargantext.Viz.Graph.Index (Index)
33 import Gargantext.Text.Metrics.Count (cooc, removeApax)
34 import Gargantext.Text.Metrics (incExcSpeGen)
35 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
36 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
38 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
40 --filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
42 ---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
43 ----(ti, fi) = createIndices m
44 -- . fromIndex fi $ filterMat $ cooc2mat ti m
47 import Data.Array.Accelerate (Matrix)
49 filterMat :: Matrix Int -> [(Index, Index)]
50 filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
52 (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
59 -- Text <- IO Text <- FilePath
61 let contexts = splitBy (Sentences 5) text
62 myterms <- extractTerms Multi FR contexts
64 -- TODO filter (\t -> not . elem t stopList) myterms
65 -- TODO groupBy (Stem | GroupList)
67 let myCooc = removeApax $ cooc myterms
68 --let (ti, fi) = createIndices myCooc
69 pure $ incExcSpeGen myCooc
72 -- -- filter by spec/gen (dynmaic programming)
73 -- let theScores = M.filter (>0) $ score conditional myCoocFiltered
75 ------ -- Matrix -> Clustering
76 ------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
77 -- partitions <- cLouvain theScores
79 ---- | Building : -> Graph -> JSON