2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
15 module Gargantext.Pipeline
18 import Data.Text.IO (readFile)
20 import Control.Arrow ((***))
21 import Data.Map.Strict (Map)
22 import qualified Data.Map.Strict as M
23 import qualified Data.Set as S
24 import qualified Data.List as L
25 import Data.Tuple.Extra (both)
26 ----------------------------------------------
27 import Gargantext.Core (Lang(FR))
28 import Gargantext.Prelude
30 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
31 import Gargantext.Viz.Graph.Distances.Matrice (incExcSpeGen, conditional)
32 import Gargantext.Viz.Graph.Index (Index)
33 import Gargantext.Text.Metrics.Count (cooc, removeApax)
34 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
35 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
37 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
39 --filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
41 ---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
42 ----(ti, fi) = createIndices m
43 -- . fromIndex fi $ filterMat $ cooc2mat ti m
47 import Data.Array.Accelerate (Matrix)
48 filterMat :: Matrix Int -> [(Index, Index)]
49 filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
51 (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (incExcSpeGen m)
58 -- Text <- IO Text <- FilePath
60 let contexts = splitBy (Sentences 3) text
61 myterms <- extractTerms Multi FR contexts
63 -- TODO filter (\t -> not . elem t stopList) myterms
64 -- TODO groupBy (Stem | GroupList)
66 let myCooc = removeApax $ cooc myterms
67 let (ti, fi) = createIndices myCooc
71 -- -- filter by spec/gen (dynmaic programming)
72 -- let theScores = M.filter (>0) $ score conditional myCoocFiltered
74 ------ -- Matrix -> Clustering
75 ------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
76 -- partitions <- cLouvain theScores
78 ---- | Building : -> Graph -> JSON