2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
15 module Gargantext.Pipeline
18 import Data.Text.IO (readFile)
20 import Control.Arrow ((***))
21 import Data.Map.Strict (Map)
22 import qualified Data.Map.Strict as M
23 import qualified Data.Set as S
24 import qualified Data.List as L
25 import Data.Tuple.Extra (both)
26 ----------------------------------------------
27 import Gargantext.Core (Lang(FR))
28 import Gargantext.Prelude
30 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
31 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
32 import Gargantext.Viz.Graph.Index (Index)
33 import Gargantext.Text.Metrics.Count (cooc, removeApax)
34 import Gargantext.Text.Metrics (incExcSpeGen)
35 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
36 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
38 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
40 -- ord relevance: top n plus inclus
41 -- échantillonnage de généricity
43 --filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
45 ---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
46 ----(ti, fi) = createIndices m
47 -- . fromIndex fi $ filterMat $ cooc2mat ti m
50 import Data.Array.Accelerate (Matrix)
52 filterMat :: Matrix Int -> [(Index, Index)]
53 filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
55 (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
62 -- Text <- IO Text <- FilePath
64 let contexts = splitBy (Sentences 5) text
65 myterms <- extractTerms Multi FR contexts
67 -- TODO filter (\t -> not . elem t stopList) myterms
68 -- TODO groupBy (Stem | GroupList)
70 let myCooc = removeApax $ cooc myterms
71 --let (ti, fi) = createIndices myCooc
72 pure $ incExcSpeGen myCooc
75 -- -- filter by spec/gen (dynmaic programming)
76 -- let theScores = M.filter (>0) $ score conditional myCoocFiltered
78 ------ -- Matrix -> Clustering
79 ------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
80 -- partitions <- cLouvain theScores
82 ---- | Building : -> Graph -> JSON