]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Pipeline.hs
need fix
[gargantext.git] / src / Gargantext / Pipeline.hs
1 {-|
2 Module : Gargantext.Pipeline
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# OPTIONS_GHC -fno-warn-name-shadowing #-}
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Pipeline
16 where
17
18 import Data.Text.IO (readFile)
19
20 import Control.Arrow ((***))
21 import Data.Map.Strict (Map)
22 import qualified Data.Map.Strict as M
23 import qualified Data.Set as S
24 import qualified Data.List as L
25 import Data.Tuple.Extra (both)
26 ----------------------------------------------
27 import Gargantext.Core (Lang(FR))
28 import Gargantext.Prelude
29
30 import Gargantext.Viz.Graph.Index (score, createIndices, toIndex, fromIndex, cooc2mat, mat2map)
31 import Gargantext.Viz.Graph.Distances.Matrice (conditional', conditional)
32 import Gargantext.Viz.Graph.Index (Index)
33 import Gargantext.Text.Metrics.Count (cooc, removeApax)
34 import Gargantext.Text.Metrics (incExcSpeGen)
35 import Gargantext.Text.Terms (TermType(Multi, Mono), extractTerms)
36 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
37
38 import Data.Graph.Clustering.Louvain.CplusPlus (cLouvain)
39
40 -- ord relevance: top n plus inclus
41 -- échantillonnage de généricity
42 --
43 --filterCooc :: Ord t => Map (t, t) Int -> Map (t, t) Int
44 --filterCooc m =
45 ---- filterCooc m = foldl (\k -> maybe (panic "no key") identity $ M.lookup k m) M.empty selection
46 ----(ti, fi) = createIndices m
47 -- . fromIndex fi $ filterMat $ cooc2mat ti m
48
49
50 import Data.Array.Accelerate (Matrix)
51
52 filterMat :: Matrix Int -> [(Index, Index)]
53 filterMat m = S.toList $ S.take n $ S.fromList $ (L.take nIe incExc') <> (L.take nSg speGen')
54 where
55 (incExc', speGen') = both ( map fst . L.sortOn snd . M.toList . mat2map) (conditional' m)
56 n = nIe + nSg
57 nIe = 30
58 nSg = 70
59
60
61 pipeline path = do
62 -- Text <- IO Text <- FilePath
63 text <- readFile path
64 let contexts = splitBy (Sentences 5) text
65 myterms <- extractTerms Multi FR contexts
66
67 -- TODO filter (\t -> not . elem t stopList) myterms
68 -- TODO groupBy (Stem | GroupList)
69
70 let myCooc = removeApax $ cooc myterms
71 --let (ti, fi) = createIndices myCooc
72 pure $ incExcSpeGen myCooc
73 -- Cooc -> Matrix
74
75 -- -- filter by spec/gen (dynmaic programming)
76 -- let theScores = M.filter (>0) $ score conditional myCoocFiltered
77 ----
78 ------ -- Matrix -> Clustering
79 ------ pure $ bestpartition False $ map2graph $ toIndex ti theScores
80 -- partitions <- cLouvain theScores
81 -- pure partitions
82 ---- | Building : -> Graph -> JSON
83
84