]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Metrics.hs
Merge branch 'dev-phylo' of https://gitlab.iscpif.fr/gargantext/haskell-gargantext...
[gargantext.git] / src / Gargantext / Database / Metrics.hs
1 {-|
2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Node API
11 -}
12
13
14 {-# LANGUAGE OverloadedStrings #-}
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE FlexibleContexts #-}
17
18 module Gargantext.Database.Metrics
19 where
20
21 import Data.Map (Map)
22 import Data.Text (Text)
23 import Gargantext.API.Ngrams (TabType(..), ngramsTypeFromTabType)
24 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm)
25 import Gargantext.Core.Types (ListType(..), Limit)
26 import Gargantext.Database.Flow (FlowCmdM)
27 import Gargantext.Database.Metrics.NgramsByNode (getNodesByNgramsOnlyUser, getTficfWith)
28 import Gargantext.Database.Schema.Node (defaultList)
29 import Gargantext.Database.Types.Node (ListId, CorpusId, HyperdataCorpus)
30 import Gargantext.Database.Flow (getOrMkRootWithCorpus)
31 import Gargantext.Database.Config (userMaster)
32 import Gargantext.Prelude
33 import Gargantext.Text.Metrics (scored, Scored(..), localMetrics, toScored)
34 import qualified Data.Map as Map
35 import qualified Data.Vector.Storable as Vec
36
37
38 getMetrics' :: FlowCmdM env err m
39 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
40 -> m (Map Text (ListType, Maybe Text), [Scored Text])
41 getMetrics' cId maybeListId tabType maybeLimit = do
42 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
43 pure (ngs, scored myCooc)
44
45
46 getMetrics :: FlowCmdM env err m
47 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
48 -> m (Map Text (ListType, Maybe Text), [Scored Text])
49 getMetrics cId maybeListId tabType maybeLimit = do
50 (ngs, ngs', metrics) <- getLocalMetrics cId maybeListId tabType maybeLimit
51
52 (_masterUserId, _masterRootId, masterCorpusId) <- getOrMkRootWithCorpus userMaster "" (Nothing :: Maybe HyperdataCorpus)
53
54 metrics' <- getTficfWith cId masterCorpusId (ngramsTypeFromTabType tabType) ngs'
55
56 pure (ngs , toScored [metrics, Map.fromList $ map (\(a,b) -> (a, Vec.fromList [fst b])) $ Map.toList metrics'])
57
58
59 getLocalMetrics :: (FlowCmdM env err m)
60 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
61 -> m ( Map Text (ListType, Maybe Text)
62 , Map Text (Maybe RootTerm)
63 , Map Text (Vec.Vector Double)
64 )
65 getLocalMetrics cId maybeListId tabType maybeLimit = do
66 (ngs, ngs', myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
67 pure (ngs, ngs', localMetrics myCooc)
68
69
70 getNgramsCooc :: (FlowCmdM env err m)
71 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
72 -> m ( Map Text (ListType, Maybe Text)
73 , Map Text (Maybe RootTerm)
74 , Map (Text, Text) Int
75 )
76 getNgramsCooc cId maybeListId tabType maybeLimit = do
77 (ngs', ngs) <- getNgrams cId maybeListId tabType
78
79 let
80 take' Nothing xs = xs
81 take' (Just n) xs = take n xs
82
83 myCooc <- Map.filter (>1) <$> getCoocByNgrams (Diagonal True)
84 <$> groupNodesByNgrams ngs
85 <$> getNodesByNgramsOnlyUser cId (ngramsTypeFromTabType tabType)
86 (take' maybeLimit $ Map.keys ngs)
87 pure $ (ngs', ngs, myCooc)
88
89
90
91 getNgrams :: (FlowCmdM env err m)
92 => CorpusId -> Maybe ListId -> TabType
93 -> m (Map Text (ListType, Maybe Text), Map Text (Maybe RootTerm))
94 getNgrams cId maybeListId tabType = do
95 lId <- case maybeListId of
96 Nothing -> defaultList cId
97 Just lId' -> pure lId'
98
99 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType)
100 let maybeSyn = Map.unions $ map (\t -> filterListWithRoot t lists)
101 [GraphTerm, StopTerm, CandidateTerm]
102 pure (lists, maybeSyn)
103