]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Metrics.hs
[FEAT] Scores, main backend database functions, needs API connection (WIP)
[gargantext.git] / src / Gargantext / Database / Action / Metrics.hs
1 {-|
2 Module : Gargantext.Database.Metrics
3 Description : Get Metrics from Storage (Database like)
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Node API
11 -}
12
13 module Gargantext.Database.Action.Metrics
14 where
15
16 import Data.HashMap.Strict (HashMap)
17 import Data.Map (Map)
18 import Data.Set (Set)
19 import Data.Vector (Vector)
20 import Gargantext.API.Ngrams.Tools (filterListWithRoot, groupNodesByNgrams, Diagonal(..), getCoocByNgrams, mapTermListRoot, RootTerm, getRepo')
21 import Gargantext.API.Ngrams.Types (TabType(..), ngramsTypeFromTabType, NgramsTerm)
22 import Gargantext.Core.Mail.Types (HasMail)
23 import Gargantext.Core.NodeStory
24 import Gargantext.Core.Text.Metrics (scored, Scored(..), {-localMetrics, toScored-})
25 import Gargantext.Core.Types (ListType(..), Limit, NodeType(..), ContextId)
26 import Gargantext.Database.Action.Flow.Types (FlowCmdM)
27 import Gargantext.Database.Action.Metrics.NgramsByContext (getContextsByNgramsOnlyUser{-, getTficfWith-})
28 import Gargantext.Database.Admin.Config (userMaster)
29 import Gargantext.Database.Admin.Types.Node (ListId, CorpusId)
30 import Gargantext.Database.Query.Table.Node (defaultList)
31 import Gargantext.Database.Query.Table.Node.Select
32 import Gargantext.Prelude
33 import qualified Data.HashMap.Strict as HM
34 import qualified Data.Map as Map
35 import qualified Data.Set as Set
36 import qualified Data.List as List
37
38 getMetrics :: FlowCmdM env err m
39 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
40 -> m (HashMap NgramsTerm (ListType, Maybe NgramsTerm), Vector (Scored NgramsTerm))
41 getMetrics cId maybeListId tabType maybeLimit = do
42 (ngs, _, myCooc) <- getNgramsCooc cId maybeListId tabType maybeLimit
43 -- TODO HashMap
44 pure (ngs, scored myCooc)
45
46
47 getNgramsCooc :: (FlowCmdM env err m)
48 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
49 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
50 , HashMap NgramsTerm (Maybe RootTerm)
51 , HashMap (NgramsTerm, NgramsTerm) Int
52 )
53 getNgramsCooc cId maybeListId tabType maybeLimit = do
54 (ngs', ngs) <- getNgrams cId maybeListId tabType
55
56 lId <- defaultList cId
57 lIds <- selectNodesWithUsername NodeList userMaster
58
59 myCooc <- HM.filter (>1) <$> getCoocByNgrams (Diagonal True)
60 <$> groupNodesByNgrams ngs
61 <$> getContextsByNgramsOnlyUser cId
62 (lIds <> [lId])
63 (ngramsTypeFromTabType tabType)
64 (take' maybeLimit $ HM.keys ngs)
65 pure $ (ngs', ngs, myCooc)
66
67 -- Used for scores in Ngrams Table
68 getNgramsOccurrences :: (FlowCmdM env err m)
69 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
70 -> m (HashMap NgramsTerm Int)
71 getNgramsOccurrences c l t ml = HM.map Set.size <$> getNgramsContexts c l t ml
72
73
74
75 getNgramsContexts :: (FlowCmdM env err m)
76 => CorpusId -> Maybe ListId -> TabType -> Maybe Limit
77 -> m (HashMap NgramsTerm (Set ContextId))
78 getNgramsContexts cId maybeListId tabType maybeLimit = do
79 (_ngs', ngs) <- getNgrams cId maybeListId tabType
80
81 lId <- defaultList cId
82 lIds <- selectNodesWithUsername NodeList userMaster
83
84 -- TODO maybe add an option to group here
85 getContextsByNgramsOnlyUser cId
86 (lIds <> [lId])
87 (ngramsTypeFromTabType tabType)
88 (take' maybeLimit $ HM.keys ngs)
89
90
91
92 -- Used for scores in Doc Table
93 getContextsNgramsScore :: (FlowCmdM env err m)
94 => CorpusId -> Maybe ListId -> TabType -> ListType -> Maybe Limit
95 -> m (Map ContextId Int)
96 getContextsNgramsScore cId maybeListId tabType listType maybeLimit
97 = Map.map Set.size <$> getContextsNgrams cId maybeListId tabType listType maybeLimit
98
99 getContextsNgrams :: (FlowCmdM env err m)
100 => CorpusId -> Maybe ListId -> TabType -> ListType -> Maybe Limit
101 -> m (Map ContextId (Set NgramsTerm))
102 getContextsNgrams cId maybeListId tabType listType maybeLimit = do
103 (ngs', ngs) <- getNgrams cId maybeListId tabType
104 lId <- defaultList cId
105 lIds <- selectNodesWithUsername NodeList userMaster
106
107 result <- groupNodesByNgrams ngs <$> getContextsByNgramsOnlyUser
108 cId
109 (lIds <> [lId])
110 (ngramsTypeFromTabType tabType)
111 ( take' maybeLimit
112 $ HM.keys
113 $ HM.filter (\v -> fst v == listType) ngs'
114 )
115
116 pure $ Map.fromListWith (<>)
117 $ List.concat
118 $ map (\(ng, contexts) -> List.zip (Set.toList contexts) (List.cycle [Set.singleton ng]))
119 $ HM.toList result
120
121
122
123 getNgrams :: (HasMail env, HasNodeStory env err m)
124 => CorpusId -> Maybe ListId -> TabType
125 -> m ( HashMap NgramsTerm (ListType, Maybe NgramsTerm)
126 , HashMap NgramsTerm (Maybe RootTerm)
127 )
128 getNgrams cId maybeListId tabType = do
129
130 lId <- case maybeListId of
131 Nothing -> defaultList cId
132 Just lId' -> pure lId'
133
134 lists <- mapTermListRoot [lId] (ngramsTypeFromTabType tabType) <$> getRepo' [lId]
135 let maybeSyn = HM.unions $ map (\t -> filterListWithRoot t lists)
136 [MapTerm, StopTerm, CandidateTerm]
137 pure (lists, maybeSyn)
138
139 -- Some useful Tools
140 take' :: Maybe Int -> [a] -> [a]
141 take' Nothing xs = xs
142 take' (Just n) xs = take n xs
143
144
145