]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Metrics/TFICF.hs
[TextFlow] grouping fun
[gargantext.git] / src / Gargantext / Database / Action / Metrics / TFICF.hs
1 {-|
2 Module : Gargantext.Database.Action.Metrics.TFICF
3 Description : Ngrams by Node user and master
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13
14 module Gargantext.Database.Action.Metrics.TFICF
15 where
16
17 -- import Debug.Trace (trace)
18 -- import Gargantext.Core (Lang(..))
19 import Data.Map.Strict (Map, toList, fromList)
20 import Data.Maybe (fromMaybe)
21 import Data.Text (Text)
22 import Gargantext.Core.Text.Metrics.TFICF
23 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getOccByNgramsOnlyFast)
24 import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
25 import Gargantext.Database.Prelude (Cmd)
26 import Gargantext.Database.Query.Table.NodeNode (selectCountDocs)
27 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
28 import Gargantext.Prelude
29 import qualified Data.Map.Strict as Map
30 import qualified Data.Set as Set
31
-- | Compute a 'tficf' score for ngrams of a user corpus, using the master
-- corpus as the "supra" context.
--
-- For each ngram, the "infra" count is the size of the set that
-- 'getNodesByNgramsUser' returns for it (presumably the nodes in which the
-- ngram appears -- confirm against that function). Ngrams whose set size is
-- not strictly greater than 1 are left out of the result.
--
-- The "supra" count comes from 'getOccByNgramsOnlyFast' on the master corpus
-- and falls back to 0 when the ngram is absent from that result. Both totals
-- come from 'selectCountDocs' on the respective corpus.
getTficf :: UserCorpusId
         -> MasterCorpusId
         -> NgramsType
         -> Cmd err (Map Text Double)
getTficf cId mId nt = do
  -- Local counts: set size per ngram, keeping only sizes > 1.
  localCounts <- Map.filter (> 1) . Map.map (fromIntegral . Set.size)
             <$> getNodesByNgramsUser cId nt

  -- Master-corpus occurrences, requested only for the ngrams kept above.
  globalCounts <- Map.map fromIntegral
              <$> getOccByNgramsOnlyFast mId nt (Map.keys localCounts)

  localTotal  <- fromIntegral <$> selectCountDocs cId
  globalTotal <- fromIntegral <$> selectCountDocs mId

  let -- Missing ngrams count as 0 occurrences in the master corpus.
      globalCountOf ngram = Map.findWithDefault 0 ngram globalCounts
      scoreOf ngram localCount =
        tficf (TficfInfra (Count localCount)            (Total localTotal))
              (TficfSupra (Count (globalCountOf ngram)) (Total globalTotal))

  pure $ Map.mapWithKey scoreOf localCounts
55