]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Database/Action/Metrics/TFICF.hs
Merge branch 'dev' into dev-ngrams-groups
[gargantext.git] / src / Gargantext / Database / Action / Metrics / TFICF.hs
1 {-|
2 Module : Gargantext.Database.Metrics.TFICF
3 Description : Ngrams by Node user and master
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 {-# LANGUAGE QuasiQuotes #-}
13
14 module Gargantext.Database.Action.Metrics.TFICF
15 where
16
17 -- import Debug.Trace (trace)
18 -- import Gargantext.Core (Lang(..))
19 import Data.HashMap.Strict (HashMap)
20 import qualified Data.HashMap.Strict as HM
21 import Data.Maybe (fromMaybe)
22 import Gargantext.Core
23 import Gargantext.Core.Text.Metrics.TFICF
24 import Gargantext.Database.Action.Metrics.NgramsByNode (getNodesByNgramsUser, getOccByNgramsOnlyFast)
25 import Gargantext.Database.Admin.Types.Node -- (ListId, CorpusId, NodeId)
26 import Gargantext.Database.Prelude (Cmd)
27 import Gargantext.Database.Query.Table.NodeNode (selectCountDocs)
28 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
29 import Gargantext.API.Ngrams.Types
30 import Gargantext.Prelude
31 import qualified Data.Set as Set
32
33 getTficf :: HasDBid NodeType
34 => UserCorpusId
35 -> MasterCorpusId
36 -> NgramsType
37 -> Cmd err (HashMap NgramsTerm Double)
38 getTficf cId mId nt = do
39 mapTextDoubleLocal <- HM.filter (> 1)
40 <$> HM.map (fromIntegral . Set.size)
41 <$> getNodesByNgramsUser cId nt
42
43 mapTextDoubleGlobal <- HM.map fromIntegral
44 <$> getOccByNgramsOnlyFast mId nt (HM.keys mapTextDoubleLocal)
45
46 countLocal <- selectCountDocs cId
47 countGlobal <- selectCountDocs mId
48
49 pure $ HM.mapWithKey (\t n ->
50 tficf (TficfInfra (Count n )
51 (Total $ fromIntegral countLocal))
52 (TficfSupra (Count $ fromMaybe 0 $ HM.lookup t mapTextDoubleGlobal)
53 (Total $ fromIntegral countGlobal))
54 ) mapTextDoubleLocal