{-|
Module      : Gargantext.Core.Text.Metrics.TFICF
Description : TFICF Ngrams tools
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

Definition of TFICF: Term Frequency - Inverse of Context Frequency.

TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
-}
17 module Gargantext.Core.Text.Metrics.TFICF ( TFICF
26 import Data.Map.Strict (Map, toList)
27 import Data.Text (Text)
28 import Gargantext.Core.Types (Ordering(..))
29 import Gargantext.Prelude
30 import qualified Data.List as List
31 import qualified Data.Ord as DO (Down(..))
-- | Tag identifying this module; it is spliced into the messages of the
-- errors raised by 'tficf'.
path :: Text
path = "[G.T.Metrics.TFICF]"
-- | A pair of measurements tagged with the context they were taken in.
--
-- 'tficf' expects a 'TficfInfra' value as its first argument and a
-- 'TficfSupra' value as its second one; both constructors carry the same two
-- fields, of types @n@ and @m@.
data TficfContext n m
  = TficfInfra n m
  | TficfSupra n m
-- | A 'Double' tagged as a total, so that it cannot be mixed up with a
-- 'Count'. The field is strict; 'unTotal' unwraps it.
data Total = Total
  { unTotal :: !Double
  }
-- | A 'Double' tagged as a count, so that it cannot be mixed up with a
-- 'Total'. The field is strict; 'unCount' unwraps it.
data Count = Count
  { unCount :: !Double
  }
-- | Compute the TFICF score from an infra context and a supra context.
--
-- The first argument must be built with 'TficfInfra' and the second with
-- 'TficfSupra'. With @ic@/@it@ the infra 'Count'/'Total' and @sc@/@st@ the
-- supra 'Count'/'Total', the result is @(it / ic) * log (st / sc)@.
--
-- Panics with a \"Frequency impossible\" message unless @it >= ic@,
-- @st >= sc@ and @it <= st@ all hold, and with an \"Undefined for these
-- contexts\" message for any other combination of constructors.
--
-- A zero count is not rejected by the guard: the divisions then yield
-- Infinity or NaN instead of a panic.
--
-- NOTE(review): the textbook term frequency is @ic / it@; the ratio used
-- here is its inverse -- confirm that this is intended.
tficf :: TficfContext Count Total
      -> TficfContext Count Total
      -> Double
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc && it <= st = (it / ic) * log (st / sc)
  | otherwise = panic $ "[ERR]" <> path <> " Frequency impossible"
-- Any other pairing of constructors (e.g. two infra contexts) has no score.
tficf _ _ = panic $ "[ERR]" <> path <> " Undefined for these contexts"
-- | Flatten a map into its @(key, value)@ pairs, sorted by value.
--
-- 'Up' yields ascending values and 'Down' descending values ('Ordering' here
-- is the sort-direction type imported from "Gargantext.Core.Types").
-- 'List.sortOn' is stable, so pairs with equal values keep the ascending-key
-- order in which 'toList' produced them.
sortTficf :: Ord v => Ordering -> Map k v -> [(k, v)]
sortTficf Down = List.sortOn (DO.Down . snd) . toList
sortTficf Up   = List.sortOn snd . toList