]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics/TFICF.hs
[FEAT] Gargantext.Core.Mail
[gargantext.git] / src / Gargantext / Text / Metrics / TFICF.hs
1 {-|
2 Module : Gargantext.Text.Metrics.TFICF
3 Description : TFICF Ngrams tools
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Definition of TFICF : Term Frequency - Inverse of Context Frequency
11
12 TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
13
14 -}
15
16
17 module Gargantext.Text.Metrics.TFICF ( TFICF
18 , TficfContext(..)
19 , Total(..)
20 , Count(..)
21 , tficf
22 )
23 where
24
25 import Data.Text (Text)
26 import Gargantext.Prelude
27
-- | Fully qualified name of this module.
--
-- It is spliced into the panic messages raised by 'tficf' so that a failure
-- can be traced back to this module.
path :: Text
path = "Gargantext.Text.Metrics.TFICF"
30
-- | A TFICF score, i.e. the result type of 'tficf'.
--
-- This is a plain alias for 'Double', not a distinct type.
type TFICF = Double
32
-- | A pair of measures tagged with the context level it was taken from.
--
-- 'tficf' instantiates @n@ with 'Count' and @m@ with 'Total', and expects a
-- 'TficfInfra' as its first argument and a 'TficfSupra' as its second.
--
-- NOTE(review): judging by the names, "infra" is presumably the narrower
-- context and "supra" the wider one that contains it; confirm with callers.
data TficfContext n m
  = TficfInfra n m
  | TficfSupra n m
  deriving Show
36
-- | Size of a context, i.e. the denominator of a frequency in 'tficf'.
--
-- A @newtype@ rather than a single-field @data@: same constructor and
-- accessor, but no runtime wrapper around the 'Double'.
newtype Total = Total { unTotal :: Double }

-- | Number of occurrences in a context, i.e. the numerator of a frequency in
-- 'tficf'.
--
-- A @newtype@ rather than a single-field @data@: same constructor and
-- accessor, but no runtime wrapper around the 'Double'.
newtype Count = Count { unCount :: Double }
39
-- | Compute the TFICF score of a term from its measures in two contexts.
--
-- The first argument must be a 'TficfInfra' and the second a 'TficfSupra'.
-- Each carries the term's 'Count' in that context and the context's 'Total'.
--
-- The score has the TF-IDF shape, with contexts in place of documents:
--
-- > tficf = (ic / it) * log (st / sc)
--
-- where @ic@ and @it@ are the infra count and total, and @sc@ and @st@ the
-- supra count and total. Because the guard enforces @st >= sc@, the logarithm
-- is non-negative for positive inputs, and is @0@ when @sc == st@.
--
-- Panics with \"Frequency impossible\" when a count exceeds its total, and
-- with \"Undefined for these contexts\" for any other pairing of
-- constructors.
--
-- NOTE(review): zero or negative measures are not rejected. For example
-- @it == 0@ or @sc == 0@ produce non-finite results under 'Double'
-- arithmetic; confirm whether callers can pass such values.
tficf :: TficfContext Count Total
      -> TficfContext Count Total
      -> TFICF
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc = termFrequency * inverseContextFrequency
  | otherwise            = panic $ "[ERR]" <> path <> " Frequency impossible"
  where
    -- Share of the infra context taken up by the term.
    termFrequency = ic / it
    -- Log of the inverse share of the supra context taken up by the term.
    inverseContextFrequency = log (st / sc)
tficf _ _ = panic $ "[ERR]" <> path <> " Undefined for these contexts"
48
49