{-|
Module      : Gargantext.Text.Metrics.TFICF
Description : TFICF Ngrams tools
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

Definition of TFICF: Term Frequency - Inverse of Context Frequency.

TFICF is a generalization of [TFIDF](https://en.wikipedia.org/wiki/Tf%E2%80%93idf).
-}
16 {-# LANGUAGE NoImplicitPrelude #-}
17 {-# LANGUAGE OverloadedStrings #-}
19 module Gargantext.Text.Metrics.TFICF ( TFICF
27 import Data.Text (Text)
28 import Gargantext.Prelude
-- | Name of this module, inserted into the panic messages raised by 'tficf'
-- so that a failure can be traced back to its origin.
path :: Text
path = "Gargantext.Text.Metrics.TFICF"
-- | A pair of measures tagged with the context they were taken in.
--
-- 'tficf' instantiates @n@ with 'Count' and @m@ with 'Total', and expects
-- one value of each constructor: the 'TficfInfra' pair provides the
-- count/total ratio, the 'TficfSupra' pair provides the logarithmic term.
-- NOTE(review): "infra" is presumably the narrower context and "supra" the
-- wider one that contains it -- confirm against callers.
data TficfContext n m
  = TficfInfra n m
  | TficfSupra n m
-- | Total attached to a context, wrapped so it cannot be confused with a
-- 'Count'. 'tficf' requires it to be at least as large as the 'Count' it is
-- paired with.
--
-- A @newtype@ rather than a single-field @data@: same constructor and
-- accessor, no runtime wrapper.
newtype Total = Total {unTotal :: Double}
-- | Count attached to a context, wrapped so it cannot be confused with a
-- 'Total'. 'tficf' requires it to be no larger than the 'Total' it is
-- paired with.
--
-- A @newtype@ rather than a single-field @data@: same constructor and
-- accessor, no runtime wrapper.
newtype Count = Count {unCount :: Double}
-- | Term Frequency - Inverse of Context Frequency score.
--
-- The first argument must be a 'TficfInfra' context and the second a
-- 'TficfSupra' context, each holding a count and the total it is measured
-- against. The score has the TFIDF shape @tf * idf@:
--
-- > (ic / it) * log (st / sc)
--
-- where @ic@/@it@ are the infra count/total and @sc@/@st@ the supra
-- count/total. With positive values the guard @st >= sc@ keeps the
-- logarithm non-negative, so the score is non-negative as well.
--
-- Panics when a count is greater than its total, and when the two contexts
-- are not passed as infra then supra. Zero counts or totals are not
-- rejected; they go through the floating-point division as they are.
tficf :: TficfContext Count Total
      -> TficfContext Count Total
      -> Double
tficf (TficfInfra (Count ic) (Total it))
      (TficfSupra (Count sc) (Total st))
  | it >= ic && st >= sc = termFrequency * inverseContextFrequency
  | otherwise            = panic $ "[ERR]" <> path <> " Frequency impossible"
  where
    -- Share of the infra total taken by the count.
    termFrequency = ic / it
    -- Total over count (not count over total), so a smaller supra count
    -- gives a larger weight and the logarithm stays >= 0.
    inverseContextFrequency = log (st / sc)
-- Any other combination of constructors (two infra, two supra, or swapped).
tficf _ _ = panic $ "[ERR]" <> path <> " Undefined for these contexts"