{-|
Module      : Gargantext.Text.Metrics.TFICF
Description : TFICF Ngrams tools
Copyright   : (c) CNRS, 2017
License     : AGPL + CECILL v3
Maintainer  : team@gargantext.org
Stability   : experimental

Definition of TFICF : Term Frequency - Inverse of Context Frequency
-}
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
17 module Gargantext.Text.Metrics.TFICF where
19 --import Data.Text (Text)
20 import Gargantext.Prelude
21 import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms)
-- | Tags a pair of values with the level of context they were taken from.
--
-- Every constructor carries exactly two fields, of types @n@ and @m@. Their
-- meaning is not fixed by this type; 'tficf' instantiates both to 'Double'
-- and forwards them, in order, to @tficf'@.
data TficfContext n m
  = TficfLanguage n m  -- ^ values attached to a language-level context
  | TficfCorpus n m    -- ^ values attached to a corpus-level context
  | TficfDocument n m  -- ^ values attached to a document-level context
-- | An ngram together with a 'Double' score (presumably the value computed
-- by 'tficf' — confirm against the callers).
--
-- NOTE(review): the closing brace of this record was missing and is restored
-- here. Confirm that no further field or @deriving@ clause was lost with it.
data Tficf = Tficf
  { tficf_ngramsId    :: NgramsId     -- ^ identifier of the ngram
  , tficf_ngramsTerms :: NgramsTerms  -- ^ terms of the ngram
  , tficf_score       :: Double       -- ^ score attached to the ngram
  }
-- | Role name for the second argument of 'tficf': a language when the infra
-- context is a corpus, a corpus when the infra context is a document.
type SupraContext = TficfContext

-- | Role name for the first argument of 'tficf': a corpus (scored against a
-- language) or a document (scored against a corpus).
type InfraContext = TficfContext
-- | TFICF is a generalization of TFIDF
-- (<https://en.wikipedia.org/wiki/Tf%E2%80%93idf>), applied to a pair of
-- contexts: an infra context and a supra context.
--
-- Only two pairings are defined. Both delegate to @tficf'@, passing the two
-- fields of the infra context followed by the two fields of the supra
-- context:
--
--   * corpus (infra) with language (supra)
--   * document (infra) with corpus (supra)
--
-- Every other combination of constructors panics with @"Not in definition"@.
tficf :: InfraContext Double Double -> SupraContext Double Double -> Double
tficf infra supra =
  case (infra, supra) of
    (TficfCorpus infraN infraM, TficfLanguage supraN supraM) ->
      tficf' infraN infraM supraN supraM
    (TficfDocument infraN infraM, TficfCorpus supraN supraM) ->
      tficf' infraN infraM supraN supraM
    _ ->
      panic "Not in definition"
-- | Core TFICF computation. 'tficf' calls it with the two fields of the
-- infra context (@c@, @c'@) followed by the two fields of the supra context
-- (@l@, @l'@).
--
-- Returns @(l / l') / log (c / c')@ when @c <= c'@ and @l < l'@. Any other
-- input panics with @"Frequency impossible"@.
--
-- NOTE(review): for positive inputs that satisfy the guard, @c / c' <= 1@,
-- so @log (c / c')@ is non-positive and is exactly 0 when @c == c'@. The
-- score is therefore never positive, and the division is by zero in the
-- @c == c'@ case. Confirm that this is the intended formula.
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' c c' l l'
  | c <= c' && l < l' = (l / l') / log (c / c')
  | otherwise         = panic "Frequency impossible"
-- | Placeholder for a list of sample 4-tuples of 'Double's — presumably
-- argument sets for @tficf'@ (TODO confirm).
--
-- Not implemented yet: the value is 'undefined', so forcing it fails at
-- runtime.
tficf_example :: [(Double, Double, Double, Double)]
tficf_example = undefined