]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics/TFICF.hs
Merge branch 'dev-ngrams-repo' of ssh://delanoe.org/haskell-gargantext into dev-ngram...
[gargantext.git] / src / Gargantext / Text / Metrics / TFICF.hs
1 {-|
2 Module : Gargantext.Text.Metrics.TFICF
3 Description : TFICF Ngrams tools
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Definition of TFICF : Term Frequency - Inverse of Context Frequency
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Text.Metrics.TFICF where
18
19 --import Data.Text (Text)
20 import Gargantext.Prelude
21 import Gargantext.Database.Schema.Ngrams (NgramsId, NgramsTerms)
22
-- | A pair of values tagged with the kind of context they were taken from.
-- Every constructor carries one value of type @n@ followed by one of type @m@.
data TficfContext n m
  = TficfLanguage n m  -- ^ Values tagged as belonging to a language.
  | TficfCorpus n m    -- ^ Values tagged as belonging to a corpus.
  | TficfDocument n m  -- ^ Values tagged as belonging to a document.
  deriving (Show)
25
-- | Record associating an ngram (its id and its terms) with a score.
data Tficf = Tficf
  { tficf_ngramsId    :: NgramsId     -- ^ Id of the ngram.
  , tficf_ngramsTerms :: NgramsTerms  -- ^ Terms of the ngram.
  , tficf_score       :: Double       -- ^ Score stored for the ngram.
  } deriving (Show)
30
31
-- | Alias of 'TficfContext' naming the first argument of @tficf@: the
-- context that is contained in the supra one (e.g. a document in a corpus).
type InfraContext = TficfContext

-- | Alias of 'TficfContext' naming the second argument of @tficf@: the
-- context that contains the infra one (e.g. the corpus of a document).
type SupraContext = TficfContext
34
-- | TFICF is a generalization of TFIDF
-- (<https://en.wikipedia.org/wiki/Tf%E2%80%93idf>).
--
-- Only two pairings of contexts are accepted:
--
-- * a corpus (infra) with a language (supra);
-- * a document (infra) with a corpus (supra).
--
-- The two values of the infra context, followed by the two values of the
-- supra context, are handed to @tficf'@.  Any other pairing panics with
-- @"Not in definition"@.
tficf :: InfraContext Double Double -> SupraContext Double Double -> Double
tficf infra supra = case (infra, supra) of
  (TficfCorpus   c c', TficfLanguage l l') -> tficf' c c' l l'
  (TficfDocument d d', TficfCorpus   c c') -> tficf' d d' c c'
  _                                        -> panic "Not in definition"
41
-- | Score computed from four raw numbers: the first pair @c@, @c'@ and the
-- second pair @l@, @l'@ (as called from @tficf@: infra values first, supra
-- values second).
--
-- When @c <= c'@ and @l < l'@ the result is @(l / l') / log (c / c')@;
-- otherwise the function panics with @"Frequency impossible"@.
--
-- NOTE(review): the accepted range includes @c == c'@, where @log (c / c')@
-- is zero for non-zero inputs, so the division yields a non-finite 'Double'
-- instead of a panic -- confirm this is intended.
tficf' :: Double -> Double -> Double -> Double -> Double
tficf' c c' l l' =
  if frequenciesPossible
    then secondRatio / log firstRatio
    else panic "Frequency impossible"
  where
    -- Both pairs must satisfy their ordering constraint.
    frequenciesPossible = c <= c' && l < l'
    firstRatio          = c / c'
    secondRatio         = l / l'
46
47
-- | Placeholder for a list of example 4-tuples of 'Double'.  It is not
-- defined yet: forcing this value fails at runtime.
tficf_example :: [(Double, Double, Double, Double)]
tficf_example = undefined