]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
0001-Basic-polymorphic-version-of-FrequentItemSet.patch by npouillard
[gargantext.git] / src / Gargantext / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Text.Metrics
3 Description : Sample corpus and term metrics (occurrences, cooccurrences) examples.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
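10 Intended to mainly re-export functions from @Data.Text.Metrics@; that import is currently commented out, so for now the module only holds a sample corpus and the metrics computed on it.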
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14
15 module Gargantext.Text.Metrics where
16
17 import Data.Text (Text, pack)
18 import qualified Data.Text as T
19 import Data.List (concat)
20
21 --import GHC.Real (Ratio)
22 --import qualified Data.Text.Metrics as DTM
23
24 import Gargantext.Prelude
25
26 import Gargantext.Text.Metrics.Count (occurrences, cooc)
27 import Gargantext.Text.Terms (TermType(Multi), terms)
28 import Gargantext.Core (Lang(EN))
29 import Gargantext.Core.Types (Terms(..))
30 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
31
32 --noApax :: Ord a => Map a Occ -> Map a Occ
33 --noApax m = M.filter (>1) m
34
35
-- | Sample corpus: five short English sentences joined with single spaces
-- into one 'Text'. It is the input of the sentence-splitting, term,
-- occurrence and cooccurrence examples of this module.
--
-- The spellings \"glas\" and \"dring\" are part of the fixture and are kept
-- verbatim.
metrics_text :: Text
metrics_text = T.unwords sampleSentences
  where
    sampleSentences :: [Text]
    sampleSentences =
      [ "A table is an object."
      , "A glas is an object too."
      , "Using a glas to dring is a function."
      , "Using a spoon to eat is a function."
      , "The spoon is an object to eat."
      ]
43
-- | 'metrics_text' cut into pieces by 'splitBy' using the @Sentences 0@
-- context.
metrics_sentences' :: [Text]
metrics_sentences' = splitBy sentenceContext metrics_text
  where
    -- The context handed to 'splitBy'; the meaning of the @0@ argument is
    -- defined in "Gargantext.Text.Context".
    sentenceContext = Sentences 0
46
-- | Hand-written list of sentences, compared with 'metrics_sentences'' by
-- 'metrics_sentences_Test'.
--
-- NOTE(review): only three of these five sentences also appear in
-- 'metrics_text' (the first, the second and \"The spoon is an object to
-- eat.\") — confirm which fixture is the intended reference.
metrics_sentences :: [Text]
metrics_sentences =
  [ "A table is an object."
  , "A glas is an object too."
  , "The glas and the spoon are on the table."
  , "The spoon is an object to eat."
  , "The spoon is on the table and the plate and the glas."
  ]
54
55
-- | 'True' when the hand-written 'metrics_sentences' are equal to
-- 'metrics_sentences'', i.e. to what 'splitBy' extracts from 'metrics_text'.
--
-- NOTE(review): the two fixtures do not list the same sentences, so this
-- presumably evaluates to 'False' — confirm which side should be updated.
metrics_sentences_Test :: Bool
metrics_sentences_Test = metrics_sentences == metrics_sentences'
57
-- | Terms, reordered so that occurrences can be checked visually.
--
-- Placeholder: the value is still 'undefined', so it must not be evaluated
-- until a real list is filled in (the comparison with 'metrics_terms'' that
-- would use it is commented out below).
metrics_terms :: [[Text]]
metrics_terms = undefined
61
-- | Runs 'terms' (with 'Multi' and 'EN') on every element of
-- 'metrics_sentences'', i.e. on each piece that @splitBy (Sentences 0)@
-- yields for 'metrics_text'. One list of 'Terms' is produced per piece.
metrics_terms' :: IO [[Terms]]
metrics_terms' = mapM (terms Multi EN) metrics_sentences'
64
65 --metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
66
-- | Occurrences: the per-sentence term lists of 'metrics_terms'' are
-- flattened with 'concat' and handed to 'occurrences'.
--
-- Expected result as noted by the original author:
--
-- > fromList [ (fromList ["table"] , fromList [(["table"] , 3)])
-- >          , (fromList ["object"], fromList [(["object"], 3)])
-- >          , (fromList ["glas"]  , fromList [(["glas"]  , 2)])
-- >          , (fromList ["spoon"] , fromList [(["spoon"] , 2)])
-- >          ]
--
-- NOTE(review): no type signature is given because the result type of
-- 'occurrences' is not visible from this module — add one once confirmed.
metrics_occ = occurrences <$> (concat <$> metrics_terms')
75
-- | Cooccurrences: 'cooc' applied to the per-sentence term lists of
-- 'metrics_terms''.
--
-- Expected result as noted by the original author:
--
-- > fromList [ ((["glas"]  , ["object"]), 6)
-- >          , ((["glas"]  , ["spoon"]) , 4)
-- >          , ((["glas"]  , ["table"]) , 6)
-- >          , ((["object"], ["spoon"]) , 6)
-- >          , ((["object"], ["table"]) , 9)
-- >          , ((["spoon"] , ["table"]) , 6)
-- >          ]
--
-- NOTE(review): no type signature is given because the result type of
-- 'cooc' is not visible from this module — add one once confirmed.
metrics_cooc = cooc <$> metrics_terms'
83
-- | Terms extracted (with 'Multi' and 'EN') from a tiny sample made of the
-- same sentence written twice, split with @splitBy (Sentences 0)@.
--
-- Despite its name, this value does not apply 'cooc': it only yields the
-- per-sentence term lists.
metrics_cooc' :: IO [[Terms]]
metrics_cooc' =
  mapM (terms Multi EN) $
    splitBy (Sentences 0) "The table object. The table object."
85
86
87
88
89
90