]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Metrics.hs
Minor build/packaging tweaks
[gargantext.git] / src / Gargantext / Text / Metrics.hs
1 {-|
2 Module : Gargantext.Text.Metrics
3 Description : Sample text and example term metrics (occurrences and cooccurrences).
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
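10 Meant to re-export functions from @Data.Text.Metrics@ (that import is currently commented out); for now it defines a sample text and example occurrence and cooccurrence computations over it.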
11 -}
12
13 {-# LANGUAGE NoImplicitPrelude #-}
14 {-# LANGUAGE OverloadedStrings #-}
15
16 module Gargantext.Text.Metrics where
17
18 import Data.Text (Text, pack)
19 import qualified Data.Text as T
20 import Data.List (concat)
21
22 --import GHC.Real (Ratio)
23 --import qualified Data.Text.Metrics as DTM
24
25 import Gargantext.Prelude
26
27 import Gargantext.Text.Metrics.Count (occurrences, cooc)
28 import Gargantext.Text.Terms (TermType(Multi), terms)
29 import Gargantext.Core (Lang(EN))
30 import Gargantext.Core.Types (Terms(..))
31 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
32
33 --noApax :: Ord a => Map a Occ -> Map a Occ
34 --noApax m = M.filter (>1) m
35
36
-- | Sample text used by the example metrics in this module.
--
-- The sentences are glued together with 'T.concat', i.e. with no separator
-- between them, so each full stop is immediately followed by the first
-- letter of the next sentence.
metrics_text :: Text
metrics_text = T.concat sampleSentences
  where
    sampleSentences :: [Text]
    sampleSentences =
      [ "A table is an object."
      , "A glas is an object too."
      , "Using a glas to dring is a function."
      , "Using a spoon to eat is a function."
      , "The spoon is an object to eat."
      ]
44
-- | The sample text 'metrics_text' cut into pieces by 'splitBy' with the
-- context @Sentences 0@.
metrics_sentences' :: [Text]
metrics_sentences' = splitBy sentenceContext metrics_text
  where
    sentenceContext = Sentences 0
47
-- | Hand-written list of sentences that 'metrics_sentences_Test' compares
-- against the computed 'metrics_sentences''.
--
-- NOTE(review): these sentences are not the ones concatenated in
-- 'metrics_text' (e.g. "The glas and the spoon are on the table." only
-- appears here), so the comparison presumably fails -- confirm which of the
-- two fixtures is the intended one.
metrics_sentences :: [Text]
metrics_sentences =
  [ "A table is an object."
  , "A glas is an object too."
  , "The glas and the spoon are on the table."
  , "The spoon is an object to eat."
  , "The spoon is on the table and the plate and the glas."
  ]
55
56
-- | Equality check between the sentences computed from the sample text
-- ('metrics_sentences'') and the hand-written ones ('metrics_sentences').
metrics_sentences_Test = metrics_sentences' == metrics_sentences
58
-- | Expected terms, reordered so that occurrences can be checked visually.
--
-- Still a placeholder: the binding is 'undefined', so forcing it throws.
-- The comparison that would use it is commented out further down.
metrics_terms :: [[Text]]
metrics_terms = undefined
62
-- | Terms extracted from each piece of the sample text: 'metrics_text' is
-- split with @Sentences 0@ and @terms Multi EN@ is run on every piece, in
-- order, inside 'IO'.
metrics_terms' :: IO [[Terms]]
metrics_terms' = mapM extractTerms sentences
  where
    extractTerms = terms Multi EN
    sentences    = splitBy (Sentences 0) metrics_text
65
66 --metrics_terms_Test = metrics_terms == ((map _terms_label) <$> metrics_terms')
67
-- | Occurrences of the terms extracted from the sample text.
--
-- The per-sentence term lists come from 'metrics_terms'' (the same
-- split-then-extract pipeline that used to be repeated here); they are
-- flattened with 'concat' and handed to 'occurrences'.
--
-- Value noted by the original author (brackets rebalanced; not verified
-- here):
--
-- > fromList [ (fromList ["table"] , fromList [(["table"] , 3)])
-- >          , (fromList ["object"], fromList [(["object"], 3)])
-- >          , (fromList ["glas"]  , fromList [(["glas"]  , 2)])
-- >          , (fromList ["spoon"] , fromList [(["spoon"] , 2)])
-- >          ]
--
-- No type signature is given: the result type of 'occurrences' is not
-- visible from this module.
metrics_occ = occurrences <$> (concat <$> metrics_terms')
76
-- | Cooccurrences computed by 'cooc' over the per-sentence terms of the
-- sample text.
--
-- The term lists come from 'metrics_terms'' instead of repeating the same
-- split-then-extract pipeline a third time.
--
-- Value noted by the original author (not verified here):
--
-- > fromList [ ((["glas"]  ,["object"]),6)
-- >          , ((["glas"]  ,["spoon"] ),4)
-- >          , ((["glas"]  ,["table"] ),6)
-- >          , ((["object"],["spoon"] ),6)
-- >          , ((["object"],["table"] ),9)
-- >          , ((["spoon"] ,["table"] ),6)
-- >          ]
--
-- No type signature is given: the result type of 'cooc' is not visible from
-- this module.
metrics_cooc = cooc <$> metrics_terms'
84
-- | Terms extracted from a tiny text made of the same sentence twice, split
-- with @Sentences 0@ and processed with @terms Multi EN@.
--
-- The signature mirrors 'metrics_terms'', which has the same expression
-- shape.
--
-- NOTE(review): despite its name this binding does not apply 'cooc'; it only
-- yields the per-sentence terms -- confirm whether that is intended.
metrics_cooc' :: IO [[Terms]]
metrics_cooc' = mapM (terms Multi EN) $ splitBy (Sentences 0) "The table object. The table object."
86
87
88