]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Examples.hs
[DOC+TESTS] contexts of texts.
[gargantext.git] / src / Gargantext / Text / Examples.hs
1 {-|
2 Module : Gargantext.Text.Examples
3 Description : Minimal Examples to test behavior of the functions.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This file is intended for these purposes:
11
12 - documentation for teaching and research
13 - learn basics of Haskell which is a scientific programming language
14 - behavioral tests (that should be completed with uni-tests and scale-tests
15
16 This documents defines basic of Text definitions according to Gargantext..
17
18 - What is a term ?
19 - What is a sentence ?
20 - What is a paragraph ?
21
22
23 -}
24
25 {-# LANGUAGE BangPatterns #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE OverloadedStrings #-}
28
29 module Gargantext.Text.Examples
30 where
31
32 import Data.Ord (Down(..))
33 import qualified Data.List as L
34
35 import Data.Map (Map)
36 import qualified Data.Map as M
37
38 import Data.Text (Text)
39 import qualified Data.Text as T
40
41 import Data.Tuple.Extra (both)
42 import Data.Array.Accelerate (toList, Matrix)
43
44 import Gargantext.Prelude
45 import Gargantext.Text.Metrics.Count (occurrences, cooc)
46 import Gargantext.Text.Terms (TermType(MonoMulti), terms)
47 import Gargantext.Core (Lang(EN))
48 import Gargantext.Core.Types (Terms(..), Label)
49 import Gargantext.Text.Context (splitBy, SplitContext(Sentences))
50 import Gargantext.Text.Metrics.Count (Grouped)
51 import Gargantext.Viz.Graph.Distances.Matrice
52 import Gargantext.Viz.Graph.Index
53
54 import qualified Data.Array.Accelerate as DAA
55
56 -- | Sentences
57 -- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
58 --
59 -- >>> ex_sentences
60 -- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
61 ex_sentences :: [Text]
62 ex_sentences = [ "There is a table with a glass of wine and a spoon."
63 , "I can see the glass on the table."
64 , "There was only a spoon on that table."
65 , "The glass just fall from the table, pouring wine everywhere."
66 , "I wish the glass did not contain wine."
67 ]
68
69
70 -- | From list to simple text as paragraph.
71 -- Let 'Data.Text.intercalate' each sentence with a space. Result is a paragraph.
72 --
73 -- >>> T.intercalate (T.pack " ") ex_sentences
74 -- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
75 ex_paragraph :: Text
76 ex_paragraph = T.intercalate " " ex_sentences
77
78 -- | Let split sentences by Contexts of text.
79 -- More about 'Gargantext.Text.Context'
80 --
81 -- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
82 -- True
83
84 -- | Terms reordered to visually check occurrences
85 -- Split text by sentence and then extract ngrams.
86 --
87 -- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
88 -- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
89 ex_terms :: IO [[Terms]]
90 ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
91
92 -- | Test the Occurrences
93 --
94 -- >>> occurrences <$> L.concat <$> ex_terms
95 -- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
96 ex_occ :: IO (Map Grouped (Map Terms Int))
97 ex_occ = occurrences <$> L.concat <$> ex_terms
98
99 -- | Test the cooccurrences
100 -- Use the 'Gargantext.Text.Metrics.Count.cooc' function.
101 --
102 -- >>> cooc <$> ex_terms
103 -- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
104 ex_cooc :: IO (Map (Label, Label) Int)
105 ex_cooc = cooc <$> ex_terms
106
107 -- | Tests
108 ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector InclusionExclusion, DAA.Vector SpecificityGenericity))
109 ex_cooc_mat = do
110 m <- ex_cooc
111 let (ti,_) = createIndices m
112 let mat_cooc = cooc2mat ti m
113 pure ( ti
114 , mat_cooc
115 , incExcSpeGen_proba mat_cooc
116 , incExcSpeGen mat_cooc
117 )
118
119 ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
120 ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
121
122 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
123 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
124 where
125 (ti,fi) = createIndices m
126 ordonne x = sortWith (Down . snd) $ zip (map snd $ M.toList fi) (toList x)
127
128