2 Module : Gargantext.Core.Text.Examples
3 Description : Minimal Examples to test behavior of the functions.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 This file is intended for these purposes:
12 - documentation for teaching and research
13 - learn basics of Haskell which is a scientific programming language
14 - behavioral tests (that should be completed with uni-tests and scale-tests)
16 This document defines basic of Text definitions according to Gargantext..
19 - What is a sentence ?
20 - What is a paragraph ?
24 {-# LANGUAGE BangPatterns #-}
26 module Core.Text.Examples
30 import Data.Array.Accelerate (toList, Matrix)
31 import Data.Map.Strict (Map)
32 import Data.Ord (Down(..))
33 import Data.Text (Text)
34 import Data.Tuple.Extra (both)
35 import Gargantext.Core (Lang(EN))
36 import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
37 import Gargantext.Core.Text.Context (splitBy, SplitContext(Sentences))
38 import Gargantext.Core.Text.Metrics.Count (Grouped)
39 import Gargantext.Core.Text.Metrics.Count (occurrences, cooc)
40 import Gargantext.Core.Text.Terms (TermType(MonoMulti), terms)
41 import Gargantext.Core.Types (Terms(..), Label)
42 import Gargantext.Core.Viz.Graph.Index
43 import Gargantext.Prelude
44 import qualified Data.Array.Accelerate as DAA
45 import qualified Data.List as List
46 import qualified Data.Map.Strict as Map
47 import qualified Data.Text as Text
50 -- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
53 -- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
54 ex_sentences :: [Text]
55 ex_sentences = [ "There is a table with a glass of wine and a spoon."
56 , "I can see the glass on the table."
57 , "There was only a spoon on that table."
58 , "The glass just fall from the table, pouring wine everywhere."
59 , "I wish the glass did not contain wine."
63 -- | From list to simple text as paragraph.
64 -- Let 'Data.Text.intercalate' each sentence with a space. Result is a paragraph.
66 -- >>> T.intercalate (T.pack " ") ex_sentences
67 -- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
69 ex_paragraph = Text.intercalate " " ex_sentences
71 -- | Let split sentences by Contexts of text.
72 -- More about 'Gargantext.Core.Text.Context'
74 -- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
77 -- | Terms reordered to visually check occurrences
78 -- Split text by sentence and then extract ngrams.
80 -- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
81 -- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
82 ex_terms :: IO [[Terms]]
83 ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
85 -- | Test the Occurrences
87 -- >>> occurrences <$> List.concat <$> ex_terms
88 -- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
89 ex_occ :: IO (Map Grouped (Map Terms Int))
90 ex_occ = occurrences <$> List.concat <$> ex_terms
92 -- | Test the cooccurrences
93 -- Use the 'Gargantext.Core.Text.Metrics.Count.cooc' function.
95 -- >>> cooc <$> ex_terms
96 -- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
97 ex_cooc :: IO (Map (Label, Label) Int)
98 ex_cooc = cooc <$> ex_terms
100 -- | Tests the specificity and genericity
103 -- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4)
107 -- 3, 1, 2, 3],Matrix (Z :. 4 :. 4)
108 -- [ 1.0, 0.25, 0.75, 0.75,
109 -- 0.0, 1.0, 1.0, 0.5,
110 -- 0.0, 0.0, 1.0, 0.5,
111 -- 0.0, 0.0, 0.0, 1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
112 ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector GenericityInclusion, DAA.Vector SpecificityExclusion))
115 let (ti,_) = createIndices m
116 let mat_cooc = cooc2mat Triangle ti m
119 , incExcSpeGen_proba mat_cooc
120 , incExcSpeGen mat_cooc
123 ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
124 ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
126 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
127 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat Triangle ti m)
129 (ti,fi) = createIndices m
130 ordonne x = sortWith (Down . snd)
131 $ zip (map snd $ Map.toList fi) (toList x)