]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Examples.hs
[ngrams] add score update endpoint + sorting
[gargantext.git] / src / Gargantext / Core / Text / Examples.hs
1 {-|
2 Module : Gargantext.Core.Text.Examples
3 Description : Minimal Examples to test behavior of the functions.
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 This file is intended for these purposes:
11
12 - documentation for teaching and research
13 - learn basics of Haskell which is a scientific programming language
14 - behavioral tests (that should be completed with uni-tests and scale-tests)
15
16 This document defines basic of Text definitions according to Gargantext..
17
18 - What is a term ?
19 - What is a sentence ?
20 - What is a paragraph ?
21
22 -}
23
24 {-# LANGUAGE BangPatterns #-}
25
26 module Gargantext.Core.Text.Examples
27 where
28
29 import Data.Array.Accelerate (toList, Matrix)
30 import Data.Map (Map)
31 import Data.Ord (Down(..))
32 import Data.Text (Text)
33 import Data.Tuple.Extra (both)
34 import Gargantext.Core (Lang(EN))
35 import Gargantext.Core.Methods.Distances.Accelerate.SpeGen
36 import Gargantext.Core.Text.Context (splitBy, SplitContext(Sentences))
37 import Gargantext.Core.Text.Metrics.Count (Grouped)
38 import Gargantext.Core.Text.Metrics.Count (occurrences, cooc)
39 import Gargantext.Core.Text.Terms (TermType(MonoMulti), terms)
40 import Gargantext.Core.Types (Terms(..), Label)
41 import Gargantext.Core.Viz.Graph.Index
42 import Gargantext.Prelude
43 import qualified Data.Array.Accelerate as DAA
44 import qualified Data.List as List
45 import qualified Data.Map as Map
46 import qualified Data.Text as Text
47
48 -- | Sentences
49 -- Let be a list of Texts: ['Data.Text.Text']. Each text in this example is a sentence.
50 --
51 -- >>> ex_sentences
52 -- ["There is a table with a glass of wine and a spoon.","I can see the glass on the table.","There was only a spoon on that table.","The glass just fall from the table, pouring wine everywhere.","I wish the glass did not contain wine."]
53 ex_sentences :: [Text]
54 ex_sentences = [ "There is a table with a glass of wine and a spoon."
55 , "I can see the glass on the table."
56 , "There was only a spoon on that table."
57 , "The glass just fall from the table, pouring wine everywhere."
58 , "I wish the glass did not contain wine."
59 ]
60
61
62 -- | From list to simple text as paragraph.
63 -- Let 'Data.Text.intercalate' each sentence with a space. Result is a paragraph.
64 --
65 -- >>> T.intercalate (T.pack " ") ex_sentences
66 -- "There is a table with a glass of wine and a spoon. I can see the glass on the table. There was only a spoon on that table. The glass just fall from the table, pouring wine everywhere. I wish the glass did not contain wine."
67 ex_paragraph :: Text
68 ex_paragraph = Text.intercalate " " ex_sentences
69
70 -- | Let split sentences by Contexts of text.
71 -- More about 'Gargantext.Core.Text.Context'
72 --
73 -- >>> ex_sentences == splitBy (Sentences 0) ex_paragraph
74 -- True
75
76 -- | Terms reordered to visually check occurrences
77 -- Split text by sentence and then extract ngrams.
78 --
79 -- >>> mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
80 -- [[["table"],["glass"],["wine"],["spoon"]],[["glass"],["table"]],[["spoon"],["table"]],[["glass"],["table"],["wine"]],[["glass"],["wine"]]]
81 ex_terms :: IO [[Terms]]
82 ex_terms = mapM (terms (MonoMulti EN)) $ splitBy (Sentences 0) ex_paragraph
83
84 -- | Test the Occurrences
85 --
86 -- >>> occurrences <$> List.concat <$> ex_terms
87 -- fromList [(fromList ["glass"],fromList [(["glass"],4)]),(fromList ["spoon"],fromList [(["spoon"],2)]),(fromList ["tabl"],fromList [(["table"],4)]),(fromList ["wine"],fromList [(["wine"],3)])]
88 ex_occ :: IO (Map Grouped (Map Terms Int))
89 ex_occ = occurrences <$> List.concat <$> ex_terms
90
91 -- | Test the cooccurrences
92 -- Use the 'Gargantext.Core.Text.Metrics.Count.cooc' function.
93 --
94 -- >>> cooc <$> ex_terms
95 -- fromList [((["glass"],["glass"]),4),((["spoon"],["glass"]),1),((["spoon"],["spoon"]),2),((["table"],["glass"]),3),((["table"],["spoon"]),2),((["table"],["table"]),4),((["wine"],["glass"]),3),((["wine"],["spoon"]),1),((["wine"],["table"]),2),((["wine"],["wine"]),3)]
96 ex_cooc :: IO (Map (Label, Label) Int)
97 ex_cooc = cooc <$> ex_terms
98
99 -- | Tests the specificity and genericity
100 --
101 -- >>> ex_cooc_mat
102 -- (fromList [(["glass"],0),(["spoon"],1),(["table"],2),(["wine"],3)],Matrix (Z :. 4 :. 4)
103 -- [ 4, 0, 0, 0,
104 -- 1, 2, 0, 0,
105 -- 3, 2, 4, 0,
106 -- 3, 1, 2, 3],Matrix (Z :. 4 :. 4)
107 -- [ 1.0, 0.25, 0.75, 0.75,
108 -- 0.0, 1.0, 1.0, 0.5,
109 -- 0.0, 0.0, 1.0, 0.5,
110 -- 0.0, 0.0, 0.0, 1.0],(Vector (Z :. 4) [0.5833333333333334,0.5833333333333334,0.75,0.5833333333333334],Vector (Z :. 4) [-0.5833333333333334,-0.4166666666666667,0.41666666666666674,0.5833333333333334]))
111 ex_cooc_mat :: IO (Map Label Index, Matrix Int, Matrix Double, (DAA.Vector GenericityInclusion, DAA.Vector SpecificityExclusion))
112 ex_cooc_mat = do
113 m <- ex_cooc
114 let (ti,_) = createIndices m
115 let mat_cooc = cooc2mat ti m
116 pure ( ti
117 , mat_cooc
118 , incExcSpeGen_proba mat_cooc
119 , incExcSpeGen mat_cooc
120 )
121
122 ex_incExcSpeGen :: IO ([(Label, Double)], [(Label, Double)])
123 ex_incExcSpeGen = incExcSpeGen_sorted <$> ex_cooc
124
125 incExcSpeGen_sorted :: Ord t => Map (t,t) Int -> ([(t,Double)],[(t,Double)])
126 incExcSpeGen_sorted m = both ordonne (incExcSpeGen $ cooc2mat ti m)
127 where
128 (ti,fi) = createIndices m
129 ordonne x = sortWith (Down . snd)
130 $ zip (map snd $ Map.toList fi) (toList x)
131
132