]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Ngrams.hs
[TEXT-MINING] adding first functions/datatypes.
[gargantext.git] / src / Gargantext / Ngrams.hs
1 {-|
2 Module : Gargantext.Ngrams
3 Description : Ngrams tools
4 Copyright : (c) CNRS, 2017
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Ngrams extraction.
11
12 Definitions of ngrams.
13 n: a non-negative integer.
14
15 -}
16
17 module Gargantext.Ngrams ( module Gargantext.Ngrams.Letters
18 --, module Gargantext.Ngrams.Hetero
19 , module Gargantext.Ngrams.CoreNLP
20 , module Gargantext.Ngrams.Parser
21 , module Gargantext.Ngrams.Occurrences
22 , module Gargantext.Ngrams.TextMining
23 , module Gargantext.Ngrams.Metrics
24 , ngrams, occurrences
25 --, module Gargantext.Ngrams.Words
26 ) where
27
28 import Gargantext.Ngrams.Letters
29 --import Gargantext.Ngrams.Hetero
30 import Gargantext.Ngrams.CoreNLP
31 import Gargantext.Ngrams.Parser
32
33
34 import Gargantext.Ngrams.Occurrences
35 import Gargantext.Ngrams.TextMining
36 --import Gargantext.Ngrams.Words
37
38 import Gargantext.Ngrams.Metrics
39
40 -----------------------------------------------------------------
41
42 import Data.Char (Char, isAlpha, isSpace)
43 import Data.Text (Text, words, filter, toLower)
44 import Data.Map.Strict (Map, empty, insertWith)
45 import Data.Foldable (foldl')
46 import Gargantext.Prelude hiding (filter)
47
48 -- Maybe useful later:
49 --import NLP.Stemmer (stem, Stemmer(..))
50 --import Language.Aspell (check, suggest, spellChecker, spellCheckerWithOptions)
51 --import Language.Aspell.Options (ACOption(..))
52
53
-- | Split a text into lower-cased word tokens.
--
-- Every character rejected by 'isGram' is dropped first, the remainder is
-- lower-cased, and the result is split into words by 'monograms'.
--
-- Note: rejected characters are deleted, not replaced by a space, so two
-- words separated only by punctuation (e.g. @"a,b"@) end up as one token.
ngrams :: Text -> [Text]
ngrams txt = monograms lowered
  where
    -- Keep only the characters accepted by 'isGram'.
    kept    = filter isGram txt
    -- Normalise case after the unwanted characters are gone.
    lowered = toLower kept
56
-- | Split a text into its single-word tokens using 'words' from "Data.Text".
monograms :: Text -> [Text]
monograms txt = words txt
59
-- | Decide whether a character is kept when extracting ngrams.
--
-- Accepted characters are the hyphen @'-'@, anything for which 'isAlpha'
-- holds, and anything for which 'isSpace' holds. Everything else (digits,
-- punctuation, ...) is rejected.
isGram :: Char -> Bool
isGram c = case c of
  '-' -> True
  _   -> isAlpha c || isSpace c
63
-- | Count how many times each element appears in a list.
--
-- The result maps every distinct element of the input to its number of
-- occurrences; an empty list yields an empty map.
occurrences :: Ord a => [a] -> Map a Int
occurrences = foldl' bump empty
  where
    -- Add one to the count stored under @key@, starting at 1 if absent.
    bump counts key = insertWith (+) key 1 counts
67
68
69
70