]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Terms.hs
[RENAME/ORG] ngrams -> terms.
[gargantext.git] / src / Gargantext / Text / Terms.hs
1 {-|
2 Module : Gargantext.Text.Terms
3 Description : Ngrams definition and tools
4 Copyright : (c) CNRS, 2017 - present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 An @n-gram@ is a contiguous sequence of n items from a given sample of
11 text. In the Gargantext application the items are words and n is a
12 non-negative integer.
13
14 Using Latin numerical prefixes, an n-gram of size 1 is referred to as a
15 "unigram"; size 2 is a "bigram" (or, less commonly, a "digram"); size
16 3 is a "trigram". English cardinal numbers are sometimes used, e.g.,
17 "four-gram", "five-gram", and so on.
18
19 Source: https://en.wikipedia.org/wiki/Ngrams
20
21 TODO
22 -- Prelude.concat <$> Prelude.map (filter (\n -> _my_token_pos n == Just NP)) <$> extractNgrams Gargantext.Core.EN testText_en
23
24 group Ngrams -> Tree
25 compute occ by node of Tree
26 group occs according groups
27
28 compute cooccurrences
29 compute graph
30
31 -}
32
33 {-# LANGUAGE NoImplicitPrelude #-}
34
35 module Gargantext.Text.Terms
36 where
37
38 import Gargantext.Core.Types
39
40 ------------------------------------------------------------------------
-- | Convert a 'TokenTag' into a 'Terms' value.
--
-- Only the first two fields of the 'TokenTag' constructor are carried over,
-- in the same order, as the two arguments of 'Terms'; the remaining two
-- fields are discarded.
tokenTag2terms :: TokenTag -> Terms
tokenTag2terms tag =
  case tag of
    TokenTag firstField secondField _ _ -> Terms firstField secondField
43 ------------------------------------------------------------------------
44