]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/List.hs
[NGRAMS] fixes
[gargantext.git] / src / Gargantext / Text / List.hs
1 {-|
2 Module : Gargantext.Text.List
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Helpers for checking whether a term occurs in a text, where an occurrence
11 means the term immediately followed by one of a fixed list of separators.
12
13 -}
14
15 {-# LANGUAGE NoImplicitPrelude #-}
16 {-# LANGUAGE OverloadedStrings #-}
17
18 module Gargantext.Text.List
19 where
20
21 import Data.Text (Text)
22 import qualified Data.Text as DT
23 import Gargantext.Prelude
24
25 -- TODO: normalize text
26
-- | Strings that mark the end of a term when they directly follow it.
-- 'isIn' appends each of them in turn to the term it searches for.
--
-- TODO: order the separators by decreasing probability of occurrence.
separators :: [Text]
separators =
  [ " "
  , ","
  , "."
  , "?"
  , "!"
  , "\""
  ]
30
-- | Tell whether @term@ occurs in @context@ directly followed by one of
-- the 'separators'.
--
-- The result is 'True' as soon as any string @term <> sep@ (for @sep@ in
-- 'separators') is an infix of @context@.
--
-- NOTE(review): only the character /after/ the term is constrained. Nothing
-- is checked before the term, and a term sitting at the very end of
-- @context@ (with no trailing separator) is not reported.
isIn :: Text -> Text -> Bool
isIn term context = any occursInContext candidates
  where
    -- The term followed by each possible separator.
    candidates = [term <> sep | sep <- separators]
    occursInContext needle = needle `DT.isInfixOf` context
34
35 ------------------------------------------------------------------------
36 --graph :: [Ngrams] -> [Ngrams]
37 --graph ngs = filter (\ng -> _ngramsListName ng == Just Graph) ngs
38 --
39 --candidates :: [Ngrams] -> [Ngrams]
40 --candidates ngs = filter (\ng -> _ngramsListName ng == Just Candidate) ngs
41 --
42 --stop :: [Ngrams] -> [Ngrams]
43 --stop ngs = filter (\ng -> _ngramsListName ng == Just Stop) ngs
44 ------------------------------------------------------------------------
45 -- | Attoparsec solution to index test
46 --import Data.Attoparsec.ByteString (Parser, parseOnly, try, string
47 -- , takeTill, take
48 -- , manyTill, many1)
49 --import Data.Attoparsec.ByteString.Char8 (anyChar, isEndOfLine)
50 --import Data.ByteString (ByteString, concat)
51 --import Data.ByteString.Char8 (pack)
52 --import Control.Applicative
53 -- | Attoparsec version
54 --indexParser :: (ByteString -> b) -> ByteString -> Parser b
55 --indexParser form2label x = do
56 -- _ <- manyTill anyChar (string x)
57 -- pure $ form2label x
58
59 --doIndex :: Applicative f => ByteString -> ByteString -> f (Either String [ByteString])
60 --doIndex f x txt = pure $ parseOnly (many $ indexParser f x) txt
61 ------------------------------------------------------------------------
62
63