]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Utils/LangDetect_hs
[Structure] Ngrams -> Text.
[gargantext.git] / src / Gargantext / Utils / LangDetect_hs
1
2
3 -- DEFINITIONS as SPECS
4 -- (Engineering axioms for Gargantext)
5
6
7 ------------------------------------------------------------------------
8 -- From file to corpus
9 ------------------------------------------------------------------------
10
-- | A 'Corpus' is a list of 'Document's.
--
-- Declared as a type synonym: @data Corpus = [Document]@ is not valid
-- Haskell, because a @data@ declaration must introduce a constructor.
type Corpus = [Document]
13
-- | A 'Document' should have a date, some text and maybe a language.
--
-- Remarks:
--
-- * If there is no date, should one be forced?
-- * The analysed content is either text or numbers.
-- * Only one language per document.
data Document = Document
  { date :: UTCTime
    -- ^ Date of the document.
  , uce  :: Map Text (Either (Maybe Text) (Maybe Double))
    -- ^ Content keyed by 'Text'; each entry is either an optional text
    -- value ('Left') or an optional numeric value ('Right').
    -- The argument is parenthesized because @$@ cannot be used in a type.
  , lang :: Maybe Language
    -- ^ Language of the document, when known.
  }
23
-- | Specification stub: per its signature, takes an optional 'ParserType'
-- and a list of files and yields a 'Corpus'. Not implemented yet.
parseFiles
  :: Maybe ParserType
  -> [File]
  -> Corpus
parseFiles = undefined
26
-- | Parse one file with an explicitly chosen 'ParserType'.
--
-- This function already exists (in Python); here it is only a
-- specification stub.
parseFile'
  :: ParserType
  -> File
  -> Maybe [Document]
parseFile' = undefined
30
-- | Parse one file, guessing the parser type when none is given.
--
-- This function does not exist yet.
--
-- * No parser type given: guess one with 'guessParserType'. If a type is
--   found, parse again with it; otherwise ask the user.
-- * 'UnsupportedYet': ask the user which priority the format should get.
-- * Any other parser type: delegate to @parseFile'@.
--
-- NOTE(review): 'askUser' is declared as @Question -> ClosedAnswer@, which
-- does not match the @Maybe [Document]@ result expected in the two branches
-- below; the spec still has to say what is returned once the user has been
-- asked.
parseFile :: Maybe ParserType -> File -> Maybe [Document]
parseFile Nothing file =
  case guessParserType file of
    Nothing      -> askUser "Answer to the question with link to $doc"
    Just guessed -> parseFile (Just guessed) file
parseFile (Just UnsupportedYet) _ =
  askUser "Not supported yet, which priority ?"
-- Explicit catch-all equation: the original used @otherwise@ as a case
-- pattern, which binds a fresh variable shadowing 'Prelude.otherwise'.
parseFile (Just parserType) file = parseFile' parserType file
44
-- | The parser types a file can be read with, plus 'UnsupportedYet'.
data ParserType
  = RIS
  | ISI
  | XML
  | CSV
  | Europresse
  | Book
  | UnsupportedYet
-- | Specification stub: per its signature, maps a file to 'Just' a
-- 'ParserType' or to 'Nothing'. Not implemented yet.
guessParserType
  :: File
  -> Maybe ParserType
guessParserType = undefined
48
49
50 ------------------------------------------------------------------------
51 -- What kind of interactions with our users ?
52 ------------------------------------------------------------------------
53
-- | A question is text only.
type Question = Text
56
-- | Possible answers.
--
-- NOTE(review): these constructors are nullary; they only share their names
-- with the type synonyms below and carry no payload. Presumably each was
-- meant to wrap the synonym of the same name -- confirm with the authors.
data Answer
  = ClosedAnswer
  | NumAnswer
  | OpenAnswer

-- Definitions of the answers.

-- | A closed answer is a 'Bool'.
type ClosedAnswer = Bool

-- | An open answer is free 'Text'.
type OpenAnswer = Text

-- | A numeric answer is an 'Int'.
type NumAnswer = Int
-- | A form is a mapping from a question to maybe an answer.
--
-- * An empty form has 'Nothing' in the @Maybe Answer@ field.
-- * An answered question has the value @Just@ of the response.
type Formular = Map Question (Maybe Answer)
67
-- | Specification stub: per its signature, turns a 'Question' into a
-- 'ClosedAnswer'. Not implemented yet.
askUser
  :: Question
  -> ClosedAnswer
askUser = undefined
70
-- | The two kinds of advice produced by @askUser'@.
data Advice
  = BugReport
  | WishList
-- | Ask the question through 'askUser' and turn the closed answer into an
-- 'Advice': 'True' gives 'BugReport', 'False' gives 'WishList'.
askUser' :: Question -> Advice
askUser' question =
  if askUser question
    then BugReport
    else WishList
76
77
78 ------------------------------------------------------------------------
79 -- Specs for Lang Detection
80 ------------------------------------------------------------------------
-- | Languages covered by the language-detection spec.
data Language
  = English
  | French
82
-- | Unfinished specification sketch: maps a 'Document' to 'Ngrams'.
--
-- Only one path is written down: when the document has no language and
-- @guessLang@ finds none either, the result is @tag@.
--
-- NOTE(review): neither @case@ has a @Just@ alternative, so both matches are
-- non-exhaustive. @guessLang@, @tag@ and @Ngrams@ are not defined in the
-- visible part of the file -- confirm the intended behaviour before
-- completing this.
tagDoc :: Document -> Ngrams
tagDoc doc =
  case lang doc of
    Nothing ->
      case guessLang doc of
        Nothing -> tag
89
90
------------------------------------------------------------------------
-- Specs for ngrams Workflow
------------------------------------------------------------------------