]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Text/Flow.hs
[UPLOAD] 2 others file format
[gargantext.git] / src / Gargantext / Text / Flow.hs
1 {-|
2 Module : Gargantext.Text.Flow
3 Description : Flow of texts, from raw text to visualisation
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 From text to viz, all the flow of texts in Gargantext.
11
12 -}
13
14 {-# LANGUAGE NoImplicitPrelude #-}
15 {-# LANGUAGE OverloadedStrings #-}
16
17 module Gargantext.Text.Flow
18 where
19
20 import qualified Data.Text as T
21 --import Data.Text.IO (readFile)
22 import Database.PostgreSQL.Simple (Connection)
23 import GHC.IO (FilePath)
24 --import Gargantext.Core (Lang)
25 import Gargantext.Core.Types (CorpusId)
26
27 {-
28 ____ _ _
29 / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
30 | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
31 | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
32 \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
33 |___/
34 -}
35
36
-- | Ten short sample sentences, each one a context of text.
--
-- The first and third entries are the same sentence; the duplicate is kept
-- as it appears in the original list.
contextText :: [T.Text]
contextText =
  [ "The dog is an animal."
  , "The bird is an animal."
  , "The dog is an animal."
  , "The animal is a bird or a dog ?"
  , "The table is an object."
  , "The pen is an object."
  , "The object is a pen or a table ?"
  , "The girl is a human."
  , "The boy is a human."
  , "The boy or the girl are human."
  ]
49
50
-- | Control the flow of text: where the texts entering the flow come from.
--
-- The per-constructor notes describe how the commented-out @textFlow@ draft
-- in this module consumes each case.
data TextFlow
  = CSV FilePath
    -- ^ File handed to @readCsvOn@ with the title and abstract columns.
  | FullText FilePath
    -- ^ File read whole, then split into sentence-based contexts.
  | Contexts [T.Text]
    -- ^ Contexts that are already available; used as they are.
  | DBV3 Connection CorpusId
    -- ^ Documents fetched with @getDocumentsV3WithParentId@ for this corpus
    -- over the given connection.
  | Query T.Text
    -- ^ Not supported by the draft yet (marked TODO there).
57
58 {-
59 textFlow :: TermType Lang -> TextFlow -> IO Graph
60 textFlow termType workType = do
61 contexts <- case workType of
62 FullText path -> splitBy (Sentences 5) <$> readFile path
63 CSV path -> readCsvOn [csv_title, csv_abstract] path
64 Contexts ctxt -> pure ctxt
65 DBV3 con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (_node_hyperdata n) <> hyperdataDocumentV3_abstract (_node_hyperdata n))<$> runReaderT (getDocumentsV3WithParentId corpusId) con
66 _ -> undefined -- TODO Query not supported
67
68 textFlow' termType contexts
69
70
71 textFlow' :: TermType Lang -> [T.Text] -> IO Graph
72 textFlow' termType contexts = do
73 -- Context :: Text -> [Text]
74 -- Contexts = Paragraphs n | Sentences n | Chars n
75
76 myterms <- extractTerms termType contexts
77 -- TermsType = Mono | Multi | MonoMulti
78 -- myterms # filter (\t -> not . elem t stopList)
79 -- # groupBy (Stem|GroupList|Ontology)
80 --printDebug "terms" myterms
81 --printDebug "myterms" (sum $ map length myterms)
82
83 -- Building the map list
84 -- compute copresences of terms, i.e. cooccurrences of terms in same context of text
85 -- Cooc = Map (Term, Term) Int
86 let myCooc1 = coocOn (_terms_label) myterms
87 --printDebug "myCooc1 size" (M.size myCooc1)
88
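89 -- Remove hapax (co-occurrences seen only once) => lightening the matrix. NOTE(review): the (>0) filter below would keep pairs seen once; (>1) would drop them - confirm intent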
90 let myCooc2 = Map.filter (>0) myCooc1
91 --printDebug "myCooc2 size" (M.size myCooc2)
92 --printDebug "myCooc2" myCooc2
93 g <- cooc2graph myCooc2
94 pure g
95 -}
96