]> Git — Sourcephile - gargantext.git/blob - src-test/Core/Text/Flow.hs
Merge remote-tracking branch 'origin/445-dev-doc-upload-lang' into dev
[gargantext.git] / src-test / Core / Text / Flow.hs
1 {-|
2 Module : Core.Text.Flow
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 From text to viz, all the flow of texts in Gargantext.
11
12 -}
13
14
15 module Core.Text.Flow
16 where
17
18 {-
19 import qualified Data.Text as T
20 --import Data.Text.IO (readFile)
21 import Database.PostgreSQL.Simple (Connection)
22 import GHC.IO (FilePath)
23 --import Gargantext.Core (Lang)
24 import Gargantext.Core.Types (CorpusId)
25 -}
26
27 {-
28 ____ _____ _
29 / ___| __ _ _ __ __ _ __ _ _ _|_ _|____ _| |_
30 | | _ / _` | '__/ _` |/ _` | '_ \| |/ _ \ \/ / __|
31 | |_| | (_| | | | (_| | (_| | | | | | __/> <| |_
32 \____|\__,_|_| \__, |\__,_|_| |_|_|\___/_/\_\\__|
33 |___/
34 -}
35
36
37 {-
38 contextText :: [T.Text]
39 contextText = ["The dog is an animal."
40 ,"The bird is an animal."
41 ,"The dog is an animal."
42 ,"The animal is a bird or a dog ?"
43 ,"The table is an object."
44 ,"The pen is an object."
45 ,"The object is a pen or a table ?"
46 ,"The girl is a human."
47 ,"The boy is a human."
48 ,"The boy or the girl are human."
49 ]
50
51
52 -- | Control the flow of text
53 data TextFlow = CSV FilePath
54 | FullText FilePath
55 | Contexts [T.Text]
56 | DBV3 Connection CorpusId
57 | Query T.Text
58
59 -}
60 {-
61 textFlow :: TermType Lang -> TextFlow -> IO Graph
62 textFlow termType workType = do
63 contexts <- case workType of
64 FullText path -> splitBy (Sentences 5) <$> readFile path
65 CSV path -> readCsvOn [csv_title, csv_abstract] path
66 Contexts ctxt -> pure ctxt
67 DBV3 con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (_node_hyperdata n) <> hyperdataDocumentV3_abstract (_node_hyperdata n))<$> runReaderT (getDocumentsV3WithParentId corpusId) con
68 _ -> undefined -- TODO Query not supported
69
70 textFlow' termType contexts
71
72
73 textFlow' :: TermType Lang -> [T.Text] -> IO Graph
74 textFlow' termType contexts = do
75 -- Context :: Text -> [Text]
76 -- Contexts = Paragraphs n | Sentences n | Chars n
77
78 myterms <- extractTerms termType contexts
79 -- TermsType = Mono | Multi | MonoMulti
80 -- myterms # filter (\t -> not . elem t stopList)
81 -- # groupBy (Stem|GroupList|Ontology)
82 --printDebug "terms" myterms
83 --printDebug "myterms" (sum $ map length myterms)
84
85 -- Building the map list
86 -- Compute copresences of terms, i.e. cooccurrences of terms in the same context of text
87 -- Cooc = Map (Term, Term) Int
88 let myCooc1 = coocOn (_terms_label) myterms
89 --printDebug "myCooc1 size" (M.size myCooc1)
90
91 -- Remove hapax (terms appearing only once) => lightening the matrix
92 let myCooc2 = Map.filter (>0) myCooc1
93 --printDebug "myCooc2 size" (M.size myCooc2)
94 --printDebug "myCooc2" myCooc2
95 g <- cooc2graph myCooc2
96 pure g
97 -}
98