2 Module : Gargantext.Core.Text.Flow
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 From text to viz, all the flow of texts in Gargantext.
19 import qualified Data.Text as T
20 --import Data.Text.IO (readFile)
21 import Database.PostgreSQL.Simple (Connection)
22 import GHC.IO (FilePath)
23 --import Gargantext.Core (Lang)
24 import Gargantext.Core.Types (CorpusId)
29 / ___| __ _ _ __ __ _ __ _ _ _|_ _|____ _| |_
30 | | _ / _` | '__/ _` |/ _` | '_ \| |/ _ \ \/ / __|
31 | |_| | (_| | | | (_| | (_| | | | | | __/> <| |_
32 \____|\__,_|_| \__, |\__,_|_| |_|_|\___/_/\_\\__|
-- | Small built-in sample corpus: ten short sentences that can be fed to the
-- text flow as ready-made contexts.
--
-- The first sentence appears twice; the duplicate is kept exactly as it was.
contextText :: [T.Text]
contextText =
  [ "The dog is an animal."
  , "The bird is an animal."
  , "The dog is an animal."
  , "The animal is a bird or a dog ?"
  , "The table is an object."
  , "The pen is an object."
  , "The object is a pen or a table ?"
  , "The girl is a human."
  , "The boy is a human."
  , "The boy or the girl are human."
  ]
-- | Control the flow of text: the kinds of input a text flow can start from.
--
-- NOTE(review): 'FullText' and 'Contexts' are restored here because
-- 'textFlow' pattern-matches on them; their payload types are inferred from
-- that usage. 'textFlow' also has a catch-all branch ("Query not supported"),
-- so further constructors may belong here -- confirm against the full file.
data TextFlow
  = CSV FilePath              -- ^ path given to the CSV reader
  | FullText FilePath         -- ^ path of a text file read as a whole
  | Contexts [T.Text]         -- ^ contexts that are already available
  | DBV3 Connection CorpusId  -- ^ database connection and corpus identifier
-- | Build a 'Graph' from one of the supported 'TextFlow' sources.
--
-- The source is first turned into a list of text contexts, which is then
-- passed to @textFlow'@ together with the requested term type:
--
--   * @FullText path@: the file is read and split with @splitBy (Sentences 5)@.
--   * @CSV path@: contexts come from @readCsvOn [csv_title, csv_abstract]@.
--   * @Contexts ctxt@: the given contexts are used unchanged.
--   * @DBV3 con corpusId@: for every document returned by
--     @getDocumentsV3WithParentId corpusId@, title and abstract are combined
--     with '<>'; results that are 'Nothing' are dropped by 'catMaybes'.
--
-- Any other source raises an error (TODO: Query not supported).
textFlow :: TermType Lang -> TextFlow -> IO Graph
textFlow termType workType = do
  contexts <- case workType of
    FullText path     -> splitBy (Sentences 5) <$> readFile path
    CSV path          -> readCsvOn [csv_title, csv_abstract] path
    Contexts ctxt     -> pure ctxt
    DBV3 con corpusId ->
      catMaybes . map documentText
        <$> runReaderT (getDocumentsV3WithParentId corpusId) con
    -- Fail with an explicit message rather than a bare 'undefined', so the
    -- unsupported case can be identified from the error alone.
    _ ->
      error
        "Gargantext.Core.Text.Flow.textFlow: unsupported TextFlow source (TODO Query not supported)"

  textFlow' termType contexts
  where
    -- Title followed by abstract of one document node.
    -- NOTE(review): the two fields are joined with '<>' and no separator, so
    -- the end of the title touches the start of the abstract -- confirm this
    -- is intended before terms are extracted from the result.
    documentText node =
      let hyperdata = _node_hyperdata node
       in hyperdataDocumentV3_title hyperdata
            <> hyperdataDocumentV3_abstract hyperdata
73 textFlow' :: TermType Lang -> [T.Text] -> IO Graph
74 textFlow' termType contexts = do
75 -- Context :: Text -> [Text]
76 -- Contexts = Paragraphs n | Sentences n | Chars n
78 myterms <- extractTerms termType contexts
79 -- TermsType = Mono | Multi | MonoMulti
80 -- myterms # filter (\t -> not . elem t stopList)
81 -- # groupBy (Stem|GroupList|Ontology)
82 --printDebug "terms" myterms
83 --printDebug "myterms" (sum $ map length myterms)
85 -- Building the map list
86 -- compute copresences of terms, i.e. cooccurrences of terms in same context of text
87 -- Cooc = Map (Term, Term) Int
88 let myCooc1 = coocOn (_terms_label) myterms
89 --printDebug "myCooc1 size" (M.size myCooc1)
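91 -- Remove hapax (entries that appear only once) => lightens the matrix. NOTE(review): the filter below uses (>0), which keeps entries seen once; confirm the intended threshold.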
92 let myCooc2 = Map.filter (>0) myCooc1
93 --printDebug "myCooc2 size" (M.size myCooc2)
94 --printDebug "myCooc2" myCooc2
95 g <- cooc2graph myCooc2