2 Module : Gargantext.Core.Text.Flow
3 Description : Text flow, from raw text to graph
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 From text to viz, all the flow of texts in Gargantext.
15 module Gargantext.Core.Text.Flow
import qualified Data.Text as T
--import Data.Text.IO (readFile)
import Database.PostgreSQL.Simple (Connection)
import qualified GHC.Err as Err
import GHC.IO (FilePath)
--import Gargantext.Core (Lang)
import Gargantext.Core.Types (CorpusId)
27 / ___| __ _ _ __ __ _ __ _ _ _|_ _|____ _| |_
28 | | _ / _` | '__/ _` |/ _` | '_ \| |/ _ \ \/ / __|
29 | |_| | (_| | | | (_| | (_| | | | | | __/> <| |_
30 \____|\__,_|_| \__, |\__,_|_| |_|_|\___/_/\_\\__|
-- | Small built-in sample corpus: ten short English sentences, each one
-- usable as a single context.
--
-- The first and third entries are the same sentence; the duplicate is kept
-- exactly as written.
--
-- NOTE(review): presumably meant as demo input for @textFlow'@ — nothing in
-- the visible part of this module references it; confirm before removing.
contextText :: [T.Text]
contextText =
  [ "The dog is an animal."
  , "The bird is an animal."
  , "The dog is an animal."
  , "The animal is a bird or a dog ?"
  , "The table is an object."
  , "The pen is an object."
  , "The object is a pen or a table ?"
  , "The girl is a human."
  , "The boy is a human."
  , "The boy or the girl are human."
  ]
-- | Control the flow of text: each constructor names one source of contexts
-- that 'textFlow' knows how to read.
--
-- NOTE(review): the fallback branch of 'textFlow' carries a
-- "TODO Query not supported" remark, but no @Query@ constructor is visible
-- here and its argument type cannot be determined from this file — confirm
-- whether one has to be declared as well.
data TextFlow
  = CSV FilePath
    -- ^ Path handed to @readCsvOn [csv_title, csv_abstract]@ by 'textFlow'.
  | FullText FilePath
    -- ^ Path of a file that 'textFlow' reads with @readFile@ and then cuts
    -- with @splitBy (Sentences 5)@.
  | Contexts [T.Text]
    -- ^ Contexts that are already available; 'textFlow' uses them as is.
  | DBV3 Connection CorpusId
    -- ^ Connection and corpus id; 'textFlow' runs
    -- @getDocumentsV3WithParentId@ on the corpus id with this connection.
-- | Build a graph from one of the supported text sources.
--
-- Collects the contexts described by the 'TextFlow' value, then hands them,
-- together with the unchanged @termType@, to @textFlow'@.
--
-- * @termType@: forwarded as is to @textFlow'@.
-- * @workType@: selects where the contexts are read from (see the branches
--   below).
--
-- Fails with an 'Err.error' call, carrying an explicit message, for any
-- 'TextFlow' constructor that has no dedicated branch.
textFlow :: TermType Lang -> TextFlow -> IO Graph
textFlow termType workType = do
  contexts <- case workType of
    -- Whole file read at once, then split with the @Sentences 5@ setting.
    FullText path -> splitBy (Sentences 5) <$> readFile path
    -- Only the title and abstract columns are requested from the CSV reader.
    CSV path -> readCsvOn [csv_title, csv_abstract] path
    -- Contexts supplied by the caller are used untouched.
    Contexts ctxt -> pure ctxt
    -- One context per document; documents yielding 'Nothing' are dropped.
    DBV3 con corpusId -> do
      docs <- runReaderT (getDocumentsV3WithParentId corpusId) con
      pure (catMaybes (map documentText docs))
    -- TODO Query not supported
    _ -> Err.error "Gargantext.Core.Text.Flow.textFlow: this TextFlow source is not supported yet"
  textFlow' termType contexts
  where
    -- Title and abstract of a document node, combined with '<>'.
    -- NOTE(review): the two fields are joined without any separator, so the
    -- last word of the title touches the first word of the abstract —
    -- confirm this is intended before term extraction.
    documentText node =
      hyperdataDocumentV3_title hyperdata <> hyperdataDocumentV3_abstract hyperdata
      where
        hyperdata = _node_hyperdata node
69 textFlow' :: TermType Lang -> [T.Text] -> IO Graph
70 textFlow' termType contexts = do
71 -- Context :: Text -> [Text]
72 -- Contexts = Paragraphs n | Sentences n | Chars n
74 myterms <- extractTerms termType contexts
75 -- TermsType = Mono | Multi | MonoMulti
76 -- myterms # filter (\t -> not . elem t stopList)
77 -- # groupBy (Stem|GroupList|Ontology)
78 --printDebug "terms" myterms
79 --printDebug "myterms" (sum $ map length myterms)
81 -- Building the map list
82 -- compute copresences of terms, i.e. cooccurrences of terms in same context of text
83 -- Cooc = Map (Term, Term) Int
84 let myCooc1 = coocOn (_terms_label) myterms
85 --printDebug "myCooc1 size" (M.size myCooc1)
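87 -- Remove hapax: entries that appear one time only => lightening the matrix. NOTE(review): the filter below keeps every count > 0, so entries seen once are kept; confirm the intended threshold.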
88 let myCooc2 = Map.filter (>0) myCooc1
89 --printDebug "myCooc2 size" (M.size myCooc2)
90 --printDebug "myCooc2" myCooc2
91 g <- cooc2graph myCooc2