]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Text/Flow.hs
[ngrams] add score update endpoint + sorting
[gargantext.git] / src / Gargantext / Core / Text / Flow.hs
1 {-|
2 Module : Gargantext.Core.Text.Flow
3 Description : Server API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 From text to viz, all the flow of texts in Gargantext.
11
12 -}
13
14
15 module Gargantext.Core.Text.Flow
16 where
17
18 import qualified Data.Text as T
19 --import Data.Text.IO (readFile)
20 import Database.PostgreSQL.Simple (Connection)
21 import GHC.IO (FilePath)
22 --import Gargantext.Core (Lang)
23 import Gargantext.Core.Types (CorpusId)
24
25 {-
26 ____ _ _
27 / ___| __ _ _ __ __ _ __ _ _ __ | |_ _____ _| |_
28 | | _ / _` | '__/ _` |/ _` | '_ \| __/ _ \ \/ / __|
29 | |_| | (_| | | | (_| | (_| | | | | || __/> <| |_
30 \____|\__,_|_| \__, |\__,_|_| |_|\__\___/_/\_\\__|
31 |___/
32 -}
33
34
-- | Small hard-coded sample corpus: ten short English sentences, listed
-- theme by theme (animals, then objects, then humans).
contextText :: [T.Text]
contextText = T.pack <$> concat [animalSentences, objectSentences, humanSentences]
  where
    animalSentences =
      [ "The dog is an animal."
      , "The bird is an animal."
      , "The dog is an animal." -- same sentence as the first entry; kept as in the source list
      , "The animal is a bird or a dog ?"
      ]
    objectSentences =
      [ "The table is an object."
      , "The pen is an object."
      , "The object is a pen or a table ?"
      ]
    humanSentences =
      [ "The girl is a human."
      , "The boy is a human."
      , "The boy or the girl are human."
      ]
47
48
-- | Control the flow of text: each constructor names one source of input.
--
-- The per-constructor notes describe how the commented-out @textFlow@ draft
-- in this module consumes each case; that draft is not compiled code.
data TextFlow
  = CSV FilePath
    -- ^ Path handed to @readCsvOn@ in the draft (title and abstract columns).
  | FullText FilePath
    -- ^ Path of a file the draft reads and splits with @Sentences 5@.
  | Contexts [T.Text]
    -- ^ Contexts supplied directly; the draft uses them unchanged.
  | DBV3 Connection CorpusId
    -- ^ Database connection and corpus id; the draft fetches the corpus
    -- documents via @getDocumentsV3WithParentId@.
  | Query T.Text
    -- ^ Textual query; the draft marks this case as not supported (TODO).
55
56 {-
57 textFlow :: TermType Lang -> TextFlow -> IO Graph
58 textFlow termType workType = do
59 contexts <- case workType of
60 FullText path -> splitBy (Sentences 5) <$> readFile path
61 CSV path -> readCsvOn [csv_title, csv_abstract] path
62 Contexts ctxt -> pure ctxt
63 DBV3 con corpusId -> catMaybes <$> map (\n -> hyperdataDocumentV3_title (_node_hyperdata n) <> hyperdataDocumentV3_abstract (_node_hyperdata n))<$> runReaderT (getDocumentsV3WithParentId corpusId) con
64 _ -> undefined -- TODO Query not supported
65
66 textFlow' termType contexts
67
68
69 textFlow' :: TermType Lang -> [T.Text] -> IO Graph
70 textFlow' termType contexts = do
71 -- Context :: Text -> [Text]
72 -- Contexts = Paragraphs n | Sentences n | Chars n
73
74 myterms <- extractTerms termType contexts
75 -- TermsType = Mono | Multi | MonoMulti
76 -- myterms # filter (\t -> not . elem t stopList)
77 -- # groupBy (Stem|GroupList|Ontology)
78 --printDebug "terms" myterms
79 --printDebug "myterms" (sum $ map length myterms)
80
81 -- Building the map list
82 -- compute copresences of terms, i.e. cooccurrences of terms in same context of text
83 -- Cooc = Map (Term, Term) Int
84 let myCooc1 = coocOn (_terms_label) myterms
85 --printDebug "myCooc1 size" (M.size myCooc1)
86
87 -- Remove hapax: terms appearing only once => lightening the matrix (NOTE: the (>0) filter below keeps count-1 entries; (>1) may be intended)
88 let myCooc2 = Map.filter (>0) myCooc1
89 --printDebug "myCooc2 size" (M.size myCooc2)
90 --printDebug "myCooc2" myCooc2
91 g <- cooc2graph myCooc2
92 pure g
93 -}
94