]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/API/Tools.hs
[text-api] first rewrite using Conduit
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / API / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.API
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Viz.Phylo.API.Tools
14 where
15
16 import Data.Proxy
17 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
18 import Data.Map (Map)
19 import Data.Maybe (catMaybes)
20 import Data.Set (Set)
21 import Data.Text (Text, pack)
22 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
23 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
24 import Gargantext.API.Ngrams.Prelude (getTermList)
25 import Gargantext.API.Ngrams.Tools (getRepo')
26 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
27 import Gargantext.API.Node.Corpus.Export (getContextNgrams)
28 import Gargantext.API.Prelude (GargNoServer)
29 import Gargantext.Core.Text.Context (TermList)
30 import Gargantext.Core.Types (Context)
31 import Gargantext.Core.Types.Main (ListType(MapTerm))
32 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), Config(..), Phylo)
33 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
34 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
35 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
36 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
37 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
38 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
39 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
40 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
41 import Gargantext.Database.Schema.Context
42 import Gargantext.Database.Schema.Node
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Prelude
45 import Prelude as Prelude
46 import System.Process as Shell
47 import qualified Data.ByteString.Lazy as Lazy
48 import qualified Data.List as List
49 import qualified Data.Map as Map
50 import qualified Data.Set as Set
51
52
53 --------------------------------------------------------------------
54 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
55 getPhyloData phyloId = do
56 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
57 pure $ _hp_data $ _node_hyperdata nodePhylo
58
59 putPhylo :: PhyloId -> GargNoServer Phylo
60 putPhylo = undefined
61
62 savePhylo :: PhyloId -> GargNoServer ()
63 savePhylo = undefined
64
65 --------------------------------------------------------------------
66 phylo2dot2json :: Phylo -> IO Value
67 phylo2dot2json phylo = do
68
69 let
70 file_from = "/tmp/fromPhylo.json"
71 file_dot = "/tmp/tmp.dot"
72 file_to_json = "/tmp/toPhylo.json"
73
74 _ <- dotToFile file_from (toPhyloExport phylo)
75 _ <- Shell.callProcess "/usr/bin/dot" ["-Tdot", "-o", file_dot, file_from]
76 _ <- Shell.callProcess "/usr/bin/dot" ["-Txdot_json", "-o", file_to_json, file_dot]
77
78 maybeValue <- decodeFileStrict file_to_json
79
80 _ <- Shell.callProcess "/bin/rm" ["-rf", file_from, file_to_json, file_dot]
81
82 case maybeValue of
83 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
84 Just v -> pure v
85
86
87
88 flowPhyloAPI :: Config -> CorpusId -> GargNoServer Phylo
89 flowPhyloAPI config cId = do
90 (mapList, corpus) <- corpusIdtoDocuments (timeUnit config) cId
91 phyloWithCliques <- pure $ toPhyloStep corpus mapList config
92 -- writePhylo phyloWithCliquesFile phyloWithCliques
93 pure $ toPhylo (setConfig config phyloWithCliques)
94
95 --------------------------------------------------------------------
96 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document])
97 corpusIdtoDocuments timeUnit corpusId = do
98 docs <- selectDocNodes corpusId
99
100 lId <- defaultList corpusId
101 repo <- getRepo' [lId]
102
103 ngs_terms <- getContextNgrams corpusId lId MapTerm NgramsTerms repo
104 ngs_sources <- getContextNgrams corpusId lId MapTerm Sources repo
105
106 termList <- getTermList lId MapTerm NgramsTerms
107
108 case termList of
109 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
110 Just termList' -> pure (termList', docs')
111 where
112 docs' = catMaybes
113 $ List.map (\doc
114 -> context2phyloDocument timeUnit doc (ngs_terms, ngs_sources)
115 ) docs
116
117
118 context2phyloDocument :: TimeUnit
119 -> Context HyperdataDocument
120 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
121 -> Maybe Document
122 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
123 let contextId = _context_id context
124 (date, date') <- context2date context timeUnit
125 text <- Map.lookup contextId ngs_terms
126 sources <- Map.lookup contextId ngs_sources
127 pure $ Document date date'
128 (toText text)
129 Nothing
130 (toText sources)
131 where
132 toText x = Set.toList $ Set.map unNgramsTerm x
133
134
135 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
136 context2date context timeUnit = do
137 let hyperdata = _context_hyperdata context
138 year <- _hd_publication_year hyperdata
139 month <- _hd_publication_month hyperdata
140 day <- _hd_publication_day hyperdata
141 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
142
143
144 ---------------
145 -- | Dates | --
146 ---------------
147 toMonths :: Integer -> Int -> Int -> Date
148 toMonths y m d = fromIntegral $ cdMonths
149 $ diffGregorianDurationClip (fromGregorian y m d)
150 (fromGregorian 0000 0 0)
151
152
153 toDays :: Integer -> Int -> Int -> Date
154 toDays y m d = fromIntegral
155 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
156
157
158 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
159 toPhyloDate y m d tu = case tu of
160 Year _ _ _ -> y
161 Month _ _ _ -> toMonths (Prelude.toInteger y) m d
162 Week _ _ _ -> div (toDays (Prelude.toInteger y) m d) 7
163 Day _ _ _ -> toDays (Prelude.toInteger y) m d
164 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
165
166 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
167 toPhyloDate' y m d tu = case tu of
168 Epoch _ _ _ -> pack $ show $ posixSecondsToUTCTime $ fromIntegral y
169 Year _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
170 Month _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
171 Week _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
172 Day _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
173
174 -- Utils
175
176 writePhylo :: [Char] -> Phylo -> IO ()
177 writePhylo path phylo = Lazy.writeFile path $ encode phylo
178
179
180 readPhylo :: [Char] -> IO Phylo
181 readPhylo path = do
182 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
183 case phyloJson of
184 Left err -> do
185 putStrLn err
186 undefined
187 Right phylo -> pure phylo
188
189
190 -- | To read and decode a Json file
191 readJson :: FilePath -> IO Lazy.ByteString
192 readJson path = Lazy.readFile path
193
194
195
196