]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/API/Tools.hs
[zip] implement zip for parsers
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / API / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.API
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12
13 module Gargantext.Core.Viz.Phylo.API.Tools
14 where
15
16 import Data.Proxy
17 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
18 import Data.Map (Map)
19 import Data.Maybe (catMaybes)
20 import Data.Set (Set)
21 import Data.Text (Text, pack)
22 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
23 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
24 import Gargantext.API.Ngrams.Prelude (getTermList)
25 import Gargantext.API.Ngrams.Tools (getRepo')
26 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
27 import Gargantext.API.Node.Corpus.Export (getContextNgrams)
28 import Gargantext.API.Prelude (GargNoServer)
29 import Gargantext.Core.Text.Context (TermList)
30 import Gargantext.Core.Types (Context)
31 import Gargantext.Core.Types.Main (ListType(MapTerm))
32 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
33 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
34 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloStep)
35 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
36 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
37 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
38 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
39 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
40 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
41 import Gargantext.Database.Schema.Context
42 import Gargantext.Database.Schema.Node
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Prelude
45 import Prelude
46 import System.Process as Shell
47 import qualified Data.ByteString.Lazy as Lazy
48 import qualified Data.List as List
49 import qualified Data.Map as Map
50 import qualified Data.Set as Set
51
52
53 --------------------------------------------------------------------
54 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
55 getPhyloData phyloId = do
56 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
57 pure $ _hp_data $ _node_hyperdata nodePhylo
58
59 putPhylo :: PhyloId -> GargNoServer Phylo
60 putPhylo = undefined
61
62 savePhylo :: PhyloId -> GargNoServer ()
63 savePhylo = undefined
64
65 --------------------------------------------------------------------
66 phylo2dot2json :: Phylo -> IO Value
67 phylo2dot2json phylo = do
68
69 let
70 file_from = "/tmp/fromPhylo.json"
71 file_dot = "/tmp/tmp.dot"
72 file_to_json = "/tmp/toPhylo.json"
73
74 _ <- dotToFile file_from (toPhyloExport phylo)
75 _ <- Shell.callProcess "dot" ["-Tdot", "-o", file_dot, file_from]
76 _ <- Shell.callProcess "dot" ["-Txdot_json", "-o", file_to_json, file_dot]
77
78 maybeValue <- decodeFileStrict file_to_json
79
80 _ <- Shell.callProcess "/bin/rm" ["-rf", file_from, file_to_json, file_dot]
81
82 case maybeValue of
83 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
84 Just v -> pure v
85
86
87
88 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
89 flowPhyloAPI config cId = do
90 (mapList, corpus) <- corpusIdtoDocuments (timeUnit config) cId
91 phyloWithCliques <- pure $ toPhyloStep corpus mapList config
92 -- writePhylo phyloWithCliquesFile phyloWithCliques
93 pure $ toPhylo (setConfig config phyloWithCliques)
94
95 --------------------------------------------------------------------
96 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document])
97 corpusIdtoDocuments timeUnit corpusId = do
98 docs <- selectDocNodes corpusId
99
100 lId <- defaultList corpusId
101 repo <- getRepo' [lId]
102
103 ngs_terms <- getContextNgrams corpusId lId MapTerm NgramsTerms repo
104 ngs_sources <- getContextNgrams corpusId lId MapTerm Sources repo
105
106 termList <- getTermList lId MapTerm NgramsTerms
107
108 case termList of
109 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
110 Just termList' -> pure (termList', docs')
111 where
112 docs' = catMaybes
113 $ List.map (\doc
114 -> context2phyloDocument timeUnit doc (ngs_terms, ngs_sources)
115 ) docs
116
117
118 context2phyloDocument :: TimeUnit
119 -> Context HyperdataDocument
120 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
121 -> Maybe Document
122 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
123 let contextId = _context_id context
124 (date, date') <- context2date context timeUnit
125 text <- Map.lookup contextId ngs_terms
126 sources <- Map.lookup contextId ngs_sources
127 pure $ Document date date'
128 (toText text)
129 Nothing
130 (toText sources)
131 where
132 toText x = Set.toList $ Set.map unNgramsTerm x
133
134
135 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
136 context2date context timeUnit = do
137 let hyperdata = _context_hyperdata context
138 year <- _hd_publication_year hyperdata
139 month <- _hd_publication_month hyperdata
140 day <- _hd_publication_day hyperdata
141 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
142
143
144 ---------------
145 -- | Dates | --
146 ---------------
147 toMonths :: Integer -> Int -> Int -> Date
148 toMonths y m d = fromIntegral $ cdMonths
149 $ diffGregorianDurationClip (fromGregorian y m d)
150 (fromGregorian 0000 0 0)
151
152 toDays :: Integer -> Int -> Int -> Date
153 toDays y m d = fromIntegral
154 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
155
156 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
157 toPhyloDate y m d tu = case tu of
158 Year _ _ _ -> y
159 Month _ _ _ -> toMonths (Prelude.toInteger y) m d
160 Week _ _ _ -> div (toDays (Prelude.toInteger y) m d) 7
161 Day _ _ _ -> toDays (Prelude.toInteger y) m d
162 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
163
164 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
165 toPhyloDate' y m d tu = case tu of
166 Epoch _ _ _ -> pack $ show $ posixSecondsToUTCTime $ fromIntegral y
167 Year _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
168 Month _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
169 Week _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
170 Day _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
171
172 -- Utils
173
174 writePhylo :: [Char] -> Phylo -> IO ()
175 writePhylo path phylo = Lazy.writeFile path $ encode phylo
176
177
178 readPhylo :: [Char] -> IO Phylo
179 readPhylo path = do
180 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
181 case phyloJson of
182 Left err -> do
183 putStrLn err
184 undefined
185 Right phylo -> pure phylo
186
187
188 -- | To read and decode a Json file
189 readJson :: FilePath -> IO Lazy.ByteString
190 readJson path = Lazy.readFile path
191
192