]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/API/Tools.hs
[phylo] some small phyloexport refactoring
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / API / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.API
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Viz.Phylo.API.Tools
13 where
14
15 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
16 import Data.Map.Strict (Map)
17 import Data.Maybe (catMaybes)
18 import Data.Proxy
19 import Data.Set (Set)
20 import Data.Text (Text, pack)
21 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
22 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
23 import Gargantext.API.Ngrams.Prelude (getTermList)
24 import Gargantext.API.Ngrams.Tools (getRepo)
25 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
26 import Gargantext.API.Node.Corpus.Export (getContextNgrams)
27 import Gargantext.API.Prelude (GargNoServer)
28 import Gargantext.Core.Text.Context (TermList)
29 import Gargantext.Core.Types (Context)
30 -- import Gargantext.Core.Types.Individu (User(..))
31 import Gargantext.Core.Types.Main (ListType(MapTerm))
32 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
33 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
34 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
35 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
36 -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
37 -- import Gargantext.Database.Admin.Config (userMaster)
38 -- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
39 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
40 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
41 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
42 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
43 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
44 import Gargantext.Database.Schema.Context
45 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
46 import Gargantext.Database.Schema.Node
47 import Gargantext.Prelude
48 import Prelude
49 import System.FilePath ((</>))
50 import System.IO.Temp (withTempDirectory)
51 import System.Process as Shell
52 import qualified Data.ByteString.Lazy as Lazy
53 import qualified Data.List as List
54 import qualified Data.Map.Strict as Map
55 import qualified Data.Set as Set
56
57 --------------------------------------------------------------------
58 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
59 getPhyloData phyloId = do
60 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
61 pure $ _hp_data $ _node_hyperdata nodePhylo
62
63 putPhylo :: PhyloId -> GargNoServer Phylo
64 putPhylo = undefined
65
66 savePhylo :: PhyloId -> GargNoServer ()
67 savePhylo = undefined
68
69 --------------------------------------------------------------------
70 phylo2dot2json :: Phylo -> IO Value
71 phylo2dot2json phylo = do
72 withTempDirectory "/tmp" "phylo" $ \dirPath -> do
73 let fileFrom = dirPath </> "phyloFrom.dot"
74 fileDot = dirPath </> "phylo.dot"
75 fileToJson = dirPath </> "output.json"
76
77 dotToFile fileFrom (toPhyloExport phylo)
78
79 -- parsing a file can be done with:
80 -- runParser' (Data.GraphViz.Parsing.parse :: Parse (Data.GraphViz.DotGraph Text)) $ TL.fromStrict f
81 Shell.callProcess "dot" ["-Tdot", "-o", fileDot, fileFrom]
82 Shell.callProcess "dot" ["-Txdot_json", "-o", fileToJson, fileDot]
83
84 maybeValue <- decodeFileStrict fileToJson
85 print maybeValue
86
87 case maybeValue of
88 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
89 Just v -> pure v
90
91
92 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
93 flowPhyloAPI config cId = do
94 (_, corpus) <- corpusIdtoDocuments (timeUnit config) cId
95 let phyloWithCliques = toPhyloWithoutLink corpus config
96 -- writePhylo phyloWithCliquesFile phyloWithCliques
97 pure $ toPhylo (setConfig config phyloWithCliques)
98
99 --------------------------------------------------------------------
100 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document])
101 corpusIdtoDocuments timeUnit corpusId = do
102 docs <- selectDocNodes corpusId
103 printDebug "docs *****" (length docs)
104 lId <- defaultList corpusId
105
106 {-
107 (_masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus
108 (UserName userMaster)
109 (Left "")
110 (Nothing :: Maybe HyperdataCorpus)
111 mListId <- defaultList masterCorpusId
112 repo <- getRepo [mListId,lId]
113 -}
114 repo <- getRepo [lId]
115 -- ngs_terms' <- getContextNgrams corpusId mListId MapTerm NgramsTerms repo
116
117 ngs_terms <- getContextNgrams corpusId lId MapTerm NgramsTerms repo
118 printDebug "Size ngs_coterms *****" (length ngs_terms)
119
120 ngs_sources <- getContextNgrams corpusId lId MapTerm Sources repo
121 printDebug "Size ngs_sources Map Sources *****" (length ngs_sources)
122
123 termList <- getTermList lId MapTerm NgramsTerms
124 printDebug "Size ngs_terms List Map Ngrams *****" (length <$> termList)
125
126 let docs'= catMaybes
127 $ List.map (\doc
128 -> context2phyloDocument timeUnit doc (ngs_terms {-<> ngs_terms'-}, ngs_sources)
129 ) docs
130
131 -- printDebug "corpusIdtoDocuments" (Prelude.map date docs')
132
133 case termList of
134 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
135 Just termList' -> pure (termList', docs')
136
137 context2phyloDocument :: TimeUnit
138 -> Context HyperdataDocument
139 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
140 -> Maybe Document
141 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
142 let contextId = _context_id context
143 (date, date') <- context2date context timeUnit
144
145 let
146 toText x = Set.toList $ Set.map unNgramsTerm x
147
148 text' = maybe [] toText $ Map.lookup contextId ngs_terms
149 sources' = maybe [] toText $ Map.lookup contextId ngs_sources
150
151 pure $ Document date date' text' Nothing sources'
152
153
154 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
155 context2date context timeUnit = do
156 let hyperdata = _context_hyperdata context
157 year <- _hd_publication_year hyperdata
158 month <- _hd_publication_month hyperdata
159 day <- _hd_publication_day hyperdata
160 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
161
162
163 ---------------
164 -- | Dates | --
165 ---------------
166 toMonths :: Integer -> Int -> Int -> Date
167 toMonths y m d = fromIntegral $ cdMonths
168 $ diffGregorianDurationClip (fromGregorian y m d)
169 (fromGregorian 0000 0 0)
170
171 toDays :: Integer -> Int -> Int -> Date
172 toDays y m d = fromIntegral
173 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
174
175 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
176 toPhyloDate y m d tu = case tu of
177 Year {} -> y
178 Month {} -> toMonths (Prelude.toInteger y) m d
179 Week {} -> div (toDays (Prelude.toInteger y) m d) 7
180 Day {} -> toDays (Prelude.toInteger y) m d
181 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
182
183 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
184 toPhyloDate' y _m _d (Epoch {}) = pack $ show $ posixSecondsToUTCTime $ fromIntegral y
185 toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toInteger y) m d
186
187 -- Utils
188
189 writePhylo :: [Char] -> Phylo -> IO ()
190 writePhylo path phylo = Lazy.writeFile path $ encode phylo
191
192
193 readPhylo :: [Char] -> IO Phylo
194 readPhylo path = do
195 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
196 case phyloJson of
197 Left err -> do
198 putStrLn err
199 undefined
200 Right phylo -> pure phylo
201
202
203 -- | To read and decode a Json file
204 readJson :: FilePath -> IO Lazy.ByteString
205 readJson = Lazy.readFile