2 Module : Gargantext.Core.Viz.Phylo.API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Viz.Phylo.API.Tools
15 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
16 import Data.Map.Strict (Map)
17 import Data.Maybe (catMaybes)
20 import Data.Text (Text, pack)
21 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
22 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
23 import Gargantext.API.Ngrams.Prelude (getTermList)
24 import Gargantext.API.Ngrams.Tools (getRepo)
25 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
26 import Gargantext.API.Node.Corpus.Export (getContextNgrams)
27 import Gargantext.API.Prelude (GargNoServer)
28 import Gargantext.Core.Text.Context (TermList)
29 import Gargantext.Core.Types (Context)
30 -- import Gargantext.Core.Types.Individu (User(..))
31 import Gargantext.Core.Types.Main (ListType(MapTerm))
32 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
33 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
34 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
35 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
36 -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
37 -- import Gargantext.Database.Admin.Config (userMaster)
38 -- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
39 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
40 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
41 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
42 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
43 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
44 import Gargantext.Database.Schema.Context
45 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
46 import Gargantext.Database.Schema.Node
47 import Gargantext.Prelude
49 import System.FilePath ((</>))
50 import System.IO.Temp (withTempDirectory)
51 import System.Process as Shell
52 import qualified Data.ByteString.Lazy as Lazy
53 import qualified Data.List as List
54 import qualified Data.Map.Strict as Map
55 import qualified Data.Set as Set
57 --------------------------------------------------------------------
58 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
59 getPhyloData phyloId = do
60 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
61 pure $ _hp_data $ _node_hyperdata nodePhylo
63 putPhylo :: PhyloId -> GargNoServer Phylo
66 savePhylo :: PhyloId -> GargNoServer ()
69 --------------------------------------------------------------------
70 phylo2dot2json :: Phylo -> IO Value
71 phylo2dot2json phylo = do
72 withTempDirectory "/tmp" "phylo" $ \dirPath -> do
73 let fileFrom = dirPath </> "phyloFrom.dot"
74 fileDot = dirPath </> "phylo.dot"
75 fileToJson = dirPath </> "output.json"
77 dotToFile fileFrom (toPhyloExport phylo)
79 -- parsing a file can be done with:
80 -- runParser' (Data.GraphViz.Parsing.parse :: Parse (Data.GraphViz.DotGraph Text)) $ TL.fromStrict f
81 Shell.callProcess "dot" ["-Tdot", "-o", fileDot, fileFrom]
82 Shell.callProcess "dot" ["-Txdot_json", "-o", fileToJson, fileDot]
84 maybeValue <- decodeFileStrict fileToJson
88 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
92 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
93 flowPhyloAPI config cId = do
94 (_, corpus) <- corpusIdtoDocuments (timeUnit config) cId
95 let phyloWithCliques = toPhyloWithoutLink corpus config
96 -- writePhylo phyloWithCliquesFile phyloWithCliques
97 pure $ toPhylo (setConfig config phyloWithCliques)
99 --------------------------------------------------------------------
100 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document])
101 corpusIdtoDocuments timeUnit corpusId = do
102 docs <- selectDocNodes corpusId
103 printDebug "docs *****" (length docs)
104 lId <- defaultList corpusId
107 (_masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus
108 (UserName userMaster)
110 (Nothing :: Maybe HyperdataCorpus)
111 mListId <- defaultList masterCorpusId
112 repo <- getRepo [mListId,lId]
114 repo <- getRepo [lId]
115 -- ngs_terms' <- getContextNgrams corpusId mListId MapTerm NgramsTerms repo
117 ngs_terms <- getContextNgrams corpusId lId MapTerm NgramsTerms repo
118 printDebug "Size ngs_coterms *****" (length ngs_terms)
120 ngs_sources <- getContextNgrams corpusId lId MapTerm Sources repo
121 printDebug "Size ngs_sources Map Sources *****" (length ngs_sources)
123 termList <- getTermList lId MapTerm NgramsTerms
124 printDebug "Size ngs_terms List Map Ngrams *****" (length <$> termList)
128 -> context2phyloDocument timeUnit doc (ngs_terms {-<> ngs_terms'-}, ngs_sources)
131 -- printDebug "corpusIdtoDocuments" (Prelude.map date docs')
134 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
135 Just termList' -> pure (termList', docs')
137 context2phyloDocument :: TimeUnit
138 -> Context HyperdataDocument
139 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
141 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
142 let contextId = _context_id context
143 (date, date') <- context2date context timeUnit
146 toText x = Set.toList $ Set.map unNgramsTerm x
148 text' = maybe [] toText $ Map.lookup contextId ngs_terms
149 sources' = maybe [] toText $ Map.lookup contextId ngs_sources
151 pure $ Document date date' text' Nothing sources'
154 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
155 context2date context timeUnit = do
156 let hyperdata = _context_hyperdata context
157 year <- _hd_publication_year hyperdata
158 month <- _hd_publication_month hyperdata
159 day <- _hd_publication_day hyperdata
160 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
166 toMonths :: Integer -> Int -> Int -> Date
167 toMonths y m d = fromIntegral $ cdMonths
168 $ diffGregorianDurationClip (fromGregorian y m d)
169 (fromGregorian 0000 0 0)
171 toDays :: Integer -> Int -> Int -> Date
172 toDays y m d = fromIntegral
173 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
175 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
176 toPhyloDate y m d tu = case tu of
178 Month {} -> toMonths (Prelude.toInteger y) m d
179 Week {} -> div (toDays (Prelude.toInteger y) m d) 7
180 Day {} -> toDays (Prelude.toInteger y) m d
181 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
183 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
184 toPhyloDate' y _m _d (Epoch {}) = pack $ show $ posixSecondsToUTCTime $ fromIntegral y
185 toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toInteger y) m d
189 writePhylo :: [Char] -> Phylo -> IO ()
190 writePhylo path phylo = Lazy.writeFile path $ encode phylo
193 readPhylo :: [Char] -> IO Phylo
195 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
200 Right phylo -> pure phylo
203 -- | To read and decode a Json file
204 readJson :: FilePath -> IO Lazy.ByteString
205 readJson = Lazy.readFile