2 Module : Gargantext.Core.Viz.Phylo.API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Viz.Phylo.API.Tools
15 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
16 import Data.Map.Strict (Map)
17 import Data.Maybe (catMaybes)
20 import Data.Text (Text, pack)
21 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
22 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
23 import Gargantext.API.Ngrams.Prelude (getTermList)
24 import Gargantext.API.Ngrams.Tools (getRepo)
25 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
26 import Gargantext.API.Node.Corpus.Export (getContextNgrams)
27 import Gargantext.API.Prelude (GargNoServer)
28 import Gargantext.Core.Text.Context (TermList)
29 import Gargantext.Core.Types (Context)
30 -- import Gargantext.Core.Types.Individu (User(..))
31 import Gargantext.Core.Types.Main (ListType(MapTerm))
32 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
33 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
34 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
35 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
36 -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
37 -- import Gargantext.Database.Admin.Config (userMaster)
38 -- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
39 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
40 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
41 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
42 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
43 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
44 import Gargantext.Database.Schema.Context
45 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
46 import Gargantext.Database.Schema.Node
47 import Gargantext.Prelude
49 import System.FilePath ((</>))
50 import System.IO.Temp (withTempDirectory)
51 import System.Process as Shell
52 import qualified Data.ByteString.Lazy as Lazy
53 import qualified Data.List as List
54 import qualified Data.Map.Strict as Map
55 import qualified Data.Set as Set
57 --------------------------------------------------------------------
58 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
59 getPhyloData phyloId = do
60 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
61 pure $ _hp_data $ _node_hyperdata nodePhylo
63 putPhylo :: PhyloId -> GargNoServer Phylo
66 savePhylo :: PhyloId -> GargNoServer ()
69 --------------------------------------------------------------------
70 phylo2dot2json :: Phylo -> IO Value
71 phylo2dot2json phylo = do
72 withTempDirectory "/tmp" "phylo" $ \dirPath -> do
73 let fileFrom = dirPath </> "phyloFrom.dot"
74 fileDot = dirPath </> "phylo.dot"
75 fileToJson = dirPath </> "output.json"
77 dotToFile fileFrom (toPhyloExport phylo)
79 -- parsing a file can be done with:
80 -- runParser' (Data.GraphViz.Parsing.parse :: Parse (Data.GraphViz.DotGraph Text)) $ TL.fromStrict f
82 Shell.callProcess "dot" ["-Tdot", "-o", fileDot, fileFrom]
83 Shell.callProcess "dot" ["-Txdot_json", "-o", fileToJson, fileDot]
85 maybeValue <- decodeFileStrict fileToJson
89 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
93 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
94 flowPhyloAPI config cId = do
95 (_, corpus) <- corpusIdtoDocuments (timeUnit config) cId
96 let phyloWithCliques = toPhyloWithoutLink corpus config
97 -- writePhylo phyloWithCliquesFile phyloWithCliques
98 pure $ toPhylo (setConfig config phyloWithCliques)
100 --------------------------------------------------------------------
101 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document])
102 corpusIdtoDocuments timeUnit corpusId = do
103 docs <- selectDocNodes corpusId
104 printDebug "docs *****" (length docs)
105 lId <- defaultList corpusId
108 (_masterUserId, _masterRootId, masterCorpusId) <- getOrMk_RootWithCorpus
109 (UserName userMaster)
111 (Nothing :: Maybe HyperdataCorpus)
112 mListId <- defaultList masterCorpusId
113 repo <- getRepo [mListId,lId]
115 repo <- getRepo [lId]
116 -- ngs_terms' <- getContextNgrams corpusId mListId MapTerm NgramsTerms repo
118 ngs_terms <- getContextNgrams corpusId lId MapTerm NgramsTerms repo
119 printDebug "Size ngs_coterms *****" (length ngs_terms)
121 ngs_sources <- getContextNgrams corpusId lId MapTerm Sources repo
122 printDebug "Size ngs_sources Map Sources *****" (length ngs_sources)
124 termList <- getTermList lId MapTerm NgramsTerms
125 printDebug "Size ngs_terms List Map Ngrams *****" (length <$> termList)
129 -> context2phyloDocument timeUnit doc (ngs_terms {-<> ngs_terms'-}, ngs_sources)
132 -- printDebug "corpusIdtoDocuments" (Prelude.map date docs')
135 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
136 Just termList' -> pure (termList', docs')
138 context2phyloDocument :: TimeUnit
139 -> Context HyperdataDocument
140 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
142 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
143 let contextId = _context_id context
144 (date, date') <- context2date context timeUnit
147 toText x = Set.toList $ Set.map unNgramsTerm x
149 text' = maybe [] toText $ Map.lookup contextId ngs_terms
150 sources' = maybe [] toText $ Map.lookup contextId ngs_sources
152 pure $ Document date date' text' Nothing sources'
155 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
156 context2date context timeUnit = do
157 let hyperdata = _context_hyperdata context
158 year <- _hd_publication_year hyperdata
159 month <- _hd_publication_month hyperdata
160 day <- _hd_publication_day hyperdata
161 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
167 toMonths :: Integer -> Int -> Int -> Date
168 toMonths y m d = fromIntegral $ cdMonths
169 $ diffGregorianDurationClip (fromGregorian y m d)
170 (fromGregorian 0000 0 0)
172 toDays :: Integer -> Int -> Int -> Date
173 toDays y m d = fromIntegral
174 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
176 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
177 toPhyloDate y m d tu = case tu of
179 Month {} -> toMonths (Prelude.toInteger y) m d
180 Week {} -> div (toDays (Prelude.toInteger y) m d) 7
181 Day {} -> toDays (Prelude.toInteger y) m d
182 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
184 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
185 toPhyloDate' y _m _d (Epoch {}) = pack $ show $ posixSecondsToUTCTime $ fromIntegral y
186 toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toInteger y) m d
190 writePhylo :: [Char] -> Phylo -> IO ()
191 writePhylo path phylo = Lazy.writeFile path $ encode phylo
194 readPhylo :: [Char] -> IO Phylo
196 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
201 Right phylo -> pure phylo
204 -- | To read and decode a Json file
205 readJson :: FilePath -> IO Lazy.ByteString
206 readJson = Lazy.readFile