2 Module : Gargantext.Core.Viz.Phylo.API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Viz.Phylo.API.Tools
15 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
16 import Data.Map.Strict (Map)
18 import Data.Maybe (fromMaybe)
20 import Data.Text (Text, pack)
21 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
22 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
23 import Gargantext.API.Ngrams.Prelude (getTermList)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.API.Prelude (GargNoServer)
26 import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
27 import Gargantext.Core.Types (Context)
28 -- import Gargantext.Core.Types.Individu (User(..))
29 import Gargantext.Core.Types.Main (ListType(MapTerm))
30 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
31 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
32 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
33 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
34 -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
35 -- import Gargantext.Database.Admin.Config (userMaster)
36 -- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
37 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
38 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
39 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
40 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
41 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
42 import Gargantext.Database.Schema.Context
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Database.Schema.Node
45 import Gargantext.Prelude
46 import Prelude hiding (map)
47 import System.FilePath ((</>))
48 import System.IO.Temp (withTempDirectory)
49 import System.Process as Shell
50 import qualified Data.ByteString.Lazy as Lazy
51 import qualified Data.Map.Strict as Map
52 import qualified Data.Set as Set
54 --------------------------------------------------------------------
55 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
56 getPhyloData phyloId = do
57 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
58 pure $ _hp_data $ _node_hyperdata nodePhylo
60 putPhylo :: PhyloId -> GargNoServer Phylo
63 savePhylo :: PhyloId -> GargNoServer ()
66 --------------------------------------------------------------------
67 phylo2dot2json :: Phylo -> IO Value
68 phylo2dot2json phylo = do
69 withTempDirectory "/tmp" "phylo" $ \dirPath -> do
70 let fileFrom = dirPath </> "phyloFrom.dot"
71 fileDot = dirPath </> "phylo.dot"
72 fileToJson = dirPath </> "output.json"
74 dotToFile fileFrom (toPhyloExport phylo)
76 -- parsing a file can be done with:
77 -- runParser' (Data.GraphViz.Parsing.parse :: Parse (Data.GraphViz.DotGraph Text)) $ TL.fromStrict f
78 Shell.callProcess "dot" ["-Tdot", "-o", fileDot, fileFrom]
79 Shell.callProcess "dot" ["-Txdot_json", "-o", fileToJson, fileDot]
81 maybeValue <- decodeFileStrict fileToJson
85 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
89 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
90 flowPhyloAPI config cId = do
91 corpus <- corpusIdtoDocuments (timeUnit config) cId
92 let phyloWithCliques = toPhyloWithoutLink corpus config
93 -- writePhylo phyloWithCliquesFile phyloWithCliques
94 printDebug "PhyloConfig old: " config
96 pure $ toPhylo $ setConfig config phyloWithCliques
98 --------------------------------------------------------------------
99 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer [Document]
100 corpusIdtoDocuments timeUnit corpusId = do
101 docs <- selectDocNodes corpusId
102 lId <- defaultList corpusId
103 termList <- getTermList lId MapTerm NgramsTerms
105 let patterns = case termList of
106 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
107 Just termList' -> buildPatterns termList'
108 pure $ map (toPhyloDocs patterns timeUnit) (map _context_hyperdata docs)
110 termsInText' :: Patterns -> Text -> [Text]
111 termsInText' p t = (map fst) $ termsInText p t
113 toPhyloDocs :: Patterns -> TimeUnit -> HyperdataDocument -> Document
114 toPhyloDocs patterns time d =
115 let title = fromMaybe "" (_hd_title d)
116 abstr = fromMaybe "" (_hd_abstract d)
117 in Document (toPhyloDate
118 (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
119 (fromMaybe 1 $ _hd_publication_month d)
120 (fromMaybe 1 $ _hd_publication_day d) time)
122 (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
123 (fromMaybe 1 $ _hd_publication_month d)
124 (fromMaybe 1 $ _hd_publication_day d) time)
125 (termsInText' patterns $ title <> " " <> abstr) Nothing [] time
129 context2phyloDocument :: TimeUnit
130 -> Context HyperdataDocument
131 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
133 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
134 let contextId = _context_id context
135 (date, date') <- context2date context timeUnit
138 toText x = Set.toList $ Set.map unNgramsTerm x
140 text' = maybe [] toText $ Map.lookup contextId ngs_terms
141 sources' = maybe [] toText $ Map.lookup contextId ngs_sources
143 pure $ Document date date' text' Nothing sources' (Year 3 1 5)
146 -- TODO better default date and log the errors to improve data quality
147 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
148 context2date context timeUnit = do
149 let hyperdata = _context_hyperdata context
151 year = fromMaybe 1 $ _hd_publication_year hyperdata
152 month = fromMaybe 1 $ _hd_publication_month hyperdata
153 day = fromMaybe 1 $ _hd_publication_day hyperdata
154 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
160 toMonths :: Integer -> Int -> Int -> Date
161 toMonths y m d = fromIntegral $ cdMonths
162 $ diffGregorianDurationClip (fromGregorian y m d)
163 (fromGregorian 0000 0 0)
165 toDays :: Integer -> Int -> Int -> Date
166 toDays y m d = fromIntegral
167 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
169 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
170 toPhyloDate y m d tu = case tu of
172 Month {} -> toMonths (Prelude.toInteger y) m d
173 Week {} -> div (toDays (Prelude.toInteger y) m d) 7
174 Day {} -> toDays (Prelude.toInteger y) m d
175 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
177 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
178 toPhyloDate' y _m _d (Epoch {}) = pack $ show $ posixSecondsToUTCTime $ fromIntegral y
179 toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toInteger y) m d
183 writePhylo :: [Char] -> Phylo -> IO ()
184 writePhylo path phylo = Lazy.writeFile path $ encode phylo
187 readPhylo :: [Char] -> IO Phylo
189 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
194 Right phylo -> pure phylo
197 -- | To read and decode a Json file
198 readJson :: FilePath -> IO Lazy.ByteString
199 readJson = Lazy.readFile