2 Module : Gargantext.Core.Viz.Phylo.API
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
12 module Gargantext.Core.Viz.Phylo.API.Tools
15 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
16 import Data.Map.Strict (Map)
18 import Data.Maybe (fromMaybe)
20 import Data.Text (Text, pack)
21 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
22 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
23 import Gargantext.API.Ngrams.Prelude (getTermList)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.API.Prelude (GargNoServer)
26 import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
27 import Gargantext.Core.Types (Context)
28 -- import Gargantext.Core.Types.Individu (User(..))
29 import Gargantext.Core.Types.Main (ListType(MapTerm))
30 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
31 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
32 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
33 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
34 -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
35 -- import Gargantext.Database.Admin.Config (userMaster)
36 -- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
37 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
38 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
39 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
40 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
41 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
42 import Gargantext.Database.Schema.Context
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Database.Schema.Node
45 import Gargantext.Prelude
46 import Prelude hiding (map)
47 import System.FilePath ((</>))
48 import System.IO.Temp (withTempDirectory)
49 import System.Process as Shell
50 import qualified Data.ByteString.Lazy as Lazy
51 import qualified Data.Map.Strict as Map
52 import qualified Data.Set as Set
54 --------------------------------------------------------------------
55 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
56 getPhyloData phyloId = do
57 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
58 pure $ _hp_data $ _node_hyperdata nodePhylo
60 putPhylo :: PhyloId -> GargNoServer Phylo
63 savePhylo :: PhyloId -> GargNoServer ()
66 --------------------------------------------------------------------
67 phylo2dot2json :: Phylo -> IO Value
68 phylo2dot2json phylo = do
69 withTempDirectory "/tmp" "phylo" $ \dirPath -> do
70 let fileFrom = dirPath </> "phyloFrom.dot"
71 fileDot = dirPath </> "phylo.dot"
72 fileToJson = dirPath </> "output.json"
74 dotToFile fileFrom (toPhyloExport phylo)
76 -- parsing a file can be done with:
77 -- runParser' (Data.GraphViz.Parsing.parse :: Parse (Data.GraphViz.DotGraph Text)) $ TL.fromStrict f
78 Shell.callProcess "dot" ["-Tdot", "-o", fileDot, fileFrom]
79 Shell.callProcess "dot" ["-Txdot_json", "-o", fileToJson, fileDot]
81 maybeValue <- decodeFileStrict fileToJson
85 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
89 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
90 flowPhyloAPI config cId = do
91 corpus <- corpusIdtoDocuments (timeUnit config) cId
92 let phyloWithCliques = toPhyloWithoutLink corpus config
93 -- writePhylo phyloWithCliquesFile phyloWithCliques
94 pure $ toPhylo (setConfig config phyloWithCliques)
96 --------------------------------------------------------------------
97 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer [Document]
98 corpusIdtoDocuments timeUnit corpusId = do
99 docs <- selectDocNodes corpusId
100 lId <- defaultList corpusId
101 termList <- getTermList lId MapTerm NgramsTerms
103 let patterns = case termList of
104 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
105 Just termList' -> buildPatterns termList'
106 pure $ map (toPhyloDocs patterns timeUnit) (map _context_hyperdata docs)
108 termsInText' :: Patterns -> Text -> [Text]
109 termsInText' p t = (map fst) $ termsInText p t
111 toPhyloDocs :: Patterns -> TimeUnit -> HyperdataDocument -> Document
112 toPhyloDocs patterns time d =
113 let title = fromMaybe "" (_hd_title d)
114 abstr = fromMaybe "" (_hd_abstract d)
115 in Document (toPhyloDate
116 (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
117 (fromMaybe 1 $ _hd_publication_month d)
118 (fromMaybe 1 $ _hd_publication_day d) time)
120 (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
121 (fromMaybe 1 $ _hd_publication_month d)
122 (fromMaybe 1 $ _hd_publication_day d) time)
123 (termsInText' patterns $ title <> " " <> abstr) Nothing []
127 context2phyloDocument :: TimeUnit
128 -> Context HyperdataDocument
129 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
131 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
132 let contextId = _context_id context
133 (date, date') <- context2date context timeUnit
136 toText x = Set.toList $ Set.map unNgramsTerm x
138 text' = maybe [] toText $ Map.lookup contextId ngs_terms
139 sources' = maybe [] toText $ Map.lookup contextId ngs_sources
141 pure $ Document date date' text' Nothing sources'
144 -- TODO better default date and log the errors to improve data quality
145 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
146 context2date context timeUnit = do
147 let hyperdata = _context_hyperdata context
149 year = fromMaybe 1 $ _hd_publication_year hyperdata
150 month = fromMaybe 1 $ _hd_publication_month hyperdata
151 day = fromMaybe 1 $ _hd_publication_day hyperdata
152 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
158 toMonths :: Integer -> Int -> Int -> Date
159 toMonths y m d = fromIntegral $ cdMonths
160 $ diffGregorianDurationClip (fromGregorian y m d)
161 (fromGregorian 0000 0 0)
163 toDays :: Integer -> Int -> Int -> Date
164 toDays y m d = fromIntegral
165 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
167 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
168 toPhyloDate y m d tu = case tu of
170 Month {} -> toMonths (Prelude.toInteger y) m d
171 Week {} -> div (toDays (Prelude.toInteger y) m d) 7
172 Day {} -> toDays (Prelude.toInteger y) m d
173 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
175 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
176 toPhyloDate' y _m _d (Epoch {}) = pack $ show $ posixSecondsToUTCTime $ fromIntegral y
177 toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toInteger y) m d
181 writePhylo :: [Char] -> Phylo -> IO ()
182 writePhylo path phylo = Lazy.writeFile path $ encode phylo
185 readPhylo :: [Char] -> IO Phylo
187 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
192 Right phylo -> pure phylo
195 -- | To read and decode a Json file
196 readJson :: FilePath -> IO Lazy.ByteString
197 readJson = Lazy.readFile