]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/API/Tools.hs
Merge remote-tracking branch 'origin/dev-phylo' into dev-merge
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / API / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.API
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Viz.Phylo.API.Tools
13 where
14
15 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
16 import Data.Map.Strict (Map)
17 import Data.Proxy
18 import Data.Maybe (fromMaybe)
19 import Data.Set (Set)
20 import Data.Text (Text, pack)
21 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
22 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
23 import Gargantext.API.Ngrams.Prelude (getTermList)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.API.Prelude (GargNoServer)
26 import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
27 import Gargantext.Core.Types (Context)
28 -- import Gargantext.Core.Types.Individu (User(..))
29 import Gargantext.Core.Types.Main (ListType(MapTerm))
30 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
31 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
32 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
33 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
34 -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
35 -- import Gargantext.Database.Admin.Config (userMaster)
36 -- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
37 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
38 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
39 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
40 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
41 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
42 import Gargantext.Database.Schema.Context
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Database.Schema.Node
45 import Gargantext.Prelude
46 import Prelude hiding (map)
47 import System.FilePath ((</>))
48 import System.IO.Temp (withTempDirectory)
49 import System.Process as Shell
50 import qualified Data.ByteString.Lazy as Lazy
51 import qualified Data.Map.Strict as Map
52 import qualified Data.Set as Set
53
54 --------------------------------------------------------------------
55 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
56 getPhyloData phyloId = do
57 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
58 pure $ _hp_data $ _node_hyperdata nodePhylo
59
60 putPhylo :: PhyloId -> GargNoServer Phylo
61 putPhylo = undefined
62
63 savePhylo :: PhyloId -> GargNoServer ()
64 savePhylo = undefined
65
66 --------------------------------------------------------------------
67 phylo2dot2json :: Phylo -> IO Value
68 phylo2dot2json phylo = do
69 withTempDirectory "/tmp" "phylo" $ \dirPath -> do
70 let fileFrom = dirPath </> "phyloFrom.dot"
71 fileDot = dirPath </> "phylo.dot"
72 fileToJson = dirPath </> "output.json"
73
74 dotToFile fileFrom (toPhyloExport phylo)
75
76 -- parsing a file can be done with:
77 -- runParser' (Data.GraphViz.Parsing.parse :: Parse (Data.GraphViz.DotGraph Text)) $ TL.fromStrict f
78 Shell.callProcess "dot" ["-Tdot", "-o", fileDot, fileFrom]
79 Shell.callProcess "dot" ["-Txdot_json", "-o", fileToJson, fileDot]
80
81 maybeValue <- decodeFileStrict fileToJson
82 -- print maybeValue
83
84 case maybeValue of
85 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
86 Just v -> pure v
87
88
89 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
90 flowPhyloAPI config cId = do
91 corpus <- corpusIdtoDocuments (timeUnit config) cId
92 let phyloWithCliques = toPhyloWithoutLink corpus config
93 -- writePhylo phyloWithCliquesFile phyloWithCliques
94 printDebug "PhyloConfig old: " config
95
96 pure $ toPhylo $ setConfig config phyloWithCliques
97
98 --------------------------------------------------------------------
99 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer [Document]
100 corpusIdtoDocuments timeUnit corpusId = do
101 docs <- selectDocNodes corpusId
102 lId <- defaultList corpusId
103 termList <- getTermList lId MapTerm NgramsTerms
104
105 let patterns = case termList of
106 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
107 Just termList' -> buildPatterns termList'
108 pure $ map (toPhyloDocs patterns timeUnit) (map _context_hyperdata docs)
109
110 termsInText' :: Patterns -> Text -> [Text]
111 termsInText' p t = (map fst) $ termsInText p t
112
113 toPhyloDocs :: Patterns -> TimeUnit -> HyperdataDocument -> Document
114 toPhyloDocs patterns time d =
115 let title = fromMaybe "" (_hd_title d)
116 abstr = fromMaybe "" (_hd_abstract d)
117 in Document (toPhyloDate
118 (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
119 (fromMaybe 1 $ _hd_publication_month d)
120 (fromMaybe 1 $ _hd_publication_day d) time)
121 (toPhyloDate'
122 (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
123 (fromMaybe 1 $ _hd_publication_month d)
124 (fromMaybe 1 $ _hd_publication_day d) time)
125 (termsInText' patterns $ title <> " " <> abstr) Nothing [] time
126
127
128
129 context2phyloDocument :: TimeUnit
130 -> Context HyperdataDocument
131 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
132 -> Maybe Document
133 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
134 let contextId = _context_id context
135 (date, date') <- context2date context timeUnit
136
137 let
138 toText x = Set.toList $ Set.map unNgramsTerm x
139
140 text' = maybe [] toText $ Map.lookup contextId ngs_terms
141 sources' = maybe [] toText $ Map.lookup contextId ngs_sources
142
143 pure $ Document date date' text' Nothing sources' (Year 3 1 5)
144
145
146 -- TODO better default date and log the errors to improve data quality
147 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
148 context2date context timeUnit = do
149 let hyperdata = _context_hyperdata context
150 let
151 year = fromMaybe 1 $ _hd_publication_year hyperdata
152 month = fromMaybe 1 $ _hd_publication_month hyperdata
153 day = fromMaybe 1 $ _hd_publication_day hyperdata
154 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
155
156
157 ---------------
158 -- | Dates | --
159 ---------------
160 toMonths :: Integer -> Int -> Int -> Date
161 toMonths y m d = fromIntegral $ cdMonths
162 $ diffGregorianDurationClip (fromGregorian y m d)
163 (fromGregorian 0000 0 0)
164
165 toDays :: Integer -> Int -> Int -> Date
166 toDays y m d = fromIntegral
167 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
168
169 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
170 toPhyloDate y m d tu = case tu of
171 Year {} -> y
172 Month {} -> toMonths (Prelude.toInteger y) m d
173 Week {} -> div (toDays (Prelude.toInteger y) m d) 7
174 Day {} -> toDays (Prelude.toInteger y) m d
175 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
176
177 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
178 toPhyloDate' y _m _d (Epoch {}) = pack $ show $ posixSecondsToUTCTime $ fromIntegral y
179 toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toInteger y) m d
180
181 -- Utils
182
183 writePhylo :: [Char] -> Phylo -> IO ()
184 writePhylo path phylo = Lazy.writeFile path $ encode phylo
185
186
187 readPhylo :: [Char] -> IO Phylo
188 readPhylo path = do
189 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
190 case phyloJson of
191 Left err -> do
192 putStrLn err
193 undefined
194 Right phylo -> pure phylo
195
196
197 -- | To read and decode a Json file
198 readJson :: FilePath -> IO Lazy.ByteString
199 readJson = Lazy.readFile