]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/API/Tools.hs
Merge remote-tracking branch 'origin/adinapoli/fix-phylo-types' into dev-merge
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / API / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.API
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Viz.Phylo.API.Tools
13 where
14
15 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
16 import Data.Map.Strict (Map)
17 import Data.Proxy
18 import Data.Maybe (fromMaybe)
19 import Data.Set (Set)
20 import Data.Text (Text, pack)
21 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
22 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
23 import Gargantext.API.Ngrams.Prelude (getTermList)
24 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
25 import Gargantext.API.Prelude (GargNoServer)
26 import Gargantext.Core.Text.Terms.WithList (Patterns, buildPatterns, termsInText)
27 import Gargantext.Core.Types (Context)
28 -- import Gargantext.Core.Types.Individu (User(..))
29 import Gargantext.Core.Types.Main (ListType(MapTerm))
30 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
31 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
32 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
33 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
34 -- import Gargantext.Database.Action.Flow (getOrMk_RootWithCorpus)
35 -- import Gargantext.Database.Admin.Config (userMaster)
36 -- import Gargantext.Database.Admin.Types.Hyperdata (HyperdataCorpus)
37 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
38 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
39 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
40 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
41 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
42 import Gargantext.Database.Schema.Context
43 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
44 import Gargantext.Database.Schema.Node
45 import Gargantext.Prelude
46 import Prelude hiding (map)
47 import System.FilePath ((</>))
48 import System.IO.Temp (withTempDirectory)
49 import System.Process as Shell
50 import qualified Data.ByteString.Lazy as Lazy
51 import qualified Data.Map.Strict as Map
52 import qualified Data.Set as Set
53
54 --------------------------------------------------------------------
55 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
56 getPhyloData phyloId = do
57 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
58 pure $ _hp_data $ _node_hyperdata nodePhylo
59
60 putPhylo :: PhyloId -> GargNoServer Phylo
61 putPhylo = undefined
62
63 savePhylo :: PhyloId -> GargNoServer ()
64 savePhylo = undefined
65
66 --------------------------------------------------------------------
67 phylo2dot2json :: Phylo -> IO Value
68 phylo2dot2json phylo = do
69 withTempDirectory "/tmp" "phylo" $ \dirPath -> do
70 let fileFrom = dirPath </> "phyloFrom.dot"
71 fileDot = dirPath </> "phylo.dot"
72 fileToJson = dirPath </> "output.json"
73
74 dotToFile fileFrom (toPhyloExport phylo)
75
76 -- parsing a file can be done with:
77 -- runParser' (Data.GraphViz.Parsing.parse :: Parse (Data.GraphViz.DotGraph Text)) $ TL.fromStrict f
78 Shell.callProcess "dot" ["-Tdot", "-o", fileDot, fileFrom]
79 Shell.callProcess "dot" ["-Txdot_json", "-o", fileToJson, fileDot]
80
81 maybeValue <- decodeFileStrict fileToJson
82 print maybeValue
83
84 case maybeValue of
85 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
86 Just v -> pure v
87
88
89 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
90 flowPhyloAPI config cId = do
91 corpus <- corpusIdtoDocuments (timeUnit config) cId
92 let phyloWithCliques = toPhyloWithoutLink corpus config
93 -- writePhylo phyloWithCliquesFile phyloWithCliques
94 pure $ toPhylo (setConfig config phyloWithCliques)
95
96 --------------------------------------------------------------------
97 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer [Document]
98 corpusIdtoDocuments timeUnit corpusId = do
99 docs <- selectDocNodes corpusId
100 lId <- defaultList corpusId
101 termList <- getTermList lId MapTerm NgramsTerms
102
103 let patterns = case termList of
104 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
105 Just termList' -> buildPatterns termList'
106 pure $ map (toPhyloDocs patterns timeUnit) (map _context_hyperdata docs)
107
108 termsInText' :: Patterns -> Text -> [Text]
109 termsInText' p t = (map fst) $ termsInText p t
110
111 toPhyloDocs :: Patterns -> TimeUnit -> HyperdataDocument -> Document
112 toPhyloDocs patterns time d =
113 let title = fromMaybe "" (_hd_title d)
114 abstr = fromMaybe "" (_hd_abstract d)
115 in Document (toPhyloDate
116 (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
117 (fromMaybe 1 $ _hd_publication_month d)
118 (fromMaybe 1 $ _hd_publication_day d) time)
119 (toPhyloDate'
120 (fromIntegral $ fromMaybe 1 $ _hd_publication_year d)
121 (fromMaybe 1 $ _hd_publication_month d)
122 (fromMaybe 1 $ _hd_publication_day d) time)
123 (termsInText' patterns $ title <> " " <> abstr) Nothing []
124
125
126
127 context2phyloDocument :: TimeUnit
128 -> Context HyperdataDocument
129 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
130 -> Maybe Document
131 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
132 let contextId = _context_id context
133 (date, date') <- context2date context timeUnit
134
135 let
136 toText x = Set.toList $ Set.map unNgramsTerm x
137
138 text' = maybe [] toText $ Map.lookup contextId ngs_terms
139 sources' = maybe [] toText $ Map.lookup contextId ngs_sources
140
141 pure $ Document date date' text' Nothing sources'
142
143
144 -- TODO better default date and log the errors to improve data quality
145 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
146 context2date context timeUnit = do
147 let hyperdata = _context_hyperdata context
148 let
149 year = fromMaybe 1 $ _hd_publication_year hyperdata
150 month = fromMaybe 1 $ _hd_publication_month hyperdata
151 day = fromMaybe 1 $ _hd_publication_day hyperdata
152 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
153
154
155 ---------------
156 -- | Dates | --
157 ---------------
158 toMonths :: Integer -> Int -> Int -> Date
159 toMonths y m d = fromIntegral $ cdMonths
160 $ diffGregorianDurationClip (fromGregorian y m d)
161 (fromGregorian 0000 0 0)
162
163 toDays :: Integer -> Int -> Int -> Date
164 toDays y m d = fromIntegral
165 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
166
167 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
168 toPhyloDate y m d tu = case tu of
169 Year {} -> y
170 Month {} -> toMonths (Prelude.toInteger y) m d
171 Week {} -> div (toDays (Prelude.toInteger y) m d) 7
172 Day {} -> toDays (Prelude.toInteger y) m d
173 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
174
175 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
176 toPhyloDate' y _m _d (Epoch {}) = pack $ show $ posixSecondsToUTCTime $ fromIntegral y
177 toPhyloDate' y m d _ = pack $ showGregorian $ fromGregorian (toInteger y) m d
178
179 -- Utils
180
181 writePhylo :: [Char] -> Phylo -> IO ()
182 writePhylo path phylo = Lazy.writeFile path $ encode phylo
183
184
185 readPhylo :: [Char] -> IO Phylo
186 readPhylo path = do
187 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
188 case phyloJson of
189 Left err -> do
190 putStrLn err
191 undefined
192 Right phylo -> pure phylo
193
194
195 -- | To read and decode a Json file
196 readJson :: FilePath -> IO Lazy.ByteString
197 readJson = Lazy.readFile