]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Core/Viz/Phylo/API/Tools.hs
[facet] some fixes to tfidfAll, small refactoring
[gargantext.git] / src / Gargantext / Core / Viz / Phylo / API / Tools.hs
1 {-|
2 Module : Gargantext.Core.Viz.Phylo.API
3 Description :
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 -}
11
12 module Gargantext.Core.Viz.Phylo.API.Tools
13 where
14
15 import Data.Proxy
16 import Data.Aeson (Value, decodeFileStrict, eitherDecode, encode)
17 import Data.Map.Strict (Map)
18 import Data.Maybe (catMaybes)
19 import Data.Set (Set)
20 import Data.Text (Text, pack)
21 import Data.Time.Calendar (fromGregorian, diffGregorianDurationClip, cdMonths, diffDays, showGregorian)
22 import Data.Time.Clock.POSIX(posixSecondsToUTCTime)
23 import Gargantext.API.Ngrams.Prelude (getTermList)
24 import Gargantext.API.Ngrams.Tools (getRepo)
25 import Gargantext.API.Ngrams.Types (NgramsTerm(..))
26 import Gargantext.API.Node.Corpus.Export (getContextNgrams)
27 import Gargantext.API.Prelude (GargNoServer)
28 import Gargantext.Core.Text.Context (TermList)
29 import Gargantext.Core.Types (Context)
30 import Gargantext.Core.Types.Main (ListType(MapTerm))
31 import Gargantext.Core.Viz.Phylo (TimeUnit(..), Date, Document(..), PhyloConfig(..), Phylo)
32 import Gargantext.Core.Viz.Phylo.PhyloExport (toPhyloExport, dotToFile)
33 import Gargantext.Core.Viz.Phylo.PhyloMaker (toPhylo, toPhyloWithoutLink)
34 import Gargantext.Core.Viz.Phylo.PhyloTools ({-printIOMsg, printIOComment,-} setConfig)
35 import Gargantext.Database.Admin.Types.Hyperdata.Document (HyperdataDocument(..))
36 import Gargantext.Database.Admin.Types.Hyperdata (HyperdataPhylo(..))
37 import Gargantext.Database.Admin.Types.Node (CorpusId, ContextId, PhyloId)
38 import Gargantext.Database.Query.Table.Node (defaultList, getNodeWith)
39 import Gargantext.Database.Query.Table.NodeContext (selectDocNodes)
40 import Gargantext.Database.Schema.Context
41 import Gargantext.Database.Schema.Node
42 import Gargantext.Database.Schema.Ngrams (NgramsType(..))
43 import Gargantext.Prelude
44 import Prelude
45 import System.Process as Shell
46 import qualified Data.ByteString.Lazy as Lazy
47 import qualified Data.List as List
48 import qualified Data.Map.Strict as Map
49 import qualified Data.Set as Set
50
51
52 --------------------------------------------------------------------
53 getPhyloData :: PhyloId -> GargNoServer (Maybe Phylo)
54 getPhyloData phyloId = do
55 nodePhylo <- getNodeWith phyloId (Proxy :: Proxy HyperdataPhylo)
56 pure $ _hp_data $ _node_hyperdata nodePhylo
57
58 putPhylo :: PhyloId -> GargNoServer Phylo
59 putPhylo = undefined
60
61 savePhylo :: PhyloId -> GargNoServer ()
62 savePhylo = undefined
63
64 --------------------------------------------------------------------
65 phylo2dot2json :: Phylo -> IO Value
66 phylo2dot2json phylo = do
67
68 let
69 file_from = "/tmp/fromPhylo.json"
70 file_dot = "/tmp/tmp.dot"
71 file_to_json = "/tmp/toPhylo.json"
72
73 _ <- dotToFile file_from (toPhyloExport phylo)
74 _ <- Shell.callProcess "dot" ["-Tdot", "-o", file_dot, file_from]
75 _ <- Shell.callProcess "dot" ["-Txdot_json", "-o", file_to_json, file_dot]
76
77 maybeValue <- decodeFileStrict file_to_json
78 print maybeValue
79 _ <- Shell.callProcess "/bin/rm" ["-rf", file_from, file_to_json, file_dot]
80
81 case maybeValue of
82 Nothing -> panic "[G.C.V.Phylo.API.phylo2dot2json] Error no file"
83 Just v -> pure v
84
85
86
87 flowPhyloAPI :: PhyloConfig -> CorpusId -> GargNoServer Phylo
88 flowPhyloAPI config cId = do
89 (_, corpus) <- corpusIdtoDocuments (timeUnit config) cId
90 phyloWithCliques <- pure $ toPhyloWithoutLink corpus config
91 -- writePhylo phyloWithCliquesFile phyloWithCliques
92 pure $ toPhylo (setConfig config phyloWithCliques)
93
94 --------------------------------------------------------------------
95 corpusIdtoDocuments :: TimeUnit -> CorpusId -> GargNoServer (TermList, [Document])
96 corpusIdtoDocuments timeUnit corpusId = do
97 docs <- selectDocNodes corpusId
98 lId <- defaultList corpusId
99 repo <- getRepo [lId]
100
101 ngs_terms <- getContextNgrams corpusId lId MapTerm NgramsTerms repo
102 ngs_sources <- getContextNgrams corpusId lId MapTerm Sources repo
103
104 termList <- getTermList lId MapTerm NgramsTerms
105
106 let docs'= catMaybes
107 $ List.map (\doc
108 -> context2phyloDocument timeUnit doc (ngs_terms, ngs_sources)
109 ) docs
110
111 -- printDebug "corpusIdtoDocuments" (Prelude.map date docs')
112
113 case termList of
114 Nothing -> panic "[G.C.V.Phylo.API] no termList found"
115 Just termList' -> pure (termList', docs')
116
117 context2phyloDocument :: TimeUnit
118 -> Context HyperdataDocument
119 -> (Map ContextId (Set NgramsTerm), Map ContextId (Set NgramsTerm))
120 -> Maybe Document
121 context2phyloDocument timeUnit context (ngs_terms, ngs_sources) = do
122 let contextId = _context_id context
123 (date, date') <- context2date context timeUnit
124
125 let
126 toText x = Set.toList $ Set.map unNgramsTerm x
127
128 text' = maybe [] toText $ Map.lookup contextId ngs_terms
129 sources' = maybe [] toText $ Map.lookup contextId ngs_sources
130
131 pure $ Document date date' text' Nothing sources'
132
133
134 context2date :: Context HyperdataDocument -> TimeUnit -> Maybe (Date, Text)
135 context2date context timeUnit = do
136 let hyperdata = _context_hyperdata context
137 year <- _hd_publication_year hyperdata
138 month <- _hd_publication_month hyperdata
139 day <- _hd_publication_day hyperdata
140 pure (toPhyloDate year month day timeUnit, toPhyloDate' year month day timeUnit)
141
142
143 ---------------
144 -- | Dates | --
145 ---------------
146 toMonths :: Integer -> Int -> Int -> Date
147 toMonths y m d = fromIntegral $ cdMonths
148 $ diffGregorianDurationClip (fromGregorian y m d)
149 (fromGregorian 0000 0 0)
150
151 toDays :: Integer -> Int -> Int -> Date
152 toDays y m d = fromIntegral
153 $ diffDays (fromGregorian y m d) (fromGregorian 0000 0 0)
154
155 toPhyloDate :: Int -> Int -> Int -> TimeUnit -> Date
156 toPhyloDate y m d tu = case tu of
157 Year _ _ _ -> y
158 Month _ _ _ -> toMonths (Prelude.toInteger y) m d
159 Week _ _ _ -> div (toDays (Prelude.toInteger y) m d) 7
160 Day _ _ _ -> toDays (Prelude.toInteger y) m d
161 _ -> panic "[G.C.V.Phylo.API] toPhyloDate"
162
163 toPhyloDate' :: Int -> Int -> Int -> TimeUnit -> Text
164 toPhyloDate' y m d tu = case tu of
165 Epoch _ _ _ -> pack $ show $ posixSecondsToUTCTime $ fromIntegral y
166 Year _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
167 Month _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
168 Week _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
169 Day _ _ _ -> pack $ showGregorian $ fromGregorian (toInteger y) m d
170
171 -- Utils
172
173 writePhylo :: [Char] -> Phylo -> IO ()
174 writePhylo path phylo = Lazy.writeFile path $ encode phylo
175
176
177 readPhylo :: [Char] -> IO Phylo
178 readPhylo path = do
179 phyloJson <- (eitherDecode <$> readJson path) :: IO (Either String Phylo)
180 case phyloJson of
181 Left err -> do
182 putStrLn err
183 undefined
184 Right phylo -> pure phylo
185
186
187 -- | To read and decode a Json file
188 readJson :: FilePath -> IO Lazy.ByteString
189 readJson path = Lazy.readFile path