2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
45 ------------------------------------------------------------------------
46 data PhyloQuery = PhyloQuery
47 { _phyloQuery_phyloName :: Text
48 , _phyloQuery_phyloDescription :: Text
50 , _phyloQuery_timeGrain :: Int
51 , _phyloQuery_timeSteps :: Int
53 , _phyloQuery_fstCluster :: Clustering
54 , _phyloQuery_timeMatching :: Proximity
56 , _phyloQuery_nthLevel :: Level
57 , _phyloQuery_nthCluster :: Clustering
62 PhyloExport { _phyloExport_param :: PhyloParam
63 , _phyloExport_data :: Phylo
64 } deriving (Generic, Show)
66 -- | .phylo parameters
68 PhyloParam { _phyloParam_version :: Text -- Double ?
69 , _phyloParam_software :: Software
70 , _phyloParam_params :: Hash
71 , _phyloParam_query :: Maybe PhyloQuery
72 } deriving (Generic, Show)
77 -- TODO move somewhere since it is generic
79 Software { _software_name :: Text
80 , _software_version :: Text
81 } deriving (Generic, Show)
83 ------------------------------------------------------------------------
85 -- | Phylo datatype of a phylomemy
86 -- Duration : time Segment of the whole Phylo
87 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
88 -- Periods : list of all the periods of a Phylo
90 Phylo { _phylo_duration :: (Start, End)
91 , _phylo_foundations :: Vector Ngrams
92 , _phylo_periods :: [PhyloPeriod]
94 deriving (Generic, Show)
97 -- | Date : a simple Integer
100 -- | UTCTime in seconds since UNIX epoch
101 -- type Start = POSIXTime
102 -- type End = POSIXTime
106 -- | PhyloStep : steps of phylomemy on temporal axis
107 -- Period: tuple (start date, end date) of the step of the phylomemy
108 -- Levels: levels of granularity
110 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
111 , _phylo_periodLevels :: [PhyloLevel]
113 deriving (Generic, Show)
116 -- | PhyloLevel : levels of phylomemy on level axis
117 -- Levels description:
118 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
119 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
120 -- Level 1: First level of clustering
121 -- Level N: Nth level of clustering
123 PhyloLevel { _phylo_levelId :: PhyloLevelId
124 , _phylo_levelGroups :: [PhyloGroup]
126 deriving (Generic, Show)
129 -- | PhyloGroup : group of ngrams at each level and step
130 -- Label : maybe has a label as text
131 -- Ngrams: set of terms that build the group
132 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
133 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
134 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
135 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
137 PhyloGroup { _phylo_groupId :: PhyloGroupId
138 , _phylo_groupLabel :: Text
139 , _phylo_groupNgrams :: [Int]
140 , _phylo_groupMeta :: Map Text Double
141 , _phylo_groupCooc :: Map (Int, Int) Double
142 , _phylo_groupBranchId :: Maybe PhyloBranchId
144 , _phylo_groupPeriodParents :: [Pointer]
145 , _phylo_groupPeriodChilds :: [Pointer]
147 , _phylo_groupLevelParents :: [Pointer]
148 , _phylo_groupLevelChilds :: [Pointer]
150 deriving (Generic, Show, Eq, Ord)
153 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
155 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
159 type PhyloPeriodId = (Start, End)
160 type PhyloLevelId = (PhyloPeriodId, Level)
161 type PhyloGroupId = (PhyloLevelId, Index)
162 type PhyloBranchId = (Level, Index)
165 -- | Weight : A generic mesure that can be associated with an Id
167 -- | Pointer : A weighted linked with a given PhyloGroup
168 type Pointer = (PhyloGroupId, Weight)
169 -- | Ngrams : a contiguous sequence of n terms
173 -- | Clique : Set of ngrams cooccurring in the same Document
174 type Clique = Set Ngrams
175 -- | Support : Number of Documents where a Clique occurs
177 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
178 type Fis = (Clique,Support)
181 -- | Document : a piece of Text linked to a Date
182 data Document = Document
188 type Cluster = [PhyloGroup]
191 -- | A List of PhyloGroup in a Graph
192 type GroupNodes = [PhyloGroup]
193 -- | A List of weighted links between some PhyloGroups in a Graph
194 type GroupEdges = [((PhyloGroup,PhyloGroup),Weight)]
195 -- | The association as a Graph between a list of Nodes and a list of Edges
196 type GroupGraph = (GroupNodes,GroupEdges)
199 data PhyloError = LevelDoesNotExist
204 -- | A List of Proximity methods names
205 data ProximityName = WeightedLogJaccard | Hamming | Filiation deriving (Show)
206 -- | A List of Clustering methods names
207 data ClusteringName = Louvain | RelatedComponents | FrequentItemSet deriving (Show)
208 -- | A constructor for Proximities
209 data Proximity = Proximity
210 { _proximity_name :: ProximityName
211 , _proximity_params :: Map Text Double
212 , _proximity_threshold :: Maybe Double } deriving (Show)
213 -- | A constructor for Clustering
214 data Clustering = Clustering
215 { _clustering_name :: ClusteringName
216 , _clustering_params :: Map Text Double
217 , _clustering_paramsBool :: Map Text Bool
218 , _clustering_proximity :: Maybe Proximity } deriving (Show)
220 ------------------------------------------------------------------------
221 -- | To export a Phylo | --
227 data Filiation = Ascendant | Descendant | Complete deriving (Show)
228 data EdgeType = PeriodEdge | LevelEdge deriving (Show)
230 data PhyloView = PhyloView
231 { _phylo_viewParam :: PhyloParam
232 , _phylo_viewLabel :: Text
233 , _phylo_viewDescription :: Text
234 , _phylo_viewFiliation :: Filiation
235 , _phylo_viewMeta :: Map Text Double
236 , _phylo_viewBranches :: [PhyloBranch]
237 , _phylo_viewNodes :: [PhyloNode]
238 , _phylo_viewEdges :: [PhyloEdge]
242 data PhyloBranch = PhyloBranch
243 { _phylo_branchId :: PhyloBranchId
244 , _phylo_branchLabel :: Text
245 , _phylo_branchMeta :: Map Text Double
249 data PhyloEdge = PhyloEdge
250 { _phylo_edgeSource :: PhyloGroupId
251 , _phylo_edgeTarget :: PhyloGroupId
252 , _phylo_edgeType :: EdgeType
253 , _phylo_edgeWeight :: Weight
257 data PhyloNode = PhyloNode
258 { _phylo_nodeId :: PhyloGroupId
259 , _phylo_nodeBranchId :: Maybe PhyloBranchId
260 , _phylo_nodeLabel :: Text
261 , _phylo_nodeNgramsIdx :: [Int]
262 , _phylo_nodeNgrams :: Maybe [Ngrams]
263 , _phylo_nodeMeta :: Map Text Double
264 , _phylo_nodeParent :: Maybe PhyloGroupId
265 , _phylo_nodeChilds :: [PhyloNode]
271 data Filter = LonelyBranch
272 data Metric = BranchAge
273 data Tagger = BranchLabelFreq | GroupLabelCooc | GroupDynamics
276 data Sort = ByBranchAge
277 data Order = Asc | Desc
279 data DisplayMode = Flat | Nested
282 -- | A query filter seen as : prefix && ((filter params)(clause))
283 data QueryFilter = QueryFilter
284 { _query_filter :: Filter
285 , _query_params :: [Double]
289 -- | A PhyloQueryView is the structured representation of a user query to be applied to a Phylo
290 data PhyloQueryView = PhyloQueryView
291 { _query_lvl :: Level
293 -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ?
294 , _query_filiation :: Filiation
296 -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
297 , _query_childs :: Bool
298 , _query_childsDepth :: Level
300 -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
301 -- Firstly the metrics, then the filters and the taggers
302 , _query_metrics :: [Metric]
303 , _query_filters :: [QueryFilter]
304 , _query_taggers :: [Tagger]
306 -- An asc or desc sort to apply to the PhyloGraph
307 , _query_sort :: Maybe (Sort,Order)
309 -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
310 , _query_display :: DisplayMode
311 , _query_verbose :: Bool
315 ------------------------------------------------------------------------
316 -- | Lenses and Json | --
321 makeLenses ''PhyloParam
322 makeLenses ''PhyloExport
323 makeLenses ''Software
324 makeLenses ''PhyloGroup
325 makeLenses ''PhyloLevel
326 makeLenses ''PhyloPeriod
327 makeLenses ''PhyloView
328 makeLenses ''PhyloQueryView
329 makeLenses ''PhyloBranch
330 makeLenses ''PhyloNode
331 makeLenses ''PhyloEdge
332 makeLenses ''Proximity
333 makeLenses ''Clustering
334 makeLenses ''QueryFilter
335 makeLenses ''PhyloQuery
338 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
339 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
340 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
341 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
343 $(deriveJSON (unPrefix "_software_" ) ''Software )
344 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
345 $(deriveJSON (unPrefix "_clustering_" ) ''Clustering )
346 $(deriveJSON (unPrefix "_proximity_" ) ''Proximity )
347 $(deriveJSON (unPrefix "") ''ProximityName )
348 $(deriveJSON (unPrefix "") ''ClusteringName )
349 $(deriveJSON (unPrefix "_phyloQuery_" ) ''PhyloQuery )
350 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
352 -- | TODO XML instances