2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
10 Specifications of Phylomemy export format.
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
15 The main type is Phylo which is synonym of Phylomemy (only difference is
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28 {-# LANGUAGE MultiParamTypeClasses #-}
30 module Gargantext.Viz.Phylo where
32 import Control.Lens (makeLenses)
33 import Data.Aeson.TH (deriveJSON)
34 import Data.Maybe (Maybe)
35 import Data.Text (Text)
38 import Data.Vector (Vector)
39 --import Data.Time.Clock.POSIX (POSIXTime)
40 import GHC.Generics (Generic)
41 --import Gargantext.Database.Schema.Ngrams (NgramsId)
42 import Gargantext.Core.Utils.Prefix (unPrefix)
43 import Gargantext.Prelude
45 ------------------------------------------------------------------------
47 PhyloExport { _phyloExport_param :: PhyloParam
48 , _phyloExport_data :: Phylo
51 -- | .phylo parameters
53 PhyloParam { _phyloParam_version :: Text -- Double ?
54 , _phyloParam_software :: Software
55 , _phyloParam_params :: Hash
61 -- TODO move somewhere since it is generic
63 Software { _software_name :: Text
64 , _software_version :: Text
67 ------------------------------------------------------------------------
69 -- | Phylo datatype of a phylomemy
70 -- Duration : time Segment of the whole Phylo
71 -- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
72 -- Periods : list of all the periods of a Phylo
74 Phylo { _phylo_duration :: (Start, End)
75 , _phylo_foundations :: Vector Ngrams
76 , _phylo_periods :: [PhyloPeriod]
78 deriving (Generic, Show, Eq)
81 -- | Date : a simple Integer
84 -- | UTCTime in seconds since UNIX epoch
85 -- type Start = POSIXTime
86 -- type End = POSIXTime
90 -- | PhyloStep : steps of phylomemy on temporal axis
91 -- Period: tuple (start date, end date) of the step of the phylomemy
92 -- Levels: levels of granularity
94 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
95 , _phylo_periodLevels :: [PhyloLevel]
97 deriving (Generic, Show, Eq)
100 -- | PhyloLevel : levels of phylomemy on level axis
101 -- Levels description:
102 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
103 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
104 -- Level 1: First level of clustering
105 -- Level N: Nth level of clustering
107 PhyloLevel { _phylo_levelId :: PhyloLevelId
108 , _phylo_levelGroups :: [PhyloGroup]
110 deriving (Generic, Show, Eq)
113 -- | PhyloGroup : group of ngrams at each level and step
114 -- Label : maybe has a label as text
115 -- Ngrams: set of terms that build the group
116 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
117 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
118 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
119 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
121 PhyloGroup { _phylo_groupId :: PhyloGroupId
122 , _phylo_groupLabel :: Text
123 , _phylo_groupNgrams :: [Int]
124 , _phylo_groupMeta :: Map Text Double
125 , _phylo_groupCooc :: Map (Int, Int) Double
126 , _phylo_groupBranchId :: Maybe PhyloBranchId
128 , _phylo_groupPeriodParents :: [Pointer]
129 , _phylo_groupPeriodChilds :: [Pointer]
131 , _phylo_groupLevelParents :: [Pointer]
132 , _phylo_groupLevelChilds :: [Pointer]
134 deriving (Generic, Show, Eq, Ord)
137 -- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
139 -- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
143 type PhyloPeriodId = (Start, End)
144 type PhyloLevelId = (PhyloPeriodId, Level)
145 type PhyloGroupId = (PhyloLevelId, Index)
146 type PhyloBranchId = (Level, Index)
149 -- | Weight : A generic mesure that can be associated with an Id
151 -- | Pointer : A weighted linked with a given PhyloGroup
152 type Pointer = (PhyloGroupId, Weight)
153 -- | Ngrams : a contiguous sequence of n terms
157 -- | Clique : Set of ngrams cooccurring in the same Document
158 type Clique = Set Ngrams
159 -- | Support : Number of Documents where a Clique occurs
161 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
162 type Fis = (Clique,Support)
165 -- | Document : a piece of Text linked to a Date
166 data Document = Document
172 type Cluster = [PhyloGroup]
175 -- | A List of PhyloGroup in a PhyloGraph
176 type PhyloNodes = [PhyloGroup]
177 -- | A List of weighted links between some PhyloGroups in a PhyloGraph
178 type PhyloEdges = [((PhyloGroup,PhyloGroup),Weight)]
179 -- | The association as a Graph between a list of Nodes and a list of Edges
180 type PhyloGraph = (PhyloNodes,PhyloEdges)
183 data PhyloError = LevelDoesNotExist
188 -- | A List of Proximity mesures or strategies
189 data Proximity = WeightedLogJaccard | Hamming | FromPairs
190 -- | A List of Clustering methods
191 data Clustering = Louvain | RelatedComponents
193 data PairTo = Childs | Parents
197 data EdgeType = Directed | UnDirected
199 data ViewGraph = ViewGraph
200 { _view_graphParam :: PhyloParam
201 , _view_graphLabel :: Text
202 , _view_graphEdgeType :: EdgeType
203 , _view_graphBranches :: [(PhyloBranchId,Text)]
204 , _view_graphNodes :: [ViewNode]
205 , _view_graphEdges :: [ViewEdge]
208 data ViewEdge = ViewEdge
209 { _view_edgeSource :: PhyloGroupId
210 , _view_edgeTarget :: PhyloGroupId
211 , _view_edgeWeight :: Weight
214 data ViewNode = ViewNode
215 { _view_nodeId :: PhyloGroupId
216 , _view_nodeLabel :: Text
217 , _view_nodeNgrams :: [Ngrams]
218 , _view_nodeMeta :: Map Text Double
219 , _view_nodeParent :: PhyloGroupId
224 makeLenses ''PhyloParam
225 makeLenses ''PhyloExport
226 makeLenses ''Software
227 makeLenses ''PhyloGroup
228 makeLenses ''PhyloLevel
229 makeLenses ''PhyloPeriod
232 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
233 $(deriveJSON (unPrefix "_phylo_period" ) 'PhyloPeriod )
234 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
235 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
237 $(deriveJSON (unPrefix "_software_" ) ''Software )
238 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
239 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
241 -- | TODO XML instances