]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
[PHYLO] Code Review + Tools start.
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28
29 module Gargantext.Viz.Phylo where
30
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Maybe (Maybe)
34 import Data.Text (Text)
35 import Data.Set (Set)
36 import Data.Map (Map)
37 import Data.Vector (Vector)
38 import Data.Time.Clock.POSIX (POSIXTime)
39 import GHC.Generics (Generic)
40 import Gargantext.Database.Schema.Ngrams (NgramsId)
41 import Gargantext.Core.Utils.Prefix (unPrefix)
42 import Gargantext.Prelude
43
44 ------------------------------------------------------------------------
45 data PhyloExport =
46 PhyloExport { _phyloExport_param :: PhyloParam
47 , _phyloExport_data :: Phylo
48 } deriving (Generic)
49
50 -- | .phylo parameters
51 data PhyloParam =
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_params :: Hash
55 } deriving (Generic)
56
57 type Hash = Text
58
59 -- | Software
60 -- TODO move somewhere since it is generic
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic)
65
66 ------------------------------------------------------------------------
67 -- | Phylo datatype descriptor of a phylomemy
68 -- Duration : time Segment of the whole phylomemy (start,end)
69 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
70 -- Steps : list of all steps to build the phylomemy
71 data Phylo =
72 Phylo { _phylo_duration :: (Start, End)
73 , _phylo_ngrams :: PhyloNgrams
74 , _phylo_periods :: [PhyloPeriod]
75 }
76 deriving (Generic, Show)
77
78
79 -- | Date : a simple Integer
80 type Date = Int
81
82 -- | UTCTime in seconds since UNIX epoch
83 -- type Start = POSIXTime
84 -- type End = POSIXTime
85 type Start = Date
86 type End = Date
87
88 -- | PhyloStep : steps of phylomemy on temporal axis
89 -- Period: tuple (start date, end date) of the step of the phylomemy
90 -- Levels: levels of granularity
91 data PhyloPeriod =
92 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
93 , _phylo_periodLevels :: [PhyloLevel]
94 }
95 deriving (Generic, Show)
96
97 type PhyloPeriodId = (Start, End)
98
99 -- | PhyloLevel : levels of phylomemy on level axis
100 -- Levels description:
101 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
102 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
103 -- Level 1: First level of clustering
104 -- Level N: Nth level of clustering
105 data PhyloLevel =
106 PhyloLevel { _phylo_levelId :: PhyloLevelId
107 , _phylo_levelGroups :: [PhyloGroup]
108 }
109 deriving (Generic, Show)
110
111 type PhyloLevelId = (PhyloPeriodId, Int)
112
113 -- | PhyloGroup : group of ngrams at each level and step
114 -- Label : maybe has a label as text
115 -- Ngrams: set of terms that build the group
116 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
117 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
118 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
119 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
120 data PhyloGroup =
121 PhyloGroup { _phylo_groupId :: PhyloGroupId
122 , _phylo_groupLabel :: Text
123 , _phylo_groupNgrams :: [Int]
124 , _phylo_groupQuality :: Map Text Double
125
126 , _phylo_groupPeriodParents :: [Pointer]
127 , _phylo_groupPeriodChilds :: [Pointer]
128
129 , _phylo_groupLevelParents :: [Pointer]
130 , _phylo_groupLevelChilds :: [Pointer]
131 }
132 deriving (Generic, Show)
133
134 type PhyloGroupId = (PhyloLevelId, Int)
135 type Pointer = (PhyloGroupId, Weight)
136 type Weight = Double
137
138
139
140 -- | Ngrams : a contiguous sequence of n terms
141 type Ngrams = Text
142 -- | PhyloNgrams : Vector of all the Ngrams (PhyloGroup of level -1) used within a Phylo
143 type PhyloNgrams = Vector Ngrams
144
145
146 -- | Clique : Set of ngrams cooccurring in the same Document
147 type Clique = Set Ngrams
148 -- | Support : Number of Documents where a Clique occurs
149 type Support = Int
150 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
151 type Fis = Map Clique Support
152
153
154 -- | Lenses
155 makeLenses ''Phylo
156 makeLenses ''PhyloParam
157 makeLenses ''PhyloExport
158 makeLenses ''Software
159 makeLenses ''PhyloGroup
160 makeLenses ''PhyloLevel
161 makeLenses ''PhyloPeriod
162
163 -- | JSON instances
164 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
165 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
166 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
167 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
168 --
169 $(deriveJSON (unPrefix "_software_" ) ''Software )
170 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
171 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
172
173 -- | TODO XML instances
174