]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Viz/Phylo.hs
[Phylo] Reading cosmetics / New Tools file.
[gargantext.git] / src / Gargantext / Viz / Phylo.hs
1 {-|
2 Module : Gargantext.Viz.Phylo
3 Description : Phylomemy definitions and types.
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy export format.
11
12 Phylomemy can be described as a Temporal Graph with different scale of
13 granularity of group of ngrams (terms and multi-terms).
14
15 The main type is Phylo which is synonym of Phylomemy (only difference is
16 the number of chars).
17
18 References:
19 Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
20 in science evolution — the rise and fall of scientific fields. PloS
21 one 8, e54847.
22
23 -}
24
25 {-# LANGUAGE DeriveGeneric #-}
26 {-# LANGUAGE NoImplicitPrelude #-}
27 {-# LANGUAGE TemplateHaskell #-}
28
29 module Gargantext.Viz.Phylo where
30
31 import Control.Lens (makeLenses)
32 import Data.Aeson.TH (deriveJSON)
33 import Data.Maybe (Maybe)
34 import Data.Text (Text)
35 import Data.Set (Set)
36 import Data.Map (Map)
37 import Data.Vector (Vector)
38 import Data.Time.Clock.POSIX (POSIXTime)
39 import GHC.Generics (Generic)
40 import Gargantext.Database.Schema.Ngrams (NgramsId)
41 import Gargantext.Core.Utils.Prefix (unPrefix)
42 import Gargantext.Prelude
43
44 ------------------------------------------------------------------------
45 data PhyloExport =
46 PhyloExport { _phyloExport_param :: PhyloParam
47 , _phyloExport_data :: Phylo
48 } deriving (Generic)
49
50 -- | .phylo parameters
51 data PhyloParam =
52 PhyloParam { _phyloParam_version :: Text -- Double ?
53 , _phyloParam_software :: Software
54 , _phyloParam_params :: Hash
55 } deriving (Generic)
56
57 type Hash = Text
58
59 -- | Software
60 -- TODO move somewhere since it is generic
61 data Software =
62 Software { _software_name :: Text
63 , _software_version :: Text
64 } deriving (Generic)
65
66 ------------------------------------------------------------------------
67 -- | Phylo datatype descriptor of a phylomemy
68 -- Duration : time Segment of the whole phylomemy (start,end)
69 -- Ngrams : list of all (possible) terms contained in the phylomemy (with their id)
70 -- Steps : list of all steps to build the phylomemy
71 data Phylo =
72 Phylo { _phylo_duration :: (Start, End)
73 , _phylo_ngrams :: PhyloNgrams
74 , _phylo_periods :: [PhyloPeriod]
75 }
76 deriving (Generic, Show)
77
78 -- | UTCTime in seconds since UNIX epoch
79 -- type Start = POSIXTime
80 -- type End = POSIXTime
81 type Start = Int
82 type End = Int
83
84
85 -- | PhyloStep : steps of phylomemy on temporal axis
86 -- Period: tuple (start date, end date) of the step of the phylomemy
87 -- Levels: levels of granularity
88 data PhyloPeriod =
89 PhyloPeriod { _phylo_periodId :: PhyloPeriodId
90 , _phylo_periodLevels :: [PhyloLevel]
91 }
92 deriving (Generic, Show)
93
94 type PhyloPeriodId = (Start, End)
95
96 -- | PhyloLevel : levels of phylomemy on level axis
97 -- Levels description:
98 -- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
99 -- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
100 -- Level 1: First level of clustering
101 -- Level N: Nth level of clustering
102 data PhyloLevel =
103 PhyloLevel { _phylo_levelId :: PhyloLevelId
104 , _phylo_levelGroups :: [PhyloGroup]
105 }
106 deriving (Generic, Show)
107
108 type PhyloLevelId = (PhyloPeriodId, Int)
109
110 -- | PhyloGroup : group of ngrams at each level and step
111 -- Label : maybe has a label as text
112 -- Ngrams: set of terms that build the group
113 -- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
114 -- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
115 -- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
116 -- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
117 data PhyloGroup =
118 PhyloGroup { _phylo_groupId :: PhyloGroupId
119 , _phylo_groupLabel :: Text
120 , _phylo_groupNgrams :: [Int]
121 , _phylo_groupQuality :: Map Text Double
122
123 , _phylo_groupPeriodParents :: [Pointer]
124 , _phylo_groupPeriodChilds :: [Pointer]
125
126 , _phylo_groupLevelParents :: [Pointer]
127 , _phylo_groupLevelChilds :: [Pointer]
128 }
129 deriving (Generic, Show)
130
131 type PhyloGroupId = (PhyloLevelId, Int)
132 type Pointer = (PhyloGroupId, Weight)
133 type Weight = Double
134
135
136
137 -- | Ngrams : a contiguous sequence of n terms
138 type Ngrams = Text
139 -- | PhyloNgrams : Vector of all the Ngrams (PhyloGroup of level -1) used within a Phylo
140 type PhyloNgrams = Vector Ngrams
141
142
143 -- | Clique : Set of ngrams cooccurring in the same Document
144 type Clique = Set Ngrams
145 -- | Support : Number of Documents where a Clique occurs
146 type Support = Int
147 -- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
148 type Fis = Map Clique Support
149
150
151 -- | Lenses
152 makeLenses ''Phylo
153 makeLenses ''PhyloParam
154 makeLenses ''PhyloExport
155 makeLenses ''Software
156 makeLenses ''PhyloGroup
157 makeLenses ''PhyloLevel
158 makeLenses ''PhyloPeriod
159
160 -- | JSON instances
161 $(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
162 $(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
163 $(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
164 $(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
165 --
166 $(deriveJSON (unPrefix "_software_" ) ''Software )
167 $(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
168 $(deriveJSON (unPrefix "_phyloExport_" ) ''PhyloExport )
169
170 -- | TODO XML instances
171