]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Types/Phylo.hs
[Structure] Ngrams -> Text.
[gargantext.git] / src / Gargantext / Types / Phylo.hs
1 {-|
2 Module : Gargantext.Types.Phylo
3 Description : Main Types for Phylomemy
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy format.
11
12 A phylomemy can be described as a temporal graph with different scales of
13 granularity for groups of ngrams (terms and multi-terms).
14
15 The main type is 'Phylo', which is a synonym of Phylomemy (the only
16 difference is the number of characters).
17
18 Phylomemy was first described in [REF].
19 -}
20
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE NoImplicitPrelude #-}
23 {-# LANGUAGE TemplateHaskell #-}
24
25 module Gargantext.Types.Phylo where
26
27 import Data.Aeson.TH (deriveJSON)
28 import Data.Maybe (Maybe)
29 import Data.Text (Text)
30 import Data.Time.Clock.POSIX (POSIXTime)
31
32 import GHC.Generics (Generic)
33
34 import Gargantext.Prelude
35 import Gargantext.Utils.Prefix (unPrefix)
36
37 ------------------------------------------------------------------------
-- | Top-level descriptor of a phylomemy.
data Phylo = Phylo
  { _phyloDuration :: (Start, End)
    -- ^ Time segment covered by the whole phylomemy, as a (start, end) pair.
  , _phyloNgrams   :: [Ngram]
    -- ^ All (possible) terms contained in the phylomemy, each with its id.
  , _phyloPeriods  :: [PhyloPeriod]
    -- ^ The periods (steps) from which the phylomemy is built.
  } deriving Generic
46
-- | Start of a time segment, as POSIX time (seconds since the UNIX epoch).
type Start = POSIXTime

-- | End of a time segment, as POSIX time (seconds since the UNIX epoch).
type End = POSIXTime
50
-- | Integer identifier of an ngram.
type NgramId = Int

-- | An ngram: its identifier paired with its text.
type Ngram = (NgramId, Text)
53
-- | One period (step) of the phylomemy along the temporal axis.
data PhyloPeriod = PhyloPeriod
  { _phyloPeriodId     :: PhyloPeriodId
    -- ^ The (start, end) pair delimiting this period.
  , _phyloPeriodLevels :: [PhyloLevel]
    -- ^ Levels of granularity within this period.
  } deriving Generic
60
-- | A period is identified by its (start, end) time bounds.
type PhyloPeriodId = (Start, End)
62
-- | One level of the phylomemy along the level (granularity) axis.
--
-- Meaning of the level numbers:
--
-- * Level -1: each ngram equals itself (by identity), i.e. '_phyloNgrams'
-- * Level 0: groups of synonyms (by stems and by qualitative expert meaning)
-- * Level 1: first level of clustering
-- * Level N: Nth level of clustering
data PhyloLevel = PhyloLevel
  { _phyloLevelId     :: PhyloLevelId
    -- ^ Identifier of this level (see 'PhyloLevelId').
  , _phyloLevelGroups :: [PhyloGroup]
    -- ^ Groups of ngrams found at this level.
  } deriving Generic
72
-- | Level identifier: a period id paired with an 'Int'
-- (presumably the level number; to be confirmed against the producers).
type PhyloLevelId = (PhyloPeriodId, Int)
74
-- | A group of ngrams at a given level and period (step).
data PhyloGroup = PhyloGroup
  { _phyloGroupId            :: PhyloGroupId
    -- ^ Identifier of this group (see 'PhyloGroupId').
  , _phyloGroupLabel         :: Maybe Text
    -- ^ Optional textual label of the group.
  , _phyloGroupNgrams        :: [NgramId]
    -- ^ Ids of the terms that build the group.
  , _phyloGroupPeriodParents :: [Edge]
    -- ^ Weighted links to the parents along the temporal (period) axis.
  , _phyloGroupPeriodChilds  :: [Edge]
    -- ^ Weighted links to the children along the temporal (period) axis.
  , _phyloGroupLevelParents  :: [Edge]
    -- ^ Weighted links to the parents along the level (granularity) axis.
  , _phyloGroupLevelChilds   :: [Edge]
    -- ^ Weighted links to the children along the level (granularity) axis.
  } deriving Generic
90
-- | Group identifier: a level id paired with an 'Int'.
type PhyloGroupId = (PhyloLevelId, Int)

-- | Weight carried by an 'Edge'.
type Weight = Double

-- | Weighted link to another group, designated by its id.
type Edge = (PhyloGroupId, Weight)
94
-- JSON instances, generated with Template Haskell (bare top-level splices).
-- Each call passes the record-field prefix of its type to 'unPrefix'
-- (presumably so the prefix is dropped from the JSON keys; see
-- "Gargantext.Utils.Prefix" to confirm).
deriveJSON (unPrefix "_phylo")       ''Phylo
deriveJSON (unPrefix "_phyloPeriod") ''PhyloPeriod
deriveJSON (unPrefix "_phyloLevel")  ''PhyloLevel
deriveJSON (unPrefix "_phyloGroup")  ''PhyloGroup