]> Git — Sourcephile - gargantext.git/blob - src/Gargantext/Types/Phylo.hs
[TEXT-MINING] adding first functions/datatypes.
[gargantext.git] / src / Gargantext / Types / Phylo.hs
1 {-|
2 Module : Gargantext.Types.Phylo
3 Description : Main Types for Phylomemy
4 Copyright : (c) CNRS, 2017-Present
5 License : AGPL + CECILL v3
6 Maintainer : team@gargantext.org
7 Stability : experimental
8 Portability : POSIX
9
10 Specifications of Phylomemy format.
11
12 A phylomemy can be described as a temporal graph with different scales of
13 granularity of groups of ngrams (terms and multi-terms).
14
15 The main type is Phylo, which is a synonym of Phylomemy (the only difference
16 is the number of characters).
17
18 Phylomemy was first described in [REF].
19 -}
20
21 {-# LANGUAGE DeriveGeneric #-}
22 {-# LANGUAGE TemplateHaskell #-}
23
24 module Gargantext.Types.Phylo where
25
26 import Data.Aeson.TH (deriveJSON)
27 import Data.Maybe (Maybe)
28 import Data.Text (Text)
29 import Data.Time.Clock.POSIX (POSIXTime)
30
31 import GHC.Generics (Generic)
32
33 import Gargantext.Prelude
34 import Gargantext.Utils.Prefix (unPrefix)
35
36 ------------------------------------------------------------------------
-- | Top-level descriptor of a phylomemy.
data Phylo = Phylo
  { _phyloDuration :: (Start, End)
    -- ^ Time segment covered by the whole phylomemy, as a
    -- @(start, end)@ pair.
  , _phyloNgrams :: [Ngram]
    -- ^ All (possible) terms contained in the phylomemy, each paired
    -- with its id.
  , _phyloPeriods :: [PhyloPeriod]
    -- ^ The periods (steps) from which the phylomemy is built.
  }
  deriving (Generic)
45
-- | Start of a time segment, as a 'POSIXTime' (seconds since the UNIX epoch).
type Start = POSIXTime

-- | End of a time segment, as a 'POSIXTime' (seconds since the UNIX epoch).
type End = POSIXTime

-- | Identifier of an ngram.
type NgramId = Int

-- | An ngram: its identifier paired with its text.
type Ngram = (NgramId, Text)
52
-- | Identifier of a period: its @(start date, end date)@ pair.
type PhyloPeriodId = (Start, End)

-- | One period (step) of the phylomemy along the temporal axis.
data PhyloPeriod = PhyloPeriod
  { _phyloPeriodId :: PhyloPeriodId
    -- ^ The @(start date, end date)@ pair identifying this period.
  , _phyloPeriodLevels :: [PhyloLevel]
    -- ^ Levels of granularity for this period.
  }
  deriving (Generic)
61
-- | Identifier of a level: the period it belongs to, paired with an 'Int'.
type PhyloLevelId = (PhyloPeriodId, Int)

-- | One level of the phylomemy along the level (granularity) axis.
--
-- Levels description:
--
-- * Level -1: Ngram equals itself (by identity), i.e. '_phyloNgrams'
--
-- * Level 0: group of synonyms (by stems + by qualitative expert meaning)
--
-- * Level 1: first level of clustering
--
-- * Level N: Nth level of clustering
data PhyloLevel = PhyloLevel
  { _phyloLevelId :: PhyloLevelId
    -- ^ Identifier of this level.
  , _phyloLevelGroups :: [PhyloGroup]
    -- ^ Groups of ngrams found at this level.
  }
  deriving (Generic)
73
-- | Identifier of a group: the level it belongs to, paired with an 'Int'.
type PhyloGroupId = (PhyloLevelId, Int)

-- | Weight carried by an 'Edge'.
type Weight = Double

-- | Weighted link to another group, designated by its id.
type Edge = (PhyloGroupId, Weight)

-- | Group of ngrams at a given level and period (step).
data PhyloGroup = PhyloGroup
  { _phyloGroupId :: PhyloGroupId
    -- ^ Identifier of this group.
  , _phyloGroupLabel :: Maybe Text
    -- ^ Optional textual label.
  , _phyloGroupNgrams :: [NgramId]
    -- ^ Ids of the terms that build the group.
  , _phyloGroupPeriodParents :: [Edge]
    -- ^ Weighted links to parents on the temporal (period) axis.
  , _phyloGroupPeriodChilds :: [Edge]
    -- ^ Weighted links to children on the temporal (period) axis.
  , _phyloGroupLevelParents :: [Edge]
    -- ^ Weighted links to parents on the level (granularity) axis.
  , _phyloGroupLevelChilds :: [Edge]
    -- ^ Weighted links to children on the level (granularity) axis.
  }
  deriving (Generic)
93
-- JSON instances, generated by Template Haskell through 'deriveJSON'.
-- Each record hands its own field prefix to 'unPrefix'.
-- NOTE(review): 'unPrefix' presumably builds aeson options that drop that
-- prefix from the field names -- confirm in Gargantext.Utils.Prefix.
-- The splices are written as implicit top-level splices; their order is
-- kept as it was.
deriveJSON (unPrefix "_phylo") ''Phylo
deriveJSON (unPrefix "_phyloPeriod") ''PhyloPeriod
deriveJSON (unPrefix "_phyloLevel") ''PhyloLevel
deriveJSON (unPrefix "_phyloGroup") ''PhyloGroup