-{-|
-Module : Gargantext.Core.Viz.Phylo
+{-
+Module : Gargantext.Core.Viz.AdaptativePhylo
Description : Phylomemy definitions and types.
Copyright : (c) CNRS, 2017-Present
License : AGPL + CECILL v3
Chavalarias, D., Cointet, J.-P., 2013. Phylomemetic patterns
in science evolution — the rise and fall of scientific fields. PloS
one 8, e54847.
-
-}
{-# LANGUAGE DeriveAnyClass #-}
module Gargantext.Core.Viz.Phylo where
-import Control.DeepSeq
+import Control.DeepSeq (NFData)
import Control.Lens (makeLenses)
-import Data.Aeson.TH (deriveJSON,defaultOptions)
-import Data.Map (Map)
-import Data.Set (Set)
+import Data.Aeson
+import Data.Aeson.TH (deriveJSON)
+import Data.Map (Map)
import Data.Swagger
-import Data.Text (Text)
-import Data.Vector (Vector)
-import GHC.Generics (Generic)
-import Gargantext.Core.Utils.Prefix (unPrefix, unPrefixSwagger)
-import Gargantext.Prelude
+import Data.Text (Text, pack)
+import Data.Vector (Vector)
+import GHC.Generics
+import GHC.IO (FilePath)
import Gargantext.Core.Text.Context (TermList)
+import Gargantext.Core.Utils.Prefix (unPrefix)
+import Gargantext.Core.Utils.Prefix (unPrefixSwagger)
+import Gargantext.Prelude
+import qualified Data.Text.Lazy as TextLazy
---------------------
--- | PhyloParam | --
---------------------
+----------------
+-- | PhyloConfig | --
+----------------
+data CorpusParser =
+ Wos {_wos_limit :: Int}
+ | Csv {_csv_limit :: Int}
+ | Csv' {_csv'_limit :: Int}
+ deriving (Show,Generic,Eq)
--- | Global parameters of a Phylo
-data PhyloParam =
- PhyloParam { _phyloParam_version :: !Text -- Double ?
- , _phyloParam_software :: !Software
- , _phyloParam_query :: !PhyloQueryBuild
- } deriving (Generic, Show, Eq)
+instance ToSchema CorpusParser where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
--- | Software parameters
-data Software =
- Software { _software_name :: !Text
- , _software_version :: !Text
- } deriving (Generic, Show, Eq)
+data ListParser = V3 | V4 deriving (Show,Generic,Eq)
+instance ToSchema ListParser
----------------
--- | Phylo | --
----------------
+data SeaElevation =
+ Constante
+ { _cons_start :: Double
+ , _cons_step :: Double }
+ | Adaptative
+ { _adap_granularity :: Double }
+ deriving (Show,Generic,Eq)
+instance ToSchema SeaElevation
--- | Phylo datatype of a phylomemy
--- Duration : time Segment of the whole Phylo
--- Foundations : vector of all the Ngrams contained in a Phylo (build from a list of actants)
--- Periods : list of all the periods of a Phylo
-data Phylo =
- Phylo { _phylo_duration :: !(Start, End)
- , _phylo_foundations :: !PhyloFoundations
- , _phylo_periods :: [PhyloPeriod]
- , _phylo_docsByYears :: !(Map Date Double)
- , _phylo_cooc :: !(Map Date (Map (Int,Int) Double))
- , _phylo_fis :: !(Map (Date,Date) [PhyloFis])
- , _phylo_param :: !PhyloParam
- }
- deriving (Generic, Show, Eq)
+data Proximity =
+ WeightedLogJaccard
+ { _wlj_sensibility :: Double
+{-
+ -- , _wlj_thresholdInit :: Double
+ -- , _wlj_thresholdStep :: Double
+ -- | max height for sea level in temporal matching
+ -- , _wlj_elevation :: Double
+-}
+ }
+ | WeightedLogSim
+ { _wlj_sensibility :: Double
+{-
+ -- , _wlj_thresholdInit :: Double
+ -- , _wlj_thresholdStep :: Double
+ -- | max height for sea level in temporal matching
+ -- , _wlj_elevation :: Double
+-}
+ }
+ | Hamming { _wlj_sensibility :: Double }
+ deriving (Show,Generic,Eq)
--- | The foundations of a phylomemy created from a given TermList
-data PhyloFoundations =
- PhyloFoundations { _phylo_foundationsRoots :: !(Vector Ngrams)
- , _phylo_foundationsTermsList :: !TermList
- } deriving (Generic, Show, Eq)
+instance ToSchema Proximity where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
+
+
+data SynchronyScope = SingleBranch | SiblingBranches | AllBranches
+ deriving (Show,Generic,Eq, ToSchema)
+
+data SynchronyStrategy = MergeRegularGroups | MergeAllGroups
+ deriving (Show,Generic,Eq)
+
+instance ToSchema SynchronyStrategy where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
+
+
+data Synchrony =
+ ByProximityThreshold
+ { _bpt_threshold :: Double
+ , _bpt_sensibility :: Double
+ , _bpt_scope :: SynchronyScope
+ , _bpt_strategy :: SynchronyStrategy }
+ | ByProximityDistribution
+ { _bpd_sensibility :: Double
+ , _bpd_strategy :: SynchronyStrategy }
+ deriving (Show,Generic,Eq)
+
+instance ToSchema Synchrony where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
+
+
+
+data TimeUnit =
+ Epoch
+ { _epoch_period :: Int
+ , _epoch_step :: Int
+ , _epoch_matchingFrame :: Int }
+ | Year
+ { _year_period :: Int
+ , _year_step :: Int
+ , _year_matchingFrame :: Int }
+ | Month
+ { _month_period :: Int
+ , _month_step :: Int
+ , _month_matchingFrame :: Int }
+ | Week
+ { _week_period :: Int
+ , _week_step :: Int
+ , _week_matchingFrame :: Int }
+ | Day
+ { _day_period :: Int
+ , _day_step :: Int
+ , _day_matchingFrame :: Int }
+ deriving (Show,Generic,Eq)
+
+instance ToSchema TimeUnit where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
+
+
+data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
+
+instance ToSchema CliqueFilter where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
+
+
+
+data Clique =
+ Fis
+ { _fis_support :: Int
+ , _fis_size :: Int }
+ | MaxClique
+ { _mcl_size :: Int
+ , _mcl_threshold :: Double
+ , _mcl_filter :: CliqueFilter }
+ deriving (Show,Generic,Eq)
+
+instance ToSchema Clique where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
+
+
+data Quality =
+ Quality { _qua_granularity :: Double
+ , _qua_minBranch :: Int }
+ deriving (Show,Generic,Eq)
+
+instance ToSchema Quality where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_qua_")
+
+
+data PhyloConfig =
+ PhyloConfig { corpusPath :: FilePath
+ , listPath :: FilePath
+ , outputPath :: FilePath
+ , corpusParser :: CorpusParser
+ , listParser :: ListParser
+ , phyloName :: Text
+ , phyloLevel :: Int
+ , phyloProximity :: Proximity
+ , seaElevation :: SeaElevation
+ , findAncestors :: Bool
+ , phyloSynchrony :: Synchrony
+ , phyloQuality :: Quality
+ , timeUnit :: TimeUnit
+ , clique :: Clique
+ , exportLabel :: [PhyloLabel]
+ , exportSort :: Sort
+ , exportFilter :: [Filter]
+ } deriving (Show,Generic,Eq)
+
+
+------------------------------------------------------------------------
+data PhyloSubConfig =
+ PhyloSubConfig { _sc_phyloProximity :: Double
+ , _sc_phyloSynchrony :: Double
+ , _sc_phyloQuality :: Double
+ , _sc_timeUnit :: TimeUnit
+ , _sc_clique :: Clique
+ , _sc_exportFilter :: Double
+ }
+ deriving (Show,Generic,Eq)
--- | Date : a simple Integer
-type Date = Int
+subConfig2config :: PhyloSubConfig -> PhyloConfig
+subConfig2config subConfig = defaultConfig { phyloProximity = WeightedLogJaccard $ _sc_phyloProximity subConfig
+ , phyloSynchrony = ByProximityThreshold (_sc_phyloSynchrony subConfig) 0 AllBranches MergeAllGroups
+ , phyloQuality = Quality (_sc_phyloQuality subConfig) 1
+ , timeUnit = _sc_timeUnit subConfig
+ , clique = _sc_clique subConfig
+ , exportFilter = [ByBranchSize $ _sc_exportFilter subConfig]
+ }
--- | UTCTime in seconds since UNIX epoch
--- type Start = POSIXTime
--- type End = POSIXTime
-type Start = Date
-type End = Date
+------------------------------------------------------------------------
+defaultConfig :: PhyloConfig
+defaultConfig =
+ PhyloConfig { corpusPath = "corpus.csv" -- useful for commandline only
+ , listPath = "list.csv" -- useful for commandline only
+ , outputPath = "data/"
+ , corpusParser = Csv 100000
+ , listParser = V4
+ , phyloName = pack "Phylo Name"
+ , phyloLevel = 2
+ , phyloProximity = WeightedLogJaccard 0.5
+ , seaElevation = Constante 0.1 0.1
+ , findAncestors = False
+ , phyloSynchrony = ByProximityThreshold 0.5 0 AllBranches MergeAllGroups
+ , phyloQuality = Quality 0.5 1
+ , timeUnit = Year 3 1 5
+ , clique = MaxClique 5 0.0001 ByThreshold
+ , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
+ , exportSort = ByHierarchy Desc
+ , exportFilter = [ByBranchSize 3]
+ }
+-- Main Instances
+instance ToSchema PhyloConfig
+instance ToSchema PhyloSubConfig
----------------------
--- | PhyloPeriod | --
----------------------
+instance FromJSON PhyloConfig
+instance ToJSON PhyloConfig
+instance FromJSON PhyloSubConfig
+instance ToJSON PhyloSubConfig
--- | PhyloStep : steps of phylomemy on temporal axis
--- Period: tuple (start date, end date) of the step of the phylomemy
--- Levels: levels of granularity
-data PhyloPeriod =
- PhyloPeriod { _phylo_periodId :: !PhyloPeriodId
- , _phylo_periodLevels :: ![PhyloLevel]
- }
- deriving (Generic, Show, Eq)
+instance FromJSON CorpusParser
+instance ToJSON CorpusParser
+instance FromJSON ListParser
+instance ToJSON ListParser
---------------------
--- | PhyloLevel | --
---------------------
+instance FromJSON Proximity
+instance ToJSON Proximity
+instance FromJSON SeaElevation
+instance ToJSON SeaElevation
--- | PhyloLevel : levels of phylomemy on level axis
--- Levels description:
--- Level -1: Ngram equals itself (by identity) == _phylo_Ngrams
--- Level 0: Group of synonyms (by stems + by qualitative expert meaning)
--- Level 1: First level of clustering
--- Level N: Nth level of clustering
-data PhyloLevel =
- PhyloLevel { _phylo_levelId :: !PhyloLevelId
- , _phylo_levelGroups :: ![PhyloGroup]
- }
- deriving (Generic, Show, Eq)
+instance FromJSON TimeUnit
+instance ToJSON TimeUnit
+instance FromJSON CliqueFilter
+instance ToJSON CliqueFilter
---------------------
--- | PhyloGroup | --
---------------------
+instance FromJSON Clique
+instance ToJSON Clique
+instance FromJSON PhyloLabel
+instance ToJSON PhyloLabel
--- | PhyloGroup : group of ngrams at each level and step
--- Label : maybe has a label as text
--- Ngrams: set of terms that build the group
--- Quality : map of measures (support, etc.) that depict some qualitative aspects of a phylo
--- Period Parents|Childs: weighted link to Parents|Childs (Temporal Period axis)
--- Level Parents|Childs: weighted link to Parents|Childs (Level Granularity axis)
--- Pointers are directed link from Self to any PhyloGroup (/= Self ?)
-data PhyloGroup =
- PhyloGroup { _phylo_groupId :: !PhyloGroupId
- , _phylo_groupLabel :: !Text
- , _phylo_groupNgrams :: ![Int]
- , _phylo_groupNgramsMeta :: !(Map Text [Double])
- , _phylo_groupMeta :: !(Map Text Double)
- , _phylo_groupBranchId :: !(Maybe PhyloBranchId)
- , _phylo_groupCooc :: !(Map (Int,Int) Double)
-
- , _phylo_groupPeriodParents :: ![Pointer]
- , _phylo_groupPeriodChilds :: ![Pointer]
-
- , _phylo_groupLevelParents :: ![Pointer]
- , _phylo_groupLevelChilds :: ![Pointer]
- }
- deriving (Generic, NFData, Show, Eq, Ord)
+instance FromJSON Tagger
+instance ToJSON Tagger
--- instance NFData PhyloGroup
+instance FromJSON Sort
+instance ToJSON Sort
+instance FromJSON Order
+instance ToJSON Order
--- | Level : A level of aggregation (-1 = Txt, 0 = Ngrams, 1 = Fis, [2..] = Cluster)
-type Level = Int
--- | Index : A generic index of an element (PhyloGroup, PhyloBranch, etc) in a given List
-type Index = Int
+instance FromJSON Filter
+instance ToJSON Filter
+instance FromJSON SynchronyScope
+instance ToJSON SynchronyScope
-type PhyloPeriodId = (Start, End)
-type PhyloLevelId = (PhyloPeriodId, Level)
-type PhyloGroupId = (PhyloLevelId, Index)
-type PhyloBranchId = (Level, Index)
+instance FromJSON SynchronyStrategy
+instance ToJSON SynchronyStrategy
+instance FromJSON Synchrony
+instance ToJSON Synchrony
--- | Weight : A generic mesure that can be associated with an Id
-type Weight = Double
--- | Pointer : A weighted linked with a given PhyloGroup
-type Pointer = (PhyloGroupId, Weight)
--- | Ngrams : a contiguous sequence of n terms
-type Ngrams = Text
+instance FromJSON Quality
+instance ToJSON Quality
---------------------
--- | Aggregates | --
---------------------
+-- | Software parameters
+data Software =
+ Software { _software_name :: Text
+ , _software_version :: Text
+ } deriving (Generic, Show, Eq)
+instance ToSchema Software where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
--- | Document : a piece of Text linked to a Date
-data Document = Document
- { date :: !Date
- , text :: ![Ngrams]
- } deriving (Show,Generic,NFData)
--- | Clique : Set of ngrams cooccurring in the same Document
-type Clique = Set Ngrams
--- | Support : Number of Documents where a Clique occurs
-type Support = Int
--- | Fis : Frequent Items Set (ie: the association between a Clique and a Support)
-data PhyloFis = PhyloFis
- { _phyloFis_clique :: !Clique
- , _phyloFis_support :: !Support
- , _phyloFis_period :: !(Date,Date)
- } deriving (Generic,NFData,Show,Eq)
--- | A list of clustered PhyloGroup
-type PhyloCluster = [PhyloGroup]
+defaultSoftware :: Software
+defaultSoftware =
+ Software { _software_name = pack "Gargantext"
+ , _software_version = pack "v4" }
--- | A PhyloGroup in a Graph
-type GroupNode = PhyloGroup
--- | A weighted links between two PhyloGroups in a Graph
-type GroupEdge = ((PhyloGroup,PhyloGroup),Weight)
--- | The association as a Graph between a list of Nodes and a list of Edges
-type GroupGraph = ([GroupNode],[GroupEdge])
+-- | Global parameters of a Phylo
+data PhyloParam =
+ PhyloParam { _phyloParam_version :: Text
+ , _phyloParam_software :: Software
+ , _phyloParam_config :: PhyloConfig
+ } deriving (Generic, Show, Eq)
+instance ToSchema PhyloParam where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
----------------
--- | Error | --
----------------
-data PhyloError = LevelDoesNotExist
- | LevelUnassigned
- deriving (Show)
+defaultPhyloParam :: PhyloParam
+defaultPhyloParam =
+ PhyloParam { _phyloParam_version = pack "v2.adaptative"
+ , _phyloParam_software = defaultSoftware
+ , _phyloParam_config = defaultConfig }
------------------
--- | Cluster | --
------------------
+------------------
+-- | Document | --
+------------------
+-- | Date : a simple Integer
+type Date = Int
--- | Cluster constructors
-data Cluster = Fis FisParams
- | RelatedComponents RCParams
- | Louvain LouvainParams
- deriving (Generic, Show, Eq, Read)
+-- | Ngrams : a contiguous sequence of n terms
+type Ngrams = Text
--- | Parameters for Fis clustering
-data FisParams = FisParams
- { _fis_keepMinorFis :: !Bool
- , _fis_minSupport :: !Support
- , _fis_minSize :: !Int
- } deriving (Generic, Show, Eq, Read)
+-- Document : a piece of Text linked to a Date
+-- date = computational date; date' = original string date yyyy-mm-dd
+-- Export Database to Document
+data Document = Document
+ { date :: Date -- datatype Date {unDate :: Int}
+ , date' :: Text -- show date
+ , text :: [Ngrams]
+ , weight :: Maybe Double
+ , sources :: [Text]
+ } deriving (Eq,Show,Generic,NFData)
--- | Parameters for RelatedComponents clustering
-data RCParams = RCParams
- { _rc_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
--- | Parameters for Louvain clustering
-data LouvainParams = LouvainParams
- { _louvain_proximity :: !Proximity } deriving (Generic, Show, Eq, Read)
+--------------------
+-- | Foundation | --
+--------------------
--------------------
--- | Proximity | --
--------------------
+-- | The Foundations of a Phylo created from a given TermList
+data PhyloFoundations = PhyloFoundations
+ { _foundations_roots :: !(Vector Ngrams)
+ , _foundations_mapList :: TermList
+ } deriving (Generic, Show, Eq)
+instance ToSchema PhyloFoundations where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_foundations_")
--- | Proximity constructors
-data Proximity = WeightedLogJaccard WLJParams
- | WeightedLogSim WLJParams
- | Hamming HammingParams
- | Filiation
- deriving (Generic, Show, Eq, Read)
--- | Parameters for WeightedLogJaccard and WeightedLogSim proximity
-data WLJParams = WLJParams
- { _wlj_threshold :: !Double
- , _wlj_sensibility :: !Double
- } deriving (Generic, Show, Eq, Read)
--- | Parameters for Hamming proximity
-data HammingParams = HammingParams
- { _hamming_threshold :: !Double } deriving (Generic, Show, Eq, Read)
+data PhyloSources = PhyloSources
+ { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
+instance ToSchema PhyloSources where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
-----------------
--- | Filter | --
-----------------
+---------------------------
+-- | Coocurency Matrix | --
+---------------------------
--- | Filter constructors
-data Filter = LonelyBranch LBParams
- | SizeBranch SBParams
- deriving (Generic, Show, Eq)
+-- | Cooc : a coocurency matrix between two ngrams
+type Cooc = Map (Int,Int) Double
--- | Parameters for LonelyBranch filter
-data LBParams = LBParams
- { _lb_periodsInf :: !Int
- , _lb_periodsSup :: !Int
- , _lb_minNodes :: !Int } deriving (Generic, Show, Eq)
--- | Parameters for SizeBranch filter
-data SBParams = SBParams
- { _sb_minSize :: !Int } deriving (Generic, Show, Eq)
+-------------------
+-- | Phylomemy | --
+-------------------
-----------------
--- | Metric | --
-----------------
+-- | Phylo datatype of a phylomemy
+-- foundations : the foundations of the phylo
+-- timeCooc : a Map of coocurency by minimal unit of time (ex: by year)
+-- timeDocs : a Map with the numbers of docs by minimal unit of time (ex: by year)
+-- param : the parameters of the phylomemy (with the user's configuration)
+-- periods : the temporal steps of a phylomemy
+data Phylo =
+ Phylo { _phylo_foundations :: PhyloFoundations
+ , _phylo_sources :: PhyloSources
+ , _phylo_timeCooc :: !(Map Date Cooc)
+ , _phylo_timeDocs :: !(Map Date Double)
+ , _phylo_termFreq :: !(Map Int Double)
+ , _phylo_lastTermFreq :: !(Map Int Double)
+ , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
+ , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
+ , _phylo_param :: PhyloParam
+ , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
+ }
+ deriving (Generic, Show, Eq)
+instance ToSchema Phylo where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
--- | Metric constructors
-data Metric = BranchAge | BranchBirth | BranchGroups deriving (Generic, Show, Eq, Read)
+-- | PhyloPeriodId : the id of a given period
+type PhyloPeriodId = (Date,Date)
-----------------
--- | Tagger | --
-----------------
+-- | PhyloPeriod : steps of a phylomemy on a temporal axis
+-- id: tuple (start date, end date) of the temporal step of the phylomemy
+-- levels: levels of granularity
+data PhyloPeriod =
+ PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
+ , _phylo_periodPeriod' :: (Text,Text)
+ , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
+ } deriving (Generic, Show, Eq)
+instance ToSchema PhyloPeriod where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
--- | Tagger constructors
-data Tagger = BranchPeakFreq | BranchPeakCooc | BranchPeakInc
- | GroupLabelCooc | GroupLabelInc | GroupLabelIncDyn deriving (Show,Generic,Read)
---------------
--- | Sort | --
---------------
+-- | Level : a level of clustering
+type Level = Int
+-- | PhyloLevelId : the id of a level of clustering in a given period
+type PhyloLevelId = (PhyloPeriodId,Level)
--- | Sort constructors
-data Sort = ByBranchAge | ByBranchBirth deriving (Generic, Show, Read, Enum, Bounded)
-data Order = Asc | Desc deriving (Generic, Show, Read)
+-- | PhyloLevel : levels of phylomemy on a synchronic axis
+-- Levels description:
+-- Level 0: The foundations and the base of the phylo
+-- Level 1: First level of clustering (the Fis)
+-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
+data PhyloLevel =
+ PhyloLevel { _phylo_levelPeriod :: (Date,Date)
+ , _phylo_levelPeriod' :: (Text,Text)
+ , _phylo_levelLevel :: Level
+ , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
+ }
+ deriving (Generic, Show, Eq)
+instance ToSchema PhyloLevel where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
---------------------
--- | PhyloQuery | --
---------------------
+type PhyloGroupId = (PhyloLevelId, Int)
--- | A Phyloquery describes a phylomemic reconstruction
-data PhyloQueryBuild = PhyloQueryBuild
- { _q_phyloTitle :: !Text
- , _q_phyloDesc :: !Text
+-- | BranchId : (a level, a sequence of branch index)
+-- the sequence is a path of heritage from the most to the less specific branch
+type PhyloBranchId = (Level, [Int])
- -- Grain and Steps for the PhyloPeriods
- , _q_periodGrain :: !Int
- , _q_periodSteps :: !Int
+-- | PhyloGroup : group of ngrams at each level and period
+data PhyloGroup =
+ PhyloGroup { _phylo_groupPeriod :: (Date,Date)
+ , _phylo_groupPeriod' :: (Text,Text)
+ , _phylo_groupLevel :: Level
+ , _phylo_groupIndex :: Int
+ , _phylo_groupLabel :: Text
+ , _phylo_groupSupport :: Support
+ , _phylo_groupWeight :: Maybe Double
+ , _phylo_groupSources :: [Int]
+ , _phylo_groupNgrams :: [Int]
+ , _phylo_groupCooc :: !(Cooc)
+ , _phylo_groupBranchId :: PhyloBranchId
+ , _phylo_groupMeta :: Map Text [Double]
+ , _phylo_groupLevelParents :: [Pointer]
+ , _phylo_groupLevelChilds :: [Pointer]
+ , _phylo_groupPeriodParents :: [Pointer]
+ , _phylo_groupPeriodChilds :: [Pointer]
+ , _phylo_groupAncestors :: [Pointer]
+ , _phylo_groupPeriodMemoryParents :: [Pointer']
+ , _phylo_groupPeriodMemoryChilds :: [Pointer']
+ }
+ deriving (Generic, Show, Eq, NFData)
- -- Clustering method for building the contextual unit of Phylo (ie: level 1)
- , _q_contextualUnit :: !Cluster
- , _q_contextualUnitMetrics :: ![Metric]
- , _q_contextualUnitFilters :: ![Filter]
+instance ToSchema PhyloGroup where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
- -- Inter-temporal matching method of the Phylo
- , _q_interTemporalMatching :: !Proximity
- , _q_interTemporalMatchingFrame :: !Int
- , _q_interTemporalMatchingFrameTh :: !Double
- , _q_reBranchThr :: !Double
- , _q_reBranchNth :: !Int
+-- | Weight : A generic mesure that can be associated with an Id
+type Weight = Double
+type Thr = Double
- -- Last level of reconstruction
- , _q_nthLevel :: !Level
- -- Clustering method used from level 1 to nthLevel
- , _q_nthCluster :: !Cluster
- } deriving (Generic, Show, Eq)
+-- | Pointer : A weighted pointer to a given PhyloGroup
+type Pointer = (PhyloGroupId, Weight)
+-- | Pointer' : A weighted pointer to a given PhyloGroup with a lower bounded threshold
+type Pointer' = (PhyloGroupId, (Thr,Weight))
--- | To choose the Phylo edge you want to export : --> <-- <--> <=>
-data Filiation = Ascendant | Descendant | Merge | Complete deriving (Generic, Show, Read)
-data EdgeType = PeriodEdge | LevelEdge deriving (Generic, Show, Eq)
+data Filiation = ToParents | ToChilds | ToParentsMemory | ToChildsMemory deriving (Generic, Show)
+data PointerType = TemporalPointer | LevelPointer deriving (Generic, Show)
--------------------
--- | PhyloView | --
--------------------
+----------------------
+-- | Phylo Clique | --
+----------------------
--- | A PhyloView is the output type of a Phylo
-data PhyloView = PhyloView
- { _pv_param :: !PhyloParam
- , _pv_title :: !Text
- , _pv_description :: !Text
- , _pv_filiation :: !Filiation
- , _pv_level :: !Level
- , _pv_periods :: ![PhyloPeriodId]
- , _pv_metrics :: !(Map Text [Double])
- , _pv_branches :: ![PhyloBranch]
- , _pv_nodes :: ![PhyloNode]
- , _pv_edges :: ![PhyloEdge]
- } deriving (Generic, Show)
-
--- | A phyloview is made of PhyloBranches, edges and nodes
-data PhyloBranch = PhyloBranch
- { _pb_id :: !PhyloBranchId
- , _pb_peak :: !Text
- , _pb_metrics :: !(Map Text [Double])
- } deriving (Generic, Show)
-
-data PhyloEdge = PhyloEdge
- { _pe_source :: !PhyloGroupId
- , _pe_target :: !PhyloGroupId
- , _pe_type :: !EdgeType
- , _pe_weight :: !Weight
- } deriving (Generic, Show)
-
-data PhyloNode = PhyloNode
- { _pn_id :: !PhyloGroupId
- , _pn_bid :: !(Maybe PhyloBranchId)
- , _pn_label :: !Text
- , _pn_idx :: ![Int]
- , _pn_ngrams :: !(Maybe [Ngrams])
- , _pn_metrics :: !(Map Text [Double])
- , _pn_cooc :: !(Map (Int,Int) Double)
- , _pn_parents :: !(Maybe [PhyloGroupId])
- , _pn_childs :: ![PhyloNode]
- } deriving (Generic, Show)
+-- | Support : Number of Documents where a Clique occurs
+type Support = Int
-------------------------
--- | PhyloQueryView | --
-------------------------
+data PhyloClique = PhyloClique
+ { _phyloClique_nodes :: [Int]
+ , _phyloClique_support :: Support
+ , _phyloClique_period :: (Date,Date)
+ , _phyloClique_weight :: Maybe Double
+ , _phyloClique_sources :: [Int]
+ } deriving (Generic,NFData,Show,Eq)
+
+----------------
+-- | Export | --
+----------------
+type DotId = TextLazy.Text
-data ExportMode = Json | Dot | Svg
- deriving (Generic, Show, Read)
-data DisplayMode = Flat | Nested
- deriving (Generic, Show, Read)
+data EdgeType = GroupToGroup | GroupToGroupMemory | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
--- | A PhyloQueryView describes a Phylo as an output view
-data PhyloQueryView = PhyloQueryView
- { _qv_lvl :: !Level
+data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
+instance ToSchema Filter where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
- -- Does the PhyloGraph contain ascendant, descendant or a complete Filiation ? Complet redondant et merge (avec le max)
- , _qv_filiation :: !Filiation
- -- Does the PhyloGraph contain some levelChilds ? How deep must it go ?
- , _qv_levelChilds :: !Bool
- , _qv_levelChildsDepth :: !Level
+data Order = Asc | Desc deriving (Show,Generic,Eq, ToSchema)
- -- Ordered lists of filters, taggers and metrics to be applied to the PhyloGraph
- -- Firstly the metrics, then the filters and the taggers
- , _qv_metrics :: ![Metric]
- , _qv_filters :: ![Filter]
- , _qv_taggers :: ![Tagger]
+data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy {_sort_order :: Order } deriving (Show,Generic,Eq)
+instance ToSchema Sort where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sort_")
- -- An asc or desc sort to apply to the PhyloGraph
- , _qv_sort :: !(Maybe (Sort,Order))
- -- A display mode to apply to the PhyloGraph, ie: [Node[Node,Edge],Edge] or [[Node,Node],[Edge,Edge]]
- , _qv_export :: !ExportMode
- , _qv_display :: !DisplayMode
- , _qv_verbose :: !Bool
- }
+data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
+instance ToSchema Tagger where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "")
+
+
+data PhyloLabel =
+ BranchLabel
+ { _branch_labelTagger :: Tagger
+ , _branch_labelSize :: Int }
+ | GroupLabel
+ { _group_labelTagger :: Tagger
+ , _group_labelSize :: Int }
+ deriving (Show,Generic,Eq)
+
+instance ToSchema PhyloLabel where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_")
+
+
+data PhyloBranch =
+ PhyloBranch
+ { _branch_id :: PhyloBranchId
+ , _branch_canonId :: [Int]
+ , _branch_seaLevel :: [Double]
+ , _branch_x :: Double
+ , _branch_y :: Double
+ , _branch_w :: Double
+ , _branch_t :: Double
+ , _branch_label :: Text
+ , _branch_meta :: Map Text [Double]
+ } deriving (Generic, Show, Eq)
+
+instance ToSchema PhyloBranch where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_branch_")
+
+data PhyloExport =
+ PhyloExport
+ { _export_groups :: [PhyloGroup]
+ , _export_branches :: [PhyloBranch]
+ } deriving (Generic, Show)
+instance ToSchema PhyloExport where
+ declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_export_")
----------------
-- | Lenses | --
----------------
-
-makeLenses ''PhyloParam
-makeLenses ''Software
---
-makeLenses ''Phylo
+makeLenses ''PhyloConfig
+makeLenses ''PhyloSubConfig
+makeLenses ''Proximity
+makeLenses ''SeaElevation
+makeLenses ''Quality
+makeLenses ''Clique
+makeLenses ''PhyloLabel
+makeLenses ''TimeUnit
makeLenses ''PhyloFoundations
-makeLenses ''PhyloGroup
-makeLenses ''PhyloLevel
+makeLenses ''PhyloClique
+makeLenses ''Phylo
makeLenses ''PhyloPeriod
-makeLenses ''PhyloFis
---
-makeLenses ''Proximity
-makeLenses ''Cluster
-makeLenses ''Filter
---
-makeLenses ''PhyloQueryBuild
-makeLenses ''PhyloQueryView
---
-makeLenses ''PhyloView
+makeLenses ''PhyloLevel
+makeLenses ''PhyloGroup
+makeLenses ''PhyloParam
+makeLenses ''PhyloExport
makeLenses ''PhyloBranch
-makeLenses ''PhyloNode
-makeLenses ''PhyloEdge
-
------------------------
-- | JSON instances | --
------------------------
+instance FromJSON Phylo
+instance ToJSON Phylo
-$(deriveJSON (unPrefix "_phylo_" ) ''Phylo )
-$(deriveJSON (unPrefix "_phylo_foundations" ) ''PhyloFoundations )
-$(deriveJSON (unPrefix "_phylo_period" ) ''PhyloPeriod )
-$(deriveJSON (unPrefix "_phylo_level" ) ''PhyloLevel )
-$(deriveJSON (unPrefix "_phylo_group" ) ''PhyloGroup )
-$(deriveJSON (unPrefix "_phyloFis_" ) ''PhyloFis )
---
-$(deriveJSON (unPrefix "_software_" ) ''Software )
-$(deriveJSON (unPrefix "_phyloParam_" ) ''PhyloParam )
---
-$(deriveJSON defaultOptions ''Filter )
-$(deriveJSON defaultOptions ''Metric )
-$(deriveJSON defaultOptions ''Cluster )
-$(deriveJSON defaultOptions ''Proximity )
---
-$(deriveJSON (unPrefix "_fis_" ) ''FisParams )
-$(deriveJSON (unPrefix "_hamming_" ) ''HammingParams )
-$(deriveJSON (unPrefix "_louvain_" ) ''LouvainParams )
-$(deriveJSON (unPrefix "_rc_" ) ''RCParams )
-$(deriveJSON (unPrefix "_wlj_" ) ''WLJParams )
---
-$(deriveJSON (unPrefix "_lb_" ) ''LBParams )
-$(deriveJSON (unPrefix "_sb_" ) ''SBParams )
---
-$(deriveJSON (unPrefix "_q_" ) ''PhyloQueryBuild )
-$(deriveJSON (unPrefix "_pv_" ) ''PhyloView )
-$(deriveJSON (unPrefix "_pb_" ) ''PhyloBranch )
-$(deriveJSON (unPrefix "_pe_" ) ''PhyloEdge )
-$(deriveJSON (unPrefix "_pn_" ) ''PhyloNode )
-
-$(deriveJSON defaultOptions ''Filiation )
-$(deriveJSON defaultOptions ''EdgeType )
+instance FromJSON PhyloSources
+instance ToJSON PhyloSources
----------------------------
--- | Swagger instances | --
----------------------------
+instance FromJSON PhyloParam
+instance ToJSON PhyloParam
-instance ToSchema Phylo where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_")
-instance ToSchema PhyloFoundations where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_foundations")
-instance ToSchema PhyloPeriod where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_period")
-instance ToSchema PhyloLevel where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_level")
-instance ToSchema PhyloGroup where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phylo_group")
-instance ToSchema PhyloFis where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloFis_")
-instance ToSchema Software where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_software_")
-instance ToSchema PhyloParam where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_phyloParam_")
-instance ToSchema Filter
-instance ToSchema Metric
-instance ToSchema Cluster
-instance ToSchema Proximity where
- declareNamedSchema = genericDeclareNamedSchemaUnrestricted defaultSchemaOptions
-instance ToSchema FisParams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_fis_")
-instance ToSchema HammingParams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_hamming_")
-instance ToSchema LouvainParams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_louvain_")
-instance ToSchema RCParams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_rc_")
-instance ToSchema WLJParams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_wlj_")
-instance ToSchema LBParams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_lb_")
-instance ToSchema SBParams where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_sb_")
-instance ToSchema PhyloQueryBuild where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_q_")
-instance ToSchema PhyloView where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pv_")
-instance ToSchema PhyloBranch where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pb_")
-instance ToSchema PhyloEdge where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pe_")
-instance ToSchema PhyloNode where
- declareNamedSchema = genericDeclareNamedSchema (unPrefixSwagger "_pn_")
-instance ToSchema Filiation
-instance ToSchema EdgeType
-
-----------------------------
--- | TODO XML instances | --
-----------------------------
+instance FromJSON PhyloPeriod
+instance ToJSON PhyloPeriod
+
+instance FromJSON PhyloLevel
+instance ToJSON PhyloLevel
+
+instance FromJSON Software
+instance ToJSON Software
+
+instance FromJSON PhyloGroup
+instance ToJSON PhyloGroup
+$(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)