data CorpusParser =
- Wos {_wos_limit :: Int}
- | Csv {_csv_limit :: Int}
+ Wos {_wos_limit :: Int}
+ | Csv {_csv_limit :: Int}
+ | Csv' {_csv'_limit :: Int}
deriving (Show,Generic,Eq)
data SeaElevation =
-- , _wlj_elevation :: Double
-}
}
+ | WeightedLogSim
+ { _wlj_sensibility :: Double
+{-
+ -- , _wlj_thresholdInit :: Double
+ -- , _wlj_thresholdStep :: Double
+ -- | max height for sea level in temporal matching
+ -- , _wlj_elevation :: Double
+-}
+ }
| Hamming
deriving (Show,Generic,Eq)
{ _year_period :: Int
, _year_step :: Int
, _year_matchingFrame :: Int }
+ | Month
+ { _month_period :: Int
+ , _month_step :: Int
+ , _month_matchingFrame :: Int }
+ | Week
+ { _week_period :: Int
+ , _week_step :: Int
+ , _week_matchingFrame :: Int }
+ | Day
+ { _day_period :: Int
+ , _day_step :: Int
+ , _day_matchingFrame :: Int }
deriving (Show,Generic,Eq)
+data CliqueFilter = ByThreshold | ByNeighbours deriving (Show,Generic,Eq)
data Clique =
Fis
{ _fis_support :: Int
, _fis_size :: Int }
| MaxClique
- { _mcl_size :: Int }
+ { _mcl_size :: Int
+ , _mcl_threshold :: Double
+ , _mcl_filter :: CliqueFilter }
deriving (Show,Generic,Eq)
, phyloLevel :: Int
, phyloProximity :: Proximity
, seaElevation :: SeaElevation
+ , findAncestors :: Bool
, phyloSynchrony :: Synchrony
, phyloQuality :: Quality
, timeUnit :: TimeUnit
, phyloName = pack "Default Phylo"
, phyloLevel = 2
, phyloProximity = WeightedLogJaccard 10
- , seaElevation = Constante 0.6 1
- , phyloSynchrony = ByProximityThreshold 0.5 10 SiblingBranches MergeAllGroups
- , phyloQuality = Quality 100 1
+ , seaElevation = Constante 0.1 0.1
+ , findAncestors = True
+ , phyloSynchrony = ByProximityThreshold 0.1 10 SiblingBranches MergeAllGroups
+ , phyloQuality = Quality 0 1
, timeUnit = Year 3 1 5
- , clique = MaxClique 0
- , exportLabel = [BranchLabel MostInclusive 2, GroupLabel MostEmergentInclusive 2]
+ , clique = MaxClique 0 3 ByNeighbours
+ , exportLabel = [BranchLabel MostEmergentTfIdf 2, GroupLabel MostEmergentInclusive 2]
, exportSort = ByHierarchy
, exportFilter = [ByBranchSize 2]
}
instance ToJSON SeaElevation
instance FromJSON TimeUnit
instance ToJSON TimeUnit
+instance FromJSON CliqueFilter
+instance ToJSON CliqueFilter
instance FromJSON Clique
instance ToJSON Clique
instance FromJSON PhyloLabel
-- | Document | --
------------------
-
-- | Date : a simple Integer
type Date = Int
-- | Ngrams : a contiguous sequence of n terms
type Ngrams = Text
--- | Document : a piece of Text linked to a Date
+-- Document : a piece of Text linked to a Date
+-- date = computational date; date' = original string date yyyy-mm-dd
data Document = Document
- { date :: Date
- , text :: [Ngrams]
+ { date :: Date
+ , date' :: Text
+ , text :: [Ngrams]
+ , weight :: Maybe Double
+ , sources :: [Text]
} deriving (Eq,Show,Generic,NFData)
} deriving (Generic, Show, Eq)
+data PhyloSources = PhyloSources
+ { _sources :: !(Vector Text) } deriving (Generic, Show, Eq)
+
+
---------------------------
-- | Coocurency Matrix | --
---------------------------
-- param : the parameters of the phylomemy (with the user's configuration)
-- periods : the temporal steps of a phylomemy
data Phylo =
- Phylo { _phylo_foundations :: PhyloFoundations
- , _phylo_timeCooc :: !(Map Date Cooc)
- , _phylo_timeDocs :: !(Map Date Double)
- , _phylo_termFreq :: !(Map Int Double)
- , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
- , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
- , _phylo_param :: PhyloParam
- , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
+ Phylo { _phylo_foundations :: PhyloFoundations
+ , _phylo_sources :: PhyloSources
+ , _phylo_timeCooc :: !(Map Date Cooc)
+ , _phylo_timeDocs :: !(Map Date Double)
+ , _phylo_termFreq :: !(Map Int Double)
+ , _phylo_lastTermFreq :: !(Map Int Double)
+ , _phylo_horizon :: !(Map (PhyloGroupId,PhyloGroupId) Double)
+ , _phylo_groupsProxi :: !(Map (PhyloGroupId,PhyloGroupId) Double)
+ , _phylo_param :: PhyloParam
+ , _phylo_periods :: Map PhyloPeriodId PhyloPeriod
}
deriving (Generic, Show, Eq)
-- id: tuple (start date, end date) of the temporal step of the phylomemy
-- levels: levels of granularity
data PhyloPeriod =
- PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
- , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
+ PhyloPeriod { _phylo_periodPeriod :: (Date,Date)
+ , _phylo_periodPeriod' :: (Text,Text)
+ , _phylo_periodLevels :: Map PhyloLevelId PhyloLevel
} deriving (Generic, Show, Eq)
-- Level 1: First level of clustering (the Fis)
-- Level [2..N]: Nth level of synchronic clustering (cluster of Fis)
data PhyloLevel =
- PhyloLevel { _phylo_levelPeriod :: (Date,Date)
- , _phylo_levelLevel :: Level
- , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
+ PhyloLevel { _phylo_levelPeriod :: (Date,Date)
+ , _phylo_levelPeriod' :: (Text,Text)
+ , _phylo_levelLevel :: Level
+ , _phylo_levelGroups :: Map PhyloGroupId PhyloGroup
}
deriving (Generic, Show, Eq)
-- | PhyloGroup : group of ngrams at each level and period
data PhyloGroup =
PhyloGroup { _phylo_groupPeriod :: (Date,Date)
+ , _phylo_groupPeriod' :: (Text,Text)
, _phylo_groupLevel :: Level
- , _phylo_groupIndex :: Int
+ , _phylo_groupIndex :: Int
, _phylo_groupLabel :: Text
, _phylo_groupSupport :: Support
+ , _phylo_groupWeight :: Maybe Double
+ , _phylo_groupSources :: [Int]
, _phylo_groupNgrams :: [Int]
, _phylo_groupCooc :: !(Cooc)
, _phylo_groupBranchId :: PhyloBranchId
, _phylo_groupLevelChilds :: [Pointer]
, _phylo_groupPeriodParents :: [Pointer]
, _phylo_groupPeriodChilds :: [Pointer]
+ , _phylo_groupAncestors :: [Pointer]
}
deriving (Generic, Show, Eq, NFData)
{ _phyloClique_nodes :: [Int]
, _phyloClique_support :: Support
, _phyloClique_period :: (Date,Date)
- } deriving (Generic,NFData,Show,Eq)
-
-
-------------------------
--- | Phylo Ancestor | --
-------------------------
-
-data PhyloAncestor = PhyloAncestor
- { _phyloAncestor_id :: Int
- , _phyloAncestor_ngrams :: [Int]
- , _phyloAncestor_groups :: [PhyloGroupId]
+ , _phyloClique_weight :: Maybe Double
+ , _phyloClique_sources :: [Int]
} deriving (Generic,NFData,Show,Eq)
----------------
type DotId = TextLazy.Text
-data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | PeriodToPeriod deriving (Show,Generic,Eq)
+data EdgeType = GroupToGroup | BranchToGroup | BranchToBranch | GroupToAncestor | PeriodToPeriod deriving (Show,Generic,Eq)
data Filter = ByBranchSize { _branch_size :: Double } deriving (Show,Generic,Eq)
data Sort = ByBirthDate { _sort_order :: Order } | ByHierarchy deriving (Show,Generic,Eq)
-data Tagger = MostInclusive | MostEmergentInclusive deriving (Show,Generic,Eq)
+data Tagger = MostInclusive | MostEmergentInclusive | MostEmergentTfIdf deriving (Show,Generic,Eq)
data PhyloLabel =
BranchLabel
PhyloExport
{ _export_groups :: [PhyloGroup]
, _export_branches :: [PhyloBranch]
- , _export_ancestors :: [PhyloAncestor]
} deriving (Generic, Show)
----------------
-- | JSON instances | --
------------------------
+instance FromJSON Phylo
+instance ToJSON Phylo
+instance FromJSON PhyloSources
+instance ToJSON PhyloSources
+instance FromJSON PhyloParam
+instance ToJSON PhyloParam
+instance FromJSON PhyloPeriod
+instance ToJSON PhyloPeriod
+instance FromJSON PhyloLevel
+instance ToJSON PhyloLevel
+instance FromJSON Software
+instance ToJSON Software
+instance FromJSON PhyloGroup
+instance ToJSON PhyloGroup
$(deriveJSON (unPrefix "_foundations_" ) ''PhyloFoundations)