module Gargantext.Core.Viz.Phylo.PhyloExport where
-import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList)
+import Data.Map (Map, fromList, empty, fromListWith, insert, (!), elems, unionWith, findWithDefault, toList, member)
import Data.List ((++), sort, nub, null, concat, sortOn, groupBy, union, (\\), (!!), init, partition, notElem, unwords, nubBy, inits, elemIndex)
import Data.Vector (Vector)
, toAttr "label" (pack $ show $ b ^. branch_label)
])
-periodToDotNode :: (Date,Date) -> Dot DotId
-periodToDotNode prd =
+periodToDotNode :: (Date,Date) -> (Text.Text,Text.Text) -> Dot DotId
+periodToDotNode prd prd' =
node (periodIdToDotId prd)
([Shape BoxShape, FontSize 50, Label (toDotLabel $ Text.pack (show (fst prd) <> " " <> show (snd prd)))]
- <> [ toAttr "nodeType" "period"
+ <> [ toAttr "nodeType" "period"
+ , toAttr "strFrom" (fromStrict $ Text.pack $ (show $ fst prd'))
+ , toAttr "strTo" (fromStrict $ Text.pack $ (show $ snd prd'))
, toAttr "from" (fromStrict $ Text.pack $ (show $ fst prd))
, toAttr "to" (fromStrict $ Text.pack $ (show $ snd prd))])
<> [ toAttr "nodeType" "group"
, toAttr "from" (pack $ show (fst $ g ^. phylo_groupPeriod))
, toAttr "to" (pack $ show (snd $ g ^. phylo_groupPeriod))
+ , toAttr "strFrom" (pack $ show (fst $ g ^. phylo_groupPeriod'))
+ , toAttr "strTo" (pack $ show (snd $ g ^. phylo_groupPeriod'))
, toAttr "branchId" (pack $ unwords (init $ map show $ snd $ g ^. phylo_groupBranchId))
, toAttr "bId" (pack $ show bId)
, toAttr "support" (pack $ show (g ^. phylo_groupSupport))
- , toAttr "label" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
+ , toAttr "weight" (pack $ show (g ^. phylo_groupWeight))
+ , toAttr "source" (pack $ show (nub $ g ^. phylo_groupSources))
+ , toAttr "lbl" (pack $ show (ngramsToLabel fdt (g ^. phylo_groupNgrams)))
, toAttr "foundation" (pack $ show (idxToLabel (g ^. phylo_groupNgrams)))
, toAttr "role" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "dynamics")))
+ , toAttr "frequence" (pack $ show (idxToLabel' ((g ^. phylo_groupMeta) ! "frequence")))
])
-toDotEdge :: DotId -> DotId -> Text.Text -> EdgeType -> Dot DotId
+toDotEdge :: DotId -> DotId -> [Char] -> EdgeType -> Dot DotId
toDotEdge source target lbl edgeType = edge source target
(case edgeType of
- GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True
- , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "link" ]
- BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])
- , Label (StrLabel $ fromStrict lbl)] <> [toAttr "edgeType" "branchLink" ]
- BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])
- , Label (StrLabel $ fromStrict lbl)]
- GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)])
- , Label (StrLabel $ fromStrict lbl), PenWidth 4] <> [toAttr "edgeType" "ancestorLink" ]
+ GroupToGroup -> [ Width 3, penWidth 4, Color [toWColor Black], Constraint True] <> [toAttr "edgeType" "link", toAttr "lbl" (pack lbl)]
+ BranchToGroup -> [ Width 3, Color [toWColor Black], ArrowHead (AType [(ArrMod FilledArrow RightSide,DotArrow)])] <> [toAttr "edgeType" "branchLink" ]
+ BranchToBranch -> [ Width 2, Color [toWColor Black], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,DotArrow)])]
+ GroupToAncestor -> [ Width 3, Color [toWColor Red], Style [SItem Dashed []], ArrowHead (AType [(ArrMod FilledArrow BothSides,NoArrow)]), PenWidth 4] <> [toAttr "edgeType" "ancestorLink", toAttr "lbl" (pack lbl)]
PeriodToPeriod -> [ Width 5, Color [toWColor Black]])
,(toAttr (fromStrict "phyloPeriods") $ pack $ show (length $ elems $ phylo ^. phylo_periods))
,(toAttr (fromStrict "phyloBranches") $ pack $ show (length $ export ^. export_branches))
,(toAttr (fromStrict "phyloGroups") $ pack $ show (length $ export ^. export_groups))
- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
+ ,(toAttr (fromStrict "phyloSources") $ pack $ show (Vector.toList $ getSources phylo))
+ ,(toAttr (fromStrict "phyloTimeScale") $ pack $ getTimeScale phylo)
+ -- ,(toAttr (fromStrict "phyloTermsFreq") $ pack $ show (toList $ _phylo_lastTermFreq phylo))
])
{-
-- 2) create a layer for the branches labels -}
subgraph (Str "Branches peaks") $ do
- graphAttrs [Rank SameRank]
+ -- graphAttrs [Rank SameRank]
{-
-- 3) group the branches by hierarchy
-- mapM (\branches ->
{-- 5) create a layer for each period -}
_ <- mapM (\period ->
- subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst period) <> show (snd period))) $ do
+ subgraph ((Str . fromStrict . Text.pack) $ ("Period" <> show (fst $ _phylo_periodPeriod period) <> show (snd $ _phylo_periodPeriod period))) $ do
graphAttrs [Rank SameRank]
- periodToDotNode period
+ periodToDotNode (period ^. phylo_periodPeriod) (period ^. phylo_periodPeriod')
{-- 6) create a node for each group -}
- mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == period) $ export ^. export_groups)
- ) $ getPeriodIds phylo
+ mapM (\g -> groupToDotNode (getRoots phylo) g (toBid g (export ^. export_branches))) (filter (\g -> g ^. phylo_groupPeriod == (period ^. phylo_periodPeriod)) $ export ^. export_groups)
+ ) $ phylo ^. phylo_periods
{-- 7) create the edges between a branch and its first groups -}
_ <- mapM (\(bId,groups) ->
$ fromListWith (++) $ map (\g -> (g ^. phylo_groupBranchId,[g])) $ export ^. export_groups
{- 8) create the edges between the groups -}
- _ <- mapM (\((k,k'),_) ->
- toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToGroup
+ _ <- mapM (\((k,k'),v) ->
+ toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToGroup
) $ (toList . mergePointers) $ export ^. export_groups
- _ <- mapM (\((k,k'),_) ->
- toDotEdge (groupIdToDotId k) (groupIdToDotId k') "" GroupToAncestor
+ _ <- mapM (\((k,k'),v) ->
+ toDotEdge (groupIdToDotId k) (groupIdToDotId k') (show v) GroupToAncestor
) $ mergeAncestors $ export ^. export_groups
-- 10) create the edges between the periods
$ zip branches
$ ([0] ++ (map (\(b,b') ->
let idx = length $ commonPrefix (b ^. branch_canonId) (b' ^. branch_canonId) []
- in (b' ^. branch_seaLevel) !! (idx - 1)
+ lmin = min (length $ b ^. branch_seaLevel) (length $ b' ^. branch_seaLevel)
+ in
+ if ((idx - 1) > ((length $ b' ^. branch_seaLevel) - 1))
+ then (b' ^. branch_seaLevel) !! (lmin - 1)
+ else (b' ^. branch_seaLevel) !! (idx - 1)
) $ listToSeq branches))
in map (\(x,b) -> b & branch_x .~ x)
$ zip steps branches
+branchToIso' :: Double -> Double -> [PhyloBranch] -> [PhyloBranch]
+branchToIso' start step branches =
+ let bx = map (\l -> (sum l) + ((fromIntegral $ length l) * 0.5))
+ $ inits
+ $ ([0] ++ (map (\(b,b') ->
+ let root = fromIntegral $ length $ commonPrefix (snd $ b ^. branch_id) (snd $ b' ^. branch_id) []
+ in 1 - start - step * root) $ listToSeq branches))
+ in map (\(x,b) -> b & branch_x .~ x)
+ $ zip bx branches
+
sortByHierarchy :: Int -> [PhyloBranch] -> [PhyloBranch]
sortByHierarchy depth branches =
if (length branches == 1)
- then branchToIso branches
- else branchToIso $ concat
+ then branches
+ else concat
$ map (\branches' ->
let partitions = partition (\b -> depth + 1 == ((length . snd) $ b ^. branch_id)) branches'
in (sortOn (\b -> (b ^. branch_meta) ! "birth") (fst partitions))
Desc -> reverse branches
in export & export_branches .~ branches'
-processSort :: Sort -> PhyloExport -> PhyloExport
-processSort sort' export = case sort' of
+processSort :: Sort -> SeaElevation -> PhyloExport -> PhyloExport
+processSort sort' elev export = case sort' of
ByBirthDate o -> sortByBirthDate o export
- ByHierarchy -> export & export_branches .~ sortByHierarchy 0 (export ^. export_branches)
+ ByHierarchy -> export & export_branches .~ (branchToIso' (_cons_start elev) (_cons_step elev)
+ $ sortByHierarchy 0 (export ^. export_branches))
-----------------
+ (sum $ map (\j -> conditional m i j) l)) / (fromIntegral $ (length l) + 1)
-ngramsMetrics :: PhyloExport -> PhyloExport
-ngramsMetrics export =
+ngramsMetrics :: Phylo -> PhyloExport -> PhyloExport
+ngramsMetrics phylo export =
over ( export_groups
. traverse )
(\g -> g & phylo_groupMeta %~ insert "genericity"
(map (\n -> specificity (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
& phylo_groupMeta %~ insert "inclusion"
(map (\n -> inclusion (g ^. phylo_groupCooc) ((g ^. phylo_groupNgrams) \\ [n]) n) $ g ^. phylo_groupNgrams)
+ & phylo_groupMeta %~ insert "frequence"
+ (map (\n -> getInMap n (phylo ^. phylo_lastTermFreq)) $ g ^. phylo_groupNgrams)
) export
& branch_meta %~ insert "age" [fromIntegral age]
& branch_meta %~ insert "size" [fromIntegral $ length periods] ) export
-processMetrics :: PhyloExport -> PhyloExport
-processMetrics export = ngramsMetrics
- $ branchDating export
+processMetrics :: Phylo -> PhyloExport -> PhyloExport
+processMetrics phylo export = ngramsMetrics phylo
+ $ branchDating export
-----------------
-- | Taggers | --
-----------------
+nk :: Int -> [[Int]] -> Int
+nk n groups = sum
+ $ map (\g -> if (elem n g)
+ then 1
+ else 0) groups
+
+
+tf :: Int -> [[Int]] -> Double
+tf n groups = (fromIntegral $ nk n groups) / (fromIntegral $ length $ concat groups)
+
+
+idf :: Int -> [[Int]] -> Double
+idf n groups = log ((fromIntegral $ length groups) / (fromIntegral $ nk n groups))
+
+
+findTfIdf :: [[Int]] -> [(Int,Double)]
+findTfIdf groups = reverse $ sortOn snd $ map (\n -> (n,(tf n groups) * (idf n groups))) $ sort $ nub $ concat groups
+
+
+findEmergences :: [PhyloGroup] -> Map Int Double -> [(Int,Double)]
+findEmergences groups freq =
+ let ngrams = map _phylo_groupNgrams groups
+ dynamics = map (\g -> (g ^. phylo_groupMeta) ! "dynamics") groups
+ emerging = nubBy (\n1 n2 -> fst n1 == fst n2)
+ $ concat $ map (\g -> filter (\(_,d) -> d == 0) $ zip (fst g) (snd g)) $ zip ngrams dynamics
+ in reverse $ sortOn snd
+ $ map (\(n,_) -> if (member n freq)
+ then (n,freq ! n)
+ else (n,0)) emerging
+
+
+mostEmergentTfIdf :: Int -> Map Int Double -> Vector Ngrams -> PhyloExport -> PhyloExport
+mostEmergentTfIdf nth freq foundations export =
+ over ( export_branches
+ . traverse )
+ (\b ->
+ let groups = filter (\g -> g ^. phylo_groupBranchId == b ^. branch_id) $ export ^. export_groups
+ tfidf = findTfIdf (map _phylo_groupNgrams groups)
+ emergences = findEmergences groups freq
+ selected = if (null emergences)
+ then map fst $ take nth tfidf
+ else [fst $ head' "mostEmergentTfIdf" emergences]
+ ++ (map fst $ take (nth - 1) $ filter (\(n,_) -> n /= (fst $ head' "mostEmergentTfIdf" emergences)) tfidf)
+ in b & branch_label .~ (ngramsToLabel foundations selected)) export
+
+
getNthMostMeta :: Int -> [Double] -> [Int] -> [Int]
getNthMostMeta nth meta ns = map (\(idx,_) -> (ns !! idx))
$ take nth
in g & phylo_groupLabel .~ lbl ) export
-processLabels :: [PhyloLabel] -> Vector Ngrams -> PhyloExport -> PhyloExport
-processLabels labels foundations export =
+processLabels :: [PhyloLabel] -> Vector Ngrams -> Map Int Double -> PhyloExport -> PhyloExport
+processLabels labels foundations freq export =
foldl (\export' label ->
case label of
GroupLabel tagger nth ->
BranchLabel tagger nth ->
case tagger of
MostInclusive -> mostInclusive nth foundations export'
+ MostEmergentTfIdf -> mostEmergentTfIdf nth freq foundations export'
_ -> panic "[ERR][Viz.Phylo.PhyloExport] unknown tagger" ) export labels
{- decrease -}
then 2
else if ((fst prd) == (fst $ m ! n))
- {- recombination -}
+ {- emerging -}
then 0
else if isNew
{- emergence -}
mapGroups :: [[PhyloGroup]]
mapGroups = map (\prd ->
let groups = getGroupsFromLevelPeriods level [prd] phylo
- childs = getPreviousChildIds level frame prd periods phylo
- heads = filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
+ childs = getPreviousChildIds level frame prd periods phylo
+ -- maybe add a better filter for non isolated ancestors
+ heads = filter (\g -> (not . null) $ (g ^. phylo_groupPeriodChilds))
+ $ filter (\g -> null (g ^. phylo_groupPeriodParents) && (notElem (getGroupId g) childs)) groups
noHeads = groups \\ heads
nbDocs = sum $ elems $ filterDocs (phylo ^. phylo_timeDocs) [prd]
diago = reduceDiagos $ filterDiago (phylo ^. phylo_timeCooc) [prd]
toPhyloExport :: Phylo -> DotGraph DotId
toPhyloExport phylo = exportToDot phylo
$ processFilters (exportFilter $ getConfig phylo) (phyloQuality $ getConfig phylo)
- $ processSort (exportSort $ getConfig phylo)
- $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo)
- $ processMetrics export
+ $ processSort (exportSort $ getConfig phylo) (getSeaElevation phylo)
+ $ processLabels (exportLabel $ getConfig phylo) (getRoots phylo) (_phylo_lastTermFreq phylo)
+ $ processMetrics phylo export
where
export :: PhyloExport
export = PhyloExport groups branches
groups = traceExportGroups
$ processDynamics
$ getGroupsFromLevel (phyloLevel $ getConfig phylo)
- $ tracePhyloInfo
- $ toHorizon phylo
+ $ tracePhyloInfo phylo
+ -- \$ toHorizon phylo
traceExportBranches :: [PhyloBranch] -> [PhyloBranch]
<> "-- | Export " <> show(length branches) <> " branches") branches
tracePhyloAncestors :: [[PhyloGroup]] -> [[PhyloGroup]]
-tracePhyloAncestors groups = trace ("\n"
- <> "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors"
- ) groups
+tracePhyloAncestors groups = trace ( "-- | Found " <> show(length $ concat $ map _phylo_groupAncestors $ concat groups) <> " ancestors") groups
tracePhyloInfo :: Phylo -> Phylo
-tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with �� = "
+tracePhyloInfo phylo = trace ("\n" <> "##########################" <> "\n\n" <> "-- | Phylo with �� = "
<> show(_qua_granularity $ phyloQuality $ getConfig phylo) <> " applied to "
<> show(length $ Vector.toList $ getRoots phylo) <> " foundations"
) phylo